From d99336559ed897f18fee879f0e39811b26be4c0c Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Thu, 5 Sep 2024 12:29:29 -0700 Subject: [PATCH] Insert software prefetches into merge functions. This improves performance when hardware prefetchers are disabled on AMD machines. PiperOrigin-RevId: 671468969 --- src/google/protobuf/extension_set.cc | 1 + src/google/protobuf/port.h | 9 +++++++++ src/google/protobuf/repeated_ptr_field.cc | 4 ++++ 3 files changed, 14 insertions(+) diff --git a/src/google/protobuf/extension_set.cc b/src/google/protobuf/extension_set.cc index b839efa9e6907..34a13fe5039e6 100644 --- a/src/google/protobuf/extension_set.cc +++ b/src/google/protobuf/extension_set.cc @@ -971,6 +971,7 @@ size_t SizeOfUnion(ItX it_dest, ItX end_dest, ItY it_source, ItY end_source) { void ExtensionSet::MergeFrom(const MessageLite* extendee, const ExtensionSet& other) { + Prefetch5LinesFrom1Line(&other); if (PROTOBUF_PREDICT_TRUE(!is_large())) { if (PROTOBUF_PREDICT_TRUE(!other.is_large())) { GrowCapacity(SizeOfUnion(flat_begin(), flat_end(), other.flat_begin(), diff --git a/src/google/protobuf/port.h b/src/google/protobuf/port.h index a011b7383ff82..4a4d690c0bf25 100644 --- a/src/google/protobuf/port.h +++ b/src/google/protobuf/port.h @@ -306,6 +306,15 @@ inline PROTOBUF_ALWAYS_INLINE void Prefetch5LinesFrom7Lines(const void* ptr) { PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 704); } +// Prefetch 5 64-byte cache lines starting from 1 cache-line ahead. 
+inline PROTOBUF_ALWAYS_INLINE void Prefetch5LinesFrom1Line(const void* ptr) { /* Invokes PROTOBUF_PREFETCH_WITH_OFFSET five times, at byte offsets 64, 128, 192, 256 and 320 from ptr (one 64-byte step each); offset 0 is deliberately not requested here. NOTE(review): the macro is defined outside this patch - presumably a non-faulting prefetch hint, confirm before passing pointers near the end of a mapping. */ + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 64); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 128); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 192); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 256); + PROTOBUF_PREFETCH_WITH_OFFSET(ptr, 320); +} + #if defined(NDEBUG) && ABSL_HAVE_BUILTIN(__builtin_unreachable) [[noreturn]] ABSL_ATTRIBUTE_COLD PROTOBUF_ALWAYS_INLINE inline void Unreachable() { diff --git a/src/google/protobuf/repeated_ptr_field.cc b/src/google/protobuf/repeated_ptr_field.cc index 891439c14d257..19992e454e971 100644 --- a/src/google/protobuf/repeated_ptr_field.cc +++ b/src/google/protobuf/repeated_ptr_field.cc @@ -132,6 +132,7 @@ memswap::value>( template <> void RepeatedPtrFieldBase::MergeFrom( const RepeatedPtrFieldBase& from) { + Prefetch5LinesFrom1Line(&from); ABSL_DCHECK_NE(&from, this); int new_size = current_size_ + from.current_size_; auto dst = reinterpret_cast(InternalReserve(new_size)); @@ -159,6 +160,7 @@ void RepeatedPtrFieldBase::MergeFrom( int RepeatedPtrFieldBase::MergeIntoClearedMessages( const RepeatedPtrFieldBase& from) { + Prefetch5LinesFrom1Line(&from); auto dst = reinterpret_cast(elements() + current_size_); auto src = reinterpret_cast(from.elements()); int count = std::min(ClearedCount(), from.current_size_); @@ -173,6 +175,7 @@ int RepeatedPtrFieldBase::MergeIntoClearedMessages( void RepeatedPtrFieldBase::MergeFromConcreteMessage( const RepeatedPtrFieldBase& from, CopyFn copy_fn) { + Prefetch5LinesFrom1Line(&from); ABSL_DCHECK_NE(&from, this); int new_size = current_size_ + from.current_size_; void** dst = InternalReserve(new_size); @@ -196,6 +199,7 @@ void RepeatedPtrFieldBase::MergeFromConcreteMessage( template <> void RepeatedPtrFieldBase::MergeFrom( const RepeatedPtrFieldBase& from) { + Prefetch5LinesFrom1Line(&from); ABSL_DCHECK_NE(&from, this); ABSL_DCHECK(from.current_size_ > 0); int new_size = current_size_ + from.current_size_;