From e9584192996ba6ae9a071973c3c903b693359217 Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Fri, 31 Jan 2025 10:21:26 -0800 Subject: [PATCH] Use __builtin_expect_with_probability for proto field presence checks. PiperOrigin-RevId: 721817344 --- src/google/protobuf/compiler/cpp/helpers.cc | 23 ++- src/google/protobuf/compiler/cpp/helpers.h | 11 +- src/google/protobuf/compiler/cpp/message.cc | 133 +++++++++++++----- .../compiler/cpp/parse_function_generator.cc | 4 +- src/google/protobuf/port_def.inc | 14 ++ src/google/protobuf/port_undef.inc | 1 + 6 files changed, 145 insertions(+), 41 deletions(-) diff --git a/src/google/protobuf/compiler/cpp/helpers.cc b/src/google/protobuf/compiler/cpp/helpers.cc index 55be3d38e37c0..69a208c52e281 100644 --- a/src/google/protobuf/compiler/cpp/helpers.cc +++ b/src/google/protobuf/compiler/cpp/helpers.cc @@ -1042,9 +1042,26 @@ bool IsLikelyPresent(const FieldDescriptor* field, const Options& options) { return false; } -float GetPresenceProbability(const FieldDescriptor* field, - const Options& options) { - return 1.f; +absl::optional GetPresenceProbability(const FieldDescriptor* field, + const Options& options) { + return absl::nullopt; +} + +absl::optional GetFieldGroupPresenceProbability( + const std::vector& fields, const Options& options) { + ABSL_DCHECK(!fields.empty()); + if (!IsProfileDriven(options)) return absl::nullopt; + + double all_absent_probability = 1.0; + + for (const auto* field : fields) { + absl::optional probability = GetPresenceProbability(field, options); + if (!probability) { + return absl::nullopt; + } + all_absent_probability *= 1.0 - *probability; + } + return 1.0 - all_absent_probability; } bool IsStringInliningEnabled(const Options& options) { diff --git a/src/google/protobuf/compiler/cpp/helpers.h b/src/google/protobuf/compiler/cpp/helpers.h index d7092c51e3751..34ad966c773da 100644 --- a/src/google/protobuf/compiler/cpp/helpers.h +++ b/src/google/protobuf/compiler/cpp/helpers.h @@ -366,8 +366,15 @@ bool IsRarelyPresent(const FieldDescriptor* field, const Options& options); // Returns true if `field` is likely to be present based on PDProto profile. bool IsLikelyPresent(const FieldDescriptor* field, const Options& options); -float GetPresenceProbability(const FieldDescriptor* field, - const Options& options); +absl::optional GetPresenceProbability(const FieldDescriptor* field, + const Options& options); + +// GetFieldGroupPresenceProbability computes presence probability for a group of +// fields. It uses the absence probability (easier to compute) +// (1 - p1) * (1 - p2) * ... * (1 - pn), and in the end the aggregate presence +// probability can be expressed as (1 - all_absent_probability). +absl::optional GetFieldGroupPresenceProbability( + const std::vector& fields, const Options& options); bool IsStringInliningEnabled(const Options& options); diff --git a/src/google/protobuf/compiler/cpp/message.cc b/src/google/protobuf/compiler/cpp/message.cc index 73bfa9b119410..7496d726a58c0 100644 --- a/src/google/protobuf/compiler/cpp/message.cc +++ b/src/google/protobuf/compiler/cpp/message.cc @@ -95,9 +95,71 @@ std::string ConditionalToCheckBitmasks( return result + (return_success ? " == 0" : " != 0"); } +template +void DebugAssertUniform(const std::vector& fields, + const Options& options, PredicateT&& pred) { + ABSL_DCHECK(!fields.empty() && absl::c_all_of(fields, [&](const auto* f) { + return pred(f) == pred(fields.front()); + })); +} + +void DebugAssertUniformLikelyPresence( + const std::vector& fields, const Options& options) { + DebugAssertUniform(fields, options, [&](const FieldDescriptor* f) { + return IsLikelyPresent(f, options); + }); +} + +// Generates a condition that checks presence of a field. If probability is +// provided, the condition will be wrapped with +// PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY. +// +// If use_cached_has_bits is true, the condition will be generated based on +// cached_has_bits. Otherwise, the condition will be generated based on the +// _has_bits_ array, with has_array_index indicating which element of the array +// to use. +std::string GenerateConditionMaybeWithProbability( + uint32_t mask, absl::optional probability, bool use_cached_has_bits, + absl::optional has_array_index) { + std::string condition; + if (use_cached_has_bits) { + condition = absl::StrFormat("(cached_has_bits & 0x%08xu) != 0", mask); + } else { + // We only use has_array_index when use_cached_has_bits is false, make sure + // we pas a valid index when we need it. + ABSL_DCHECK(has_array_index.has_value()); + condition = absl::StrFormat("(this_._impl_._has_bits_[%d] & 0x%08xu) != 0", + *has_array_index, mask); + } + if (probability.has_value()) { + return absl::StrFormat("PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(%s, %.3f)", + condition, *probability); + } + return condition; +} + +std::string GenerateConditionMaybeWithProbabilityForField( + int has_bit_index, const FieldDescriptor* field, const Options& options) { + auto prob = GetPresenceProbability(field, options); + return GenerateConditionMaybeWithProbability( + 1u << (has_bit_index % 32), prob, + /*use_cached_has_bits*/ true, + /*has_array_index*/ absl::nullopt); +} + +std::string GenerateConditionMaybeWithProbabilityForGroup( + uint32_t mask, const std::vector& fields, + const Options& options) { + auto prob = GetFieldGroupPresenceProbability(fields, options); + return GenerateConditionMaybeWithProbability( + mask, prob, + /*use_cached_has_bits*/ true, + /*has_array_index*/ absl::nullopt); +} + void PrintPresenceCheck(const FieldDescriptor* field, const std::vector& has_bit_indices, io::Printer* p, - int* cached_has_word_index) { + int* cached_has_word_index, const Options& options) { if (!field->options().weak()) { int has_bit_index = has_bit_indices[field->index()]; if (*cached_has_word_index != (has_bit_index / 32)) { @@ -107,9 +169,10 @@ void PrintPresenceCheck(const FieldDescriptor* field, cached_has_bits = $has_bits$[$index$]; )cc"); } - p->Emit({{"mask", absl::StrFormat("0x%08xu", 1u << (has_bit_index % 32))}}, + p->Emit({{"condition", GenerateConditionMaybeWithProbabilityForField( + has_bit_index, field, options)}}, R"cc( - if ((cached_has_bits & $mask$) != 0) { + if ($condition$) { )cc"); } else { p->Emit(R"cc( @@ -1319,8 +1382,8 @@ void MessageGenerator::EmitCheckAndUpdateByteSizeForField( } int has_bit_index = has_bit_indices_[field->index()]; - p->Emit({{"mask", - absl::StrFormat("0x%08xu", uint32_t{1} << (has_bit_index % 32))}, + p->Emit({{"condition", GenerateConditionMaybeWithProbabilityForField( + has_bit_index, field, options_)}, {"check_nondefault_and_emit_body", [&] { // Note that it's possible that the field has explicit presence. @@ -1330,7 +1393,7 @@ void MessageGenerator::EmitCheckAndUpdateByteSizeForField( /*with_enclosing_braces_always=*/false); }}}, R"cc( - if ((cached_has_bits & $mask$) != 0) { + if ($condition$) { $check_nondefault_and_emit_body$; } )cc"); @@ -3186,9 +3249,10 @@ void MessageGenerator::GenerateCopyInitFields(io::Printer* p) const { if (has_bit_indices_.empty()) { p->Emit("from.$field$ != nullptr"); } else { - int index = has_bit_indices_[field->index()]; - std::string mask = absl::StrFormat("0x%08xu", 1u << (index % 32)); - p->Emit({{"mask", mask}}, "(cached_has_bits & $mask$) != 0"); + int has_bit_index = has_bit_indices_[field->index()]; + p->Emit({{"condition", GenerateConditionMaybeWithProbabilityForField( + has_bit_index, field, options_)}}, + "$condition$"); } }; @@ -3603,11 +3667,11 @@ void MessageGenerator::GenerateClear(io::Printer* p) { !IsLikelyPresent(fields.back(), options_) && (memset_end != fields.back() || merge_zero_init); + DebugAssertUniformLikelyPresence(fields, options_); + if (check_has_byte) { // Emit an if() that will let us skip the whole chunk if none are set. uint32_t chunk_mask = GenChunkMask(fields, has_bit_indices_); - std::string chunk_mask_str = - absl::StrCat(absl::Hex(chunk_mask, absl::kZeroPad8)); // Check (up to) 8 has_bits at a time if we have more than one field in // this chunk. Due to field layout ordering, we may check @@ -3619,7 +3683,9 @@ void MessageGenerator::GenerateClear(io::Printer* p) { cached_has_word_index = HasWordIndex(fields.front()); format("cached_has_bits = $has_bits$[$1$];\n", cached_has_word_index); } - format("if ((cached_has_bits & 0x$1$u) != 0) {\n", chunk_mask_str); + + format("if ($1$) {\n", GenerateConditionMaybeWithProbabilityForGroup( + chunk_mask, fields, options_)); format.Indent(); } @@ -3652,8 +3718,8 @@ void MessageGenerator::GenerateClear(io::Printer* p) { field->cpp_type() == FieldDescriptor::CPPTYPE_STRING); if (have_enclosing_if) { - PrintPresenceCheck(field, has_bit_indices_, p, - &cached_has_word_index); + PrintPresenceCheck(field, has_bit_indices_, p, &cached_has_word_index, + options_); format.Indent(); } @@ -4266,6 +4332,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { const bool check_has_byte = cache_has_bits && fields.size() > 1 && !IsLikelyPresent(fields.back(), options_); + DebugAssertUniformLikelyPresence(fields, options_); + if (cache_has_bits && cached_has_word_index != HasWordIndex(fields.front())) { cached_has_word_index = HasWordIndex(fields.front()); @@ -4276,8 +4344,6 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { if (check_has_byte) { // Emit an if() that will let us skip the whole chunk if none are set. uint32_t chunk_mask = GenChunkMask(fields, has_bit_indices_); - std::string chunk_mask_str = - absl::StrCat(absl::Hex(chunk_mask, absl::kZeroPad8)); // Check (up to) 8 has_bits at a time if we have more than one field in // this chunk. Due to field layout ordering, we may check @@ -4285,7 +4351,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { ABSL_DCHECK_LE(2, popcnt(chunk_mask)); ABSL_DCHECK_GE(8, popcnt(chunk_mask)); - format("if ((cached_has_bits & 0x$1$u) != 0) {\n", chunk_mask_str); + format("if ($1$) {\n", GenerateConditionMaybeWithProbabilityForGroup( + chunk_mask, fields, options_)); format.Indent(); } @@ -4317,9 +4384,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { // Check hasbit, using cached bits. ABSL_CHECK(HasHasbit(field)); int has_bit_index = has_bit_indices_[field->index()]; - const std::string mask = absl::StrCat( - absl::Hex(1u << (has_bit_index % 32), absl::kZeroPad8)); - format("if ((cached_has_bits & 0x$1$u) != 0) {\n", mask); + format("if ($1$) {\n", GenerateConditionMaybeWithProbabilityForField( + has_bit_index, field, options_)); format.Indent(); if (GetFieldHasbitMode(field) == HasbitMode::kHintHasbit) { @@ -4556,6 +4622,9 @@ void MessageGenerator::GenerateSerializeOneField(io::Printer* p, PrintFieldComment(Formatter{p}, field, options_); if (HasHasbit(field)) { + int has_bit_index = HasBitIndex(field); + int has_word_index = has_bit_index / 32; + bool use_cached_has_bits = cached_has_bits_index == has_word_index; p->Emit( { {"body", @@ -4564,18 +4633,10 @@ void MessageGenerator::GenerateSerializeOneField(io::Printer* p, std::move(emit_body), /*with_enclosing_braces_always=*/false); }}, - {"cond", - [&] { - int has_bit_index = HasBitIndex(field); - auto v = p->WithVars(HasBitVars(field)); - // Attempt to use the state of cached_has_bits, if possible. - if (cached_has_bits_index == has_bit_index / 32) { - p->Emit("(cached_has_bits & $has_mask$) != 0"); - } else { - p->Emit( - "(this_.$has_bits$[$has_array_index$] & $has_mask$) != 0"); - } - }}, + {"cond", GenerateConditionMaybeWithProbability( + 1u << (has_bit_index % 32), + GetPresenceProbability(field, options_), + use_cached_has_bits, has_word_index)}, }, R"cc( if ($cond$) { @@ -5188,6 +5249,7 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) { const bool check_has_byte = fields.size() > 1 && HasWordIndex(fields[0]) != kNoHasbit && !IsLikelyPresent(fields.back(), options_); + DebugAssertUniformLikelyPresence(fields, options_); p->Emit( {{"update_byte_size_for_chunk", [&] { @@ -5221,9 +5283,10 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) { ABSL_DCHECK_LE(2, popcnt(chunk_mask)); ABSL_DCHECK_GE(8, popcnt(chunk_mask)); - p->Emit( - {{"mask", absl::StrFormat("0x%08xu", chunk_mask)}}, - "if ((cached_has_bits & $mask$) != 0)"); + p->Emit({{"condition", + GenerateConditionMaybeWithProbabilityForGroup( + chunk_mask, fields, options_)}}, + "if ($condition$)"); }}}, R"cc( $may_update_cached_has_word_index$; diff --git a/src/google/protobuf/compiler/cpp/parse_function_generator.cc b/src/google/protobuf/compiler/cpp/parse_function_generator.cc index 65776dc67be05..0332ff74f4725 100644 --- a/src/google/protobuf/compiler/cpp/parse_function_generator.cc +++ b/src/google/protobuf/compiler/cpp/parse_function_generator.cc @@ -74,7 +74,9 @@ ParseFunctionGenerator::ParseFunctionGenerator( fields.push_back({ field, index < has_bit_indices.size() ? has_bit_indices[index] : -1, - GetPresenceProbability(field, options_), + // When not present, we're not sure how likely "field" is present. + // Assign a 50% probability to avoid pessimizing it. + GetPresenceProbability(field, options_).value_or(0.5f), GetLazyStyle(field, options_, scc_analyzer_), IsStringInlined(field, options_), IsImplicitWeakField(field, options_, scc_analyzer_), diff --git a/src/google/protobuf/port_def.inc b/src/google/protobuf/port_def.inc index d3ecae2c2f22a..5eaed27112f1f 100644 --- a/src/google/protobuf/port_def.inc +++ b/src/google/protobuf/port_def.inc @@ -890,6 +890,20 @@ namespace internal { #endif // !NDEBUG #endif // has_builtin(__builtin_assume) +// PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(expr, prob) tells the compiler that +// the expression expr is likely to be true with probability prob. This is +// helpful for the compiler to make better optimization decisions when we know +// the probability of a certain branch (e.g. from a profile). +#ifdef PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY +#error PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY was previously defined +#endif +#if ABSL_HAVE_BUILTIN(__builtin_expect_with_probability) +#define PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(expr, prob) \ + __builtin_expect_with_probability((expr), 1, (prob)) +#else +#define PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(expr, prob) (expr) +#endif + #ifdef PROTOBUF_DEPRECATE_AND_INLINE #error PROTOBUF_DEPRECATE_AND_INLINE was previously defined #endif diff --git a/src/google/protobuf/port_undef.inc b/src/google/protobuf/port_undef.inc index 08217437bf60e..0096ec6a2f0a0 100644 --- a/src/google/protobuf/port_undef.inc +++ b/src/google/protobuf/port_undef.inc @@ -36,6 +36,7 @@ #undef PROTOBUF_RESTRICT #undef PROTOBUF_UNUSED #undef PROTOBUF_ASSUME +#undef PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY #undef PROTOBUF_DEPRECATE_AND_INLINE #undef PROTOBUF_EXPORT_TEMPLATE_DECLARE #undef PROTOBUF_EXPORT_TEMPLATE_DEFINE