Skip to content

Commit

Permalink
Use __builtin_expect_with_probability for proto field presence checks.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 721817344
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Jan 31, 2025
1 parent 2141d1c commit e958419
Show file tree
Hide file tree
Showing 6 changed files with 145 additions and 41 deletions.
23 changes: 20 additions & 3 deletions src/google/protobuf/compiler/cpp/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1042,9 +1042,26 @@ bool IsLikelyPresent(const FieldDescriptor* field, const Options& options) {
return false;
}

float GetPresenceProbability(const FieldDescriptor* field,
const Options& options) {
return 1.f;
absl::optional<float> GetPresenceProbability(const FieldDescriptor* field,
const Options& options) {
return absl::nullopt;
}

absl::optional<float> GetFieldGroupPresenceProbability(
const std::vector<const FieldDescriptor*>& fields, const Options& options) {
ABSL_DCHECK(!fields.empty());
if (!IsProfileDriven(options)) return absl::nullopt;

double all_absent_probability = 1.0;

for (const auto* field : fields) {
absl::optional<float> probability = GetPresenceProbability(field, options);
if (!probability) {
return absl::nullopt;
}
all_absent_probability *= 1.0 - *probability;
}
return 1.0 - all_absent_probability;
}

bool IsStringInliningEnabled(const Options& options) {
Expand Down
11 changes: 9 additions & 2 deletions src/google/protobuf/compiler/cpp/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -366,8 +366,15 @@ bool IsRarelyPresent(const FieldDescriptor* field, const Options& options);
// Returns true if `field` is likely to be present based on PDProto profile.
bool IsLikelyPresent(const FieldDescriptor* field, const Options& options);

float GetPresenceProbability(const FieldDescriptor* field,
const Options& options);
absl::optional<float> GetPresenceProbability(const FieldDescriptor* field,
const Options& options);

// GetFieldGroupPresenceProbability computes presence probability for a group of
// fields. It uses the absence probability (easier to compute)
// (1 - p1) * (1 - p2) * ... * (1 - pn), and in the end the aggregate presence
// probability can be expressed as (1 - all_absent_probability).
absl::optional<float> GetFieldGroupPresenceProbability(
const std::vector<const FieldDescriptor*>& fields, const Options& options);

bool IsStringInliningEnabled(const Options& options);

Expand Down
133 changes: 98 additions & 35 deletions src/google/protobuf/compiler/cpp/message.cc
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,71 @@ std::string ConditionalToCheckBitmasks(
return result + (return_success ? " == 0" : " != 0");
}

template <typename PredicateT>
void DebugAssertUniform(const std::vector<const FieldDescriptor*>& fields,
const Options& options, PredicateT&& pred) {
ABSL_DCHECK(!fields.empty() && absl::c_all_of(fields, [&](const auto* f) {
return pred(f) == pred(fields.front());
}));
}

void DebugAssertUniformLikelyPresence(
const std::vector<const FieldDescriptor*>& fields, const Options& options) {
DebugAssertUniform(fields, options, [&](const FieldDescriptor* f) {
return IsLikelyPresent(f, options);
});
}

// Generates a condition that checks presence of a field. If probability is
// provided, the condition will be wrapped with
// PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY.
//
// If use_cached_has_bits is true, the condition will be generated based on
// cached_has_bits. Otherwise, the condition will be generated based on the
// _has_bits_ array, with has_array_index indicating which element of the array
// to use.
std::string GenerateConditionMaybeWithProbability(
uint32_t mask, absl::optional<float> probability, bool use_cached_has_bits,
absl::optional<int> has_array_index) {
std::string condition;
if (use_cached_has_bits) {
condition = absl::StrFormat("(cached_has_bits & 0x%08xu) != 0", mask);
} else {
// We only use has_array_index when use_cached_has_bits is false, make sure
// we pas a valid index when we need it.
ABSL_DCHECK(has_array_index.has_value());
condition = absl::StrFormat("(this_._impl_._has_bits_[%d] & 0x%08xu) != 0",
*has_array_index, mask);
}
if (probability.has_value()) {
return absl::StrFormat("PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(%s, %.3f)",
condition, *probability);
}
return condition;
}

std::string GenerateConditionMaybeWithProbabilityForField(
int has_bit_index, const FieldDescriptor* field, const Options& options) {
auto prob = GetPresenceProbability(field, options);
return GenerateConditionMaybeWithProbability(
1u << (has_bit_index % 32), prob,
/*use_cached_has_bits*/ true,
/*has_array_index*/ absl::nullopt);
}

std::string GenerateConditionMaybeWithProbabilityForGroup(
uint32_t mask, const std::vector<const FieldDescriptor*>& fields,
const Options& options) {
auto prob = GetFieldGroupPresenceProbability(fields, options);
return GenerateConditionMaybeWithProbability(
mask, prob,
/*use_cached_has_bits*/ true,
/*has_array_index*/ absl::nullopt);
}

void PrintPresenceCheck(const FieldDescriptor* field,
const std::vector<int>& has_bit_indices, io::Printer* p,
int* cached_has_word_index) {
int* cached_has_word_index, const Options& options) {
if (!field->options().weak()) {
int has_bit_index = has_bit_indices[field->index()];
if (*cached_has_word_index != (has_bit_index / 32)) {
Expand All @@ -107,9 +169,10 @@ void PrintPresenceCheck(const FieldDescriptor* field,
cached_has_bits = $has_bits$[$index$];
)cc");
}
p->Emit({{"mask", absl::StrFormat("0x%08xu", 1u << (has_bit_index % 32))}},
p->Emit({{"condition", GenerateConditionMaybeWithProbabilityForField(
has_bit_index, field, options)}},
R"cc(
if ((cached_has_bits & $mask$) != 0) {
if ($condition$) {
)cc");
} else {
p->Emit(R"cc(
Expand Down Expand Up @@ -1319,8 +1382,8 @@ void MessageGenerator::EmitCheckAndUpdateByteSizeForField(
}

int has_bit_index = has_bit_indices_[field->index()];
p->Emit({{"mask",
absl::StrFormat("0x%08xu", uint32_t{1} << (has_bit_index % 32))},
p->Emit({{"condition", GenerateConditionMaybeWithProbabilityForField(
has_bit_index, field, options_)},
{"check_nondefault_and_emit_body",
[&] {
// Note that it's possible that the field has explicit presence.
Expand All @@ -1330,7 +1393,7 @@ void MessageGenerator::EmitCheckAndUpdateByteSizeForField(
/*with_enclosing_braces_always=*/false);
}}},
R"cc(
if ((cached_has_bits & $mask$) != 0) {
if ($condition$) {
$check_nondefault_and_emit_body$;
}
)cc");
Expand Down Expand Up @@ -3186,9 +3249,10 @@ void MessageGenerator::GenerateCopyInitFields(io::Printer* p) const {
if (has_bit_indices_.empty()) {
p->Emit("from.$field$ != nullptr");
} else {
int index = has_bit_indices_[field->index()];
std::string mask = absl::StrFormat("0x%08xu", 1u << (index % 32));
p->Emit({{"mask", mask}}, "(cached_has_bits & $mask$) != 0");
int has_bit_index = has_bit_indices_[field->index()];
p->Emit({{"condition", GenerateConditionMaybeWithProbabilityForField(
has_bit_index, field, options_)}},
"$condition$");
}
};

Expand Down Expand Up @@ -3603,11 +3667,11 @@ void MessageGenerator::GenerateClear(io::Printer* p) {
!IsLikelyPresent(fields.back(), options_) &&
(memset_end != fields.back() || merge_zero_init);

DebugAssertUniformLikelyPresence(fields, options_);

if (check_has_byte) {
// Emit an if() that will let us skip the whole chunk if none are set.
uint32_t chunk_mask = GenChunkMask(fields, has_bit_indices_);
std::string chunk_mask_str =
absl::StrCat(absl::Hex(chunk_mask, absl::kZeroPad8));

// Check (up to) 8 has_bits at a time if we have more than one field in
// this chunk. Due to field layout ordering, we may check
Expand All @@ -3619,7 +3683,9 @@ void MessageGenerator::GenerateClear(io::Printer* p) {
cached_has_word_index = HasWordIndex(fields.front());
format("cached_has_bits = $has_bits$[$1$];\n", cached_has_word_index);
}
format("if ((cached_has_bits & 0x$1$u) != 0) {\n", chunk_mask_str);

format("if ($1$) {\n", GenerateConditionMaybeWithProbabilityForGroup(
chunk_mask, fields, options_));
format.Indent();
}

Expand Down Expand Up @@ -3652,8 +3718,8 @@ void MessageGenerator::GenerateClear(io::Printer* p) {
field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);

if (have_enclosing_if) {
PrintPresenceCheck(field, has_bit_indices_, p,
&cached_has_word_index);
PrintPresenceCheck(field, has_bit_indices_, p, &cached_has_word_index,
options_);
format.Indent();
}

Expand Down Expand Up @@ -4266,6 +4332,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) {
const bool check_has_byte = cache_has_bits && fields.size() > 1 &&
!IsLikelyPresent(fields.back(), options_);

DebugAssertUniformLikelyPresence(fields, options_);

if (cache_has_bits &&
cached_has_word_index != HasWordIndex(fields.front())) {
cached_has_word_index = HasWordIndex(fields.front());
Expand All @@ -4276,16 +4344,15 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) {
if (check_has_byte) {
// Emit an if() that will let us skip the whole chunk if none are set.
uint32_t chunk_mask = GenChunkMask(fields, has_bit_indices_);
std::string chunk_mask_str =
absl::StrCat(absl::Hex(chunk_mask, absl::kZeroPad8));

// Check (up to) 8 has_bits at a time if we have more than one field in
// this chunk. Due to field layout ordering, we may check
// _has_bits_[last_chunk * 8 / 32] multiple times.
ABSL_DCHECK_LE(2, popcnt(chunk_mask));
ABSL_DCHECK_GE(8, popcnt(chunk_mask));

format("if ((cached_has_bits & 0x$1$u) != 0) {\n", chunk_mask_str);
format("if ($1$) {\n", GenerateConditionMaybeWithProbabilityForGroup(
chunk_mask, fields, options_));
format.Indent();
}

Expand Down Expand Up @@ -4317,9 +4384,8 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) {
// Check hasbit, using cached bits.
ABSL_CHECK(HasHasbit(field));
int has_bit_index = has_bit_indices_[field->index()];
const std::string mask = absl::StrCat(
absl::Hex(1u << (has_bit_index % 32), absl::kZeroPad8));
format("if ((cached_has_bits & 0x$1$u) != 0) {\n", mask);
format("if ($1$) {\n", GenerateConditionMaybeWithProbabilityForField(
has_bit_index, field, options_));
format.Indent();

if (GetFieldHasbitMode(field) == HasbitMode::kHintHasbit) {
Expand Down Expand Up @@ -4556,6 +4622,9 @@ void MessageGenerator::GenerateSerializeOneField(io::Printer* p,

PrintFieldComment(Formatter{p}, field, options_);
if (HasHasbit(field)) {
int has_bit_index = HasBitIndex(field);
int has_word_index = has_bit_index / 32;
bool use_cached_has_bits = cached_has_bits_index == has_word_index;
p->Emit(
{
{"body",
Expand All @@ -4564,18 +4633,10 @@ void MessageGenerator::GenerateSerializeOneField(io::Printer* p,
std::move(emit_body),
/*with_enclosing_braces_always=*/false);
}},
{"cond",
[&] {
int has_bit_index = HasBitIndex(field);
auto v = p->WithVars(HasBitVars(field));
// Attempt to use the state of cached_has_bits, if possible.
if (cached_has_bits_index == has_bit_index / 32) {
p->Emit("(cached_has_bits & $has_mask$) != 0");
} else {
p->Emit(
"(this_.$has_bits$[$has_array_index$] & $has_mask$) != 0");
}
}},
{"cond", GenerateConditionMaybeWithProbability(
1u << (has_bit_index % 32),
GetPresenceProbability(field, options_),
use_cached_has_bits, has_word_index)},
},
R"cc(
if ($cond$) {
Expand Down Expand Up @@ -5188,6 +5249,7 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) {
const bool check_has_byte =
fields.size() > 1 && HasWordIndex(fields[0]) != kNoHasbit &&
!IsLikelyPresent(fields.back(), options_);
DebugAssertUniformLikelyPresence(fields, options_);
p->Emit(
{{"update_byte_size_for_chunk",
[&] {
Expand Down Expand Up @@ -5221,9 +5283,10 @@ void MessageGenerator::GenerateByteSize(io::Printer* p) {
ABSL_DCHECK_LE(2, popcnt(chunk_mask));
ABSL_DCHECK_GE(8, popcnt(chunk_mask));

p->Emit(
{{"mask", absl::StrFormat("0x%08xu", chunk_mask)}},
"if ((cached_has_bits & $mask$) != 0)");
p->Emit({{"condition",
GenerateConditionMaybeWithProbabilityForGroup(
chunk_mask, fields, options_)}},
"if ($condition$)");
}}},
R"cc(
$may_update_cached_has_word_index$;
Expand Down
4 changes: 3 additions & 1 deletion src/google/protobuf/compiler/cpp/parse_function_generator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,9 @@ ParseFunctionGenerator::ParseFunctionGenerator(
fields.push_back({
field,
index < has_bit_indices.size() ? has_bit_indices[index] : -1,
GetPresenceProbability(field, options_),
// When not present, we're not sure how likely "field" is present.
// Assign a 50% probability to avoid pessimizing it.
GetPresenceProbability(field, options_).value_or(0.5f),
GetLazyStyle(field, options_, scc_analyzer_),
IsStringInlined(field, options_),
IsImplicitWeakField(field, options_, scc_analyzer_),
Expand Down
14 changes: 14 additions & 0 deletions src/google/protobuf/port_def.inc
Original file line number Diff line number Diff line change
Expand Up @@ -890,6 +890,20 @@ namespace internal {
#endif // !NDEBUG
#endif // has_builtin(__builtin_assume)

// PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(expr, prob) tells the compiler that
// the expression expr is likely to be true with probability prob. This is
// helpful for the compiler to make better optimization decisions when we know
// the probability of a certain branch (e.g. from a profile).
#ifdef PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY
#error PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY was previously defined
#endif
#if ABSL_HAVE_BUILTIN(__builtin_expect_with_probability)
#define PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(expr, prob) \
__builtin_expect_with_probability((expr), 1, (prob))
#else
#define PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY(expr, prob) (expr)
#endif

#ifdef PROTOBUF_DEPRECATE_AND_INLINE
#error PROTOBUF_DEPRECATE_AND_INLINE was previously defined
#endif
Expand Down
1 change: 1 addition & 0 deletions src/google/protobuf/port_undef.inc
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
#undef PROTOBUF_RESTRICT
#undef PROTOBUF_UNUSED
#undef PROTOBUF_ASSUME
#undef PROTOBUF_EXPECT_TRUE_WITH_PROBABILITY
#undef PROTOBUF_DEPRECATE_AND_INLINE
#undef PROTOBUF_EXPORT_TEMPLATE_DECLARE
#undef PROTOBUF_EXPORT_TEMPLATE_DEFINE
Expand Down

0 comments on commit e958419

Please sign in to comment.