diff --git a/src/google/protobuf/compiler/cpp/field_generators/string_field.cc b/src/google/protobuf/compiler/cpp/field_generators/string_field.cc index 1fc3951d49afd..bd1aa7f2b7c24 100644 --- a/src/google/protobuf/compiler/cpp/field_generators/string_field.cc +++ b/src/google/protobuf/compiler/cpp/field_generators/string_field.cc @@ -16,6 +16,7 @@ #include "absl/log/absl_check.h" #include "absl/memory/memory.h" #include "absl/strings/str_cat.h" +#include "absl/strings/string_view.h" #include "google/protobuf/compiler/cpp/field.h" #include "google/protobuf/compiler/cpp/field_generators/generators.h" #include "google/protobuf/compiler/cpp/helpers.h" @@ -23,12 +24,15 @@ #include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/io/printer.h" +#include "google/protobuf/port.h" namespace google { namespace protobuf { namespace compiler { namespace cpp { namespace { +using ::google::protobuf::internal::cpp::GetFieldHasbitMode; +using ::google::protobuf::internal::cpp::HasbitMode; using ::google::protobuf::internal::cpp::HasHasbit; using ::google::protobuf::io::AnnotationCollector; using Sub = ::google::protobuf::io::Printer::Sub; @@ -533,6 +537,22 @@ void SingularString::GenerateClearingCode(io::Printer* p) const { )cc"); } +// Returns "ClearNonDefaultToEmpty" or "ClearToEmpty" depending on whether the +// field might still point to the default string instance. +absl::string_view GetClearFunctionForField(const FieldDescriptor* field) { + switch (GetFieldHasbitMode(field)) { + case HasbitMode::kNoHasbit: + case HasbitMode::kHintHasbit: + // TODO: b/376149315 - Would be nice to call ClearNonDefaultToEmpty for + // hint hasbits too. + return "ClearToEmpty"; + case HasbitMode::kTrueHasbit: + return "ClearNonDefaultToEmpty"; + default: + internal::Unreachable(); + } +} + void SingularString::GenerateMessageClearingCode(io::Printer* p) const { if (is_oneof()) { p->Emit(R"cc( @@ -573,8 +593,7 @@ void SingularString::GenerateMessageClearingCode(io::Printer* p) const { return; } - p->Emit({{"Clear", - HasHasbit(field_) ? "ClearNonDefaultToEmpty" : "ClearToEmpty"}}, + p->Emit({{"Clear", GetClearFunctionForField(field_)}}, R"cc( $field_$.$Clear$(); )cc"); diff --git a/src/google/protobuf/compiler/cpp/message.cc b/src/google/protobuf/compiler/cpp/message.cc index f5ac295c41992..903331515a7cd 100644 --- a/src/google/protobuf/compiler/cpp/message.cc +++ b/src/google/protobuf/compiler/cpp/message.cc @@ -62,6 +62,8 @@ namespace cpp { namespace { using ::google::protobuf::internal::WireFormat; using ::google::protobuf::internal::WireFormatLite; +using ::google::protobuf::internal::cpp::GetFieldHasbitMode; +using ::google::protobuf::internal::cpp::HasbitMode; using ::google::protobuf::internal::cpp::HasHasbit; using Semantic = ::google::protobuf::io::AnnotationCollector::Semantic; using Sub = ::google::protobuf::io::Printer::Sub; @@ -187,7 +189,7 @@ RunMap FindRuns(const std::vector& fields, void EmitNonDefaultCheck(io::Printer* p, const std::string& prefix, const FieldDescriptor* field) { - ABSL_CHECK(!HasHasbit(field)); + ABSL_CHECK(GetFieldHasbitMode(field) != HasbitMode::kTrueHasbit); ABSL_CHECK(!field->is_repeated()); ABSL_CHECK(!field->containing_oneof() || field->real_containing_oneof()); @@ -216,19 +218,27 @@ void EmitNonDefaultCheck(io::Printer* p, const std::string& prefix, } bool ShouldEmitNonDefaultCheck(const FieldDescriptor* field) { - return (!field->is_repeated() && !field->containing_oneof()) || - field->real_containing_oneof(); + if (GetFieldHasbitMode(field) == HasbitMode::kTrueHasbit) { + return false; + } + return !field->is_repeated(); } // Emits an if-statement with a condition that evaluates to true if |field| is // considered non-default (will be sent over the wire), for message types // without true field presence. Should only be called if // !HasHasbit(field). +// If |with_enclosing_braces_always| is set to true, will generate enclosing +// braces even if nondefault check is not emitted -- i.e. code may look like: +// { +// // code... +// } +// If |with_enclosing_braces_always| is set to false, enclosing braces will not +// be generated if nondefault check is not emitted. void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix, const FieldDescriptor* field, - absl::AnyInvocable emit_body) { - ABSL_CHECK(!HasHasbit(field)); - + absl::AnyInvocable emit_body, + bool with_enclosing_braces_always) { if (ShouldEmitNonDefaultCheck(field)) { p->Emit( { @@ -240,7 +250,10 @@ void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix, $emit_body$; } )cc"); - } else { + return; + } + + if (with_enclosing_braces_always) { // In repeated fields, the same variable name may be emitted multiple // times, hence the need for emitting braces even when the if condition is // not necessary, so that the code looks like: @@ -259,7 +272,11 @@ void MayEmitIfNonDefaultCheck(io::Printer* p, const std::string& prefix, $emit_body$; } )cc"); + return; } + + // If no enclosing braces need to be emitted, just emit the body directly. + emit_body(); } bool HasInternalHasMethod(const FieldDescriptor* field) { @@ -1034,7 +1051,7 @@ void MessageGenerator::GenerateSingularFieldHasBits( )cc"); return; } - if (HasHasbit(field)) { + if (GetFieldHasbitMode(field) == HasbitMode::kTrueHasbit) { auto v = p->WithVars(HasBitVars(field)); p->Emit( {Sub{"ASSUME", @@ -1244,7 +1261,8 @@ void MessageGenerator::EmitCheckAndUpdateByteSizeForField( }; if (!HasHasbit(field)) { - MayEmitIfNonDefaultCheck(p, "this_.", field, std::move(emit_body)); + MayEmitIfNonDefaultCheck(p, "this_.", field, std::move(emit_body), + /*with_enclosing_braces_always=*/true); return; } if (field->options().weak()) { @@ -1260,10 +1278,17 @@ void MessageGenerator::EmitCheckAndUpdateByteSizeForField( int has_bit_index = has_bit_indices_[field->index()]; p->Emit({{"mask", absl::StrFormat("0x%08xu", uint32_t{1} << (has_bit_index % 32))}, - {"emit_body", [&] { emit_body(); }}}, + {"check_nondefault_and_emit_body", + [&] { + // Note that it's possible that the field has explicit presence. + // In that case, nondefault check will not be emitted but + // emit_body will still be emitted. + MayEmitIfNonDefaultCheck(p, "this_.", field, std::move(emit_body), + /*with_enclosing_braces_always=*/false); + }}}, R"cc( if (cached_has_bits & $mask$) { - $emit_body$; + $check_nondefault_and_emit_body$; } )cc"); } @@ -4227,9 +4252,10 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { } else if (field->is_optional() && !HasHasbit(field)) { // Merge semantics without true field presence: primitive fields are // merged only if non-zero (numeric) or non-empty (string). - MayEmitIfNonDefaultCheck(p, "from.", field, /*emit_body=*/[&]() { - generator.GenerateMergingCode(p); - }); + MayEmitIfNonDefaultCheck( + p, "from.", field, + /*emit_body=*/[&]() { generator.GenerateMergingCode(p); }, + /*with_enclosing_braces_always=*/true); } else if (field->options().weak() || cached_has_word_index != HasWordIndex(field)) { // Check hasbit, not using cached bits. @@ -4250,10 +4276,20 @@ void MessageGenerator::GenerateClassSpecificMergeImpl(io::Printer* p) { format("if (cached_has_bits & 0x$1$u) {\n", mask); format.Indent(); - if (check_has_byte && IsPOD(field)) { - generator.GenerateCopyConstructorCode(p); + if (GetFieldHasbitMode(field) == HasbitMode::kHintHasbit) { + // Merge semantics without true field presence: primitive fields are + // merged only if non-zero (numeric) or non-empty (string). + MayEmitIfNonDefaultCheck( + p, "from.", field, + /*emit_body=*/[&]() { generator.GenerateMergingCode(p); }, + /*with_enclosing_braces_always=*/false); } else { - generator.GenerateMergingCode(p); + ABSL_DCHECK(GetFieldHasbitMode(field) == HasbitMode::kTrueHasbit); + if (check_has_byte && IsPOD(field)) { + generator.GenerateCopyConstructorCode(p); + } else { + generator.GenerateMergingCode(p); + } } format.Outdent(); @@ -4476,7 +4512,12 @@ void MessageGenerator::GenerateSerializeOneField(io::Printer* p, if (HasHasbit(field)) { p->Emit( { - {"body", emit_body}, + {"body", + [&]() { + MayEmitIfNonDefaultCheck(p, "this_.", field, + std::move(emit_body), + /*with_enclosing_braces_always=*/false); + }}, {"cond", [&] { int has_bit_index = HasBitIndex(field); @@ -4496,7 +4537,8 @@ void MessageGenerator::GenerateSerializeOneField(io::Printer* p, } )cc"); } else if (field->is_optional()) { - MayEmitIfNonDefaultCheck(p, "this_.", field, std::move(emit_body)); + MayEmitIfNonDefaultCheck(p, "this_.", field, std::move(emit_body), + /*with_enclosing_braces_always=*/true); } else { emit_body(); } diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index 637921b0ce92a..2ff141059ca3f 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -9798,9 +9798,27 @@ bool HasPreservingUnknownEnumSemantics(const FieldDescriptor* field) { return field->enum_type() != nullptr && !field->enum_type()->is_closed(); } +HasbitMode GetFieldHasbitMode(const FieldDescriptor* field) { + // Do not generate hasbits for "real-oneof" and weak fields. + if (field->real_containing_oneof() || field->options().weak()) { + return HasbitMode::kNoHasbit; + } + + // Explicit-presence fields always have true hasbits. + if (field->has_presence()) { + return HasbitMode::kTrueHasbit; + } + + // Implicit presence fields. + if (!field->is_repeated()) { + return HasbitMode::kHintHasbit; + } + // We currently don't implement hasbits for implicit repeated fields. + return HasbitMode::kNoHasbit; +} + bool HasHasbit(const FieldDescriptor* field) { - return field->has_presence() && !field->real_containing_oneof() && - !field->options().weak(); + return GetFieldHasbitMode(field) != HasbitMode::kNoHasbit; } static bool IsVerifyUtf8(const FieldDescriptor* field, bool is_lite) { diff --git a/src/google/protobuf/descriptor.h b/src/google/protobuf/descriptor.h index 9ac74624032a5..3bc2a1b5e2f98 100644 --- a/src/google/protobuf/descriptor.h +++ b/src/google/protobuf/descriptor.h @@ -42,6 +42,7 @@ #include "google/protobuf/stubs/common.h" #include "absl/base/attributes.h" #include "absl/base/call_once.h" +#include "absl/base/optimization.h" #include "absl/container/btree_map.h" #include "absl/container/flat_hash_map.h" #include "absl/functional/any_invocable.h" @@ -2949,9 +2950,33 @@ constexpr int MaxMessageDeclarationNestingDepth() { return 32; } PROTOBUF_EXPORT bool HasPreservingUnknownEnumSemantics( const FieldDescriptor* field); -PROTOBUF_EXPORT bool HasHasbit(const FieldDescriptor* field); - #ifndef SWIG +enum class HasbitMode : uint8_t { + // Hasbits do not exist for the field. + kNoHasbit, + // Hasbits exist and indicate field presence. + // Hasbit is set if and only if field is present. + kTrueHasbit, + // Hasbits exist and "hint at" field presence. + // When hasbit is set, field is 'probably' present, but field accessors must + // still check for field presence (i.e. false positives are possible). + // When hasbit is unset, field is guaranteed to be not present. + kHintHasbit, +}; + +// Returns the "hasbit mode" of the field. Depending on the implementation, a +// field can: +// - have no hasbits in its internal object (kNoHasbit); +// - have hasbits where hasbit == 1 indicates field presence and hasbit == 0 +// indicates an unset field (kTrueHasbit); +// - have hasbits where hasbit == 1 indicates "field is possibly modified" and +// hasbit == 0 indicates "field is definitely missing" (kHintHasbit). +PROTOBUF_EXPORT HasbitMode GetFieldHasbitMode(const FieldDescriptor* field); + +// Returns true if there are hasbits for the field. +// Note that this does not correlate with "hazzer"s, i.e., whether has_foo APIs +// are emitted. +PROTOBUF_EXPORT bool HasHasbit(const FieldDescriptor* field); enum class Utf8CheckMode : uint8_t { kStrict = 0, // Parsing will fail if non UTF-8 data is in string fields. diff --git a/src/google/protobuf/descriptor_unittest.cc b/src/google/protobuf/descriptor_unittest.cc index c9a109c6691e7..347e05fec846c 100644 --- a/src/google/protobuf/descriptor_unittest.cc +++ b/src/google/protobuf/descriptor_unittest.cc @@ -20,8 +20,10 @@ #include #include #include +#include #include #include +#include #include #include #include @@ -77,7 +79,10 @@ // Must be included last. #include "google/protobuf/port_def.inc" +using ::google::protobuf::internal::cpp::GetFieldHasbitMode; using ::google::protobuf::internal::cpp::GetUtf8CheckMode; +using ::google::protobuf::internal::cpp::HasbitMode; +using ::google::protobuf::internal::cpp::HasHasbit; using ::google::protobuf::internal::cpp::HasPreservingUnknownEnumSemantics; using ::google::protobuf::internal::cpp::Utf8CheckMode; using ::testing::AnyOf; @@ -2994,6 +2999,273 @@ TEST_F(MiscTest, FieldOptions) { EXPECT_EQ(FieldOptions::CORD, bar->options().ctype()); } +// =================================================================== + +struct HasHasbitTestParam { + struct ExpectedOutput { + HasbitMode expected_hasbitmode; + bool expected_has_presence; + bool expected_has_hasbit; + }; + + std::string input_foo_proto; + ExpectedOutput expected_output; +}; + +class HasHasbitTest : public testing::TestWithParam { + protected: + void SetUp() override { + ASSERT_TRUE( + TextFormat::ParseFromString(GetParam().input_foo_proto, &foo_proto_)); + foo_ = pool_.BuildFile(foo_proto_); + } + + const FieldDescriptor* GetField() { return foo_->message_type(0)->field(0); } + + DescriptorPool pool_; + FileDescriptorProto foo_proto_; + const FileDescriptor* foo_; +}; + +TEST_P(HasHasbitTest, TestHasHasbitExplicitPresence) { + EXPECT_EQ(GetField()->has_presence(), + GetParam().expected_output.expected_has_presence); + EXPECT_EQ(GetFieldHasbitMode(GetField()), + GetParam().expected_output.expected_hasbitmode); + EXPECT_EQ(HasHasbit(GetField()), + GetParam().expected_output.expected_has_hasbit); +} + +// NOTE: with C++20 we can use designated initializers to ensure +// that struct members match commented names, but as we are still working with +// C++17 in the foreseeable future, we won't be able to refactor this for a +// while... +// https://github.com/google/oss-policies-info/blob/main/foundational-cxx-support-matrix.md +INSTANTIATE_TEST_SUITE_P( + HasHasbitLegacySyntaxTests, HasHasbitTest, + testing::Values( + // Test case: proto2 singular fields + HasHasbitTestParam{R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'proto2' + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_INT64 + label: LABEL_OPTIONAL + } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kTrueHasbit, + /*expected_has_presence=*/true, + /*expected_has_hasbit=*/true, + }}, + // Test case: proto2 repeated fields + HasHasbitTestParam{R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'proto2' + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_STRING + label: LABEL_REPEATED + } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kNoHasbit, + /*expected_has_presence=*/false, + /*expected_has_hasbit=*/false, + }}, + // Test case: proto3 singular fields + HasHasbitTestParam{R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'proto3' + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_INT64 + label: LABEL_OPTIONAL + } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kHintHasbit, + /*expected_has_presence=*/false, + /*expected_has_hasbit=*/true, + }}, + // Test case: proto3 optional fields + HasHasbitTestParam{ + R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'proto3' + message_type { + name: 'Foo' + field { + name: 'int_field' + number: 1 + type: TYPE_INT32 + label: LABEL_OPTIONAL + oneof_index: 0 + proto3_optional: true + } + oneof_decl { name: '_int_field' } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kTrueHasbit, + /*expected_has_presence=*/true, + /*expected_has_hasbit=*/true, + }}, + // Test case: proto3 repeated fields + HasHasbitTestParam{R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'proto3' + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_STRING + label: LABEL_REPEATED + } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kNoHasbit, + /*expected_has_presence=*/false, + /*expected_has_hasbit=*/false, + }})); + +// NOTE: with C++20 we can use designated initializers to ensure +// that struct members match commented names, but as we are still working with +// C++17 in the foreseeable future, we won't be able to refactor this for a +// while... +// https://github.com/google/oss-policies-info/blob/main/foundational-cxx-support-matrix.md +INSTANTIATE_TEST_SUITE_P( + HasHasbitEditionsTests, HasHasbitTest, + testing::Values( + // Test case: explicit-presence, singular fields + HasHasbitTestParam{ + R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'editions' + edition: EDITION_2023 + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_INT64 + options { features { field_presence: EXPLICIT } } + } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kTrueHasbit, + /*expected_has_presence=*/true, + /*expected_has_hasbit=*/true, + }}, + // Test case: implicit-presence, singular fields + HasHasbitTestParam{ + R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'editions' + edition: EDITION_2023 + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_INT64 + options { features { field_presence: IMPLICIT } } + } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kHintHasbit, + /*expected_has_presence=*/false, + /*expected_has_hasbit=*/true, + }}, + // Test case: oneof fields. + // Note that oneof fields can't specify field presence. + HasHasbitTestParam{ + R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'editions' + edition: EDITION_2023 + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_STRING + oneof_index: 0 + } + oneof_decl { name: "onebar" } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kNoHasbit, + /*expected_has_presence=*/true, + /*expected_has_hasbit=*/false, + }}, + // Test case: message fields. + // Note that message fields cannot specify implicit presence. + HasHasbitTestParam{ + R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'editions' + edition: EDITION_2023 + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_MESSAGE + type_name: "Bar" + } + } + message_type { + name: 'Bar' + field { name: 'int_field' number: 1 type: TYPE_INT32 } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kTrueHasbit, + /*expected_has_presence=*/true, + /*expected_has_hasbit=*/true, + }}, + // Test case: repeated fields. + // Note that repeated fields can't specify presence. + HasHasbitTestParam{R"pb(name: 'foo.proto' + package: 'foo' + syntax: 'editions' + edition: EDITION_2023 + message_type { + name: 'FooMessage' + field { + name: 'f' + number: 1 + type: TYPE_STRING + label: LABEL_REPEATED + } + } + )pb", + /*expected_output=*/{ + /*expected_hasbitmode=*/HasbitMode::kNoHasbit, + /*expected_has_presence=*/false, + /*expected_has_hasbit=*/false, + }})); + + // =================================================================== enum DescriptorPoolMode { NO_DATABASE, FALLBACK_DATABASE }; diff --git a/src/google/protobuf/dynamic_message.cc b/src/google/protobuf/dynamic_message.cc index 43a00ad800cd1..5c403716d6719 100644 --- a/src/google/protobuf/dynamic_message.cc +++ b/src/google/protobuf/dynamic_message.cc @@ -53,6 +53,7 @@ #include "absl/base/attributes.h" #include "absl/hash/hash.h" #include "absl/log/absl_check.h" +#include "absl/log/absl_log.h" #include "absl/types/variant.h" #include "absl/utility/utility.h" #include "google/protobuf/arenastring.h" @@ -465,6 +466,11 @@ namespace { bool IsMapFieldInApi(const FieldDescriptor* field) { return field->is_map(); } +bool IsMapEntryField(const FieldDescriptor* field) { + return (field->containing_type() != nullptr && + field->containing_type()->options().map_entry()); +} + inline bool InRealOneof(const FieldDescriptor* field) { return field->real_containing_oneof() != nullptr; @@ -1086,7 +1092,43 @@ const Message* DynamicMessageFactory::GetPrototypeNoLock( type_info->has_bits_offset = -1; int max_hasbit = 0; for (int i = 0; i < type->field_count(); i++) { - if (internal::cpp::HasHasbit(type->field(i))) { + const FieldDescriptor* field = type->field(i); + + // If a field has hasbits, it could be either an explicit-presence or + // implicit-presence field. Explicit presence fields will have "true + // hasbits" where hasbit is set iff field is present. Implicit presence + // fields will have "hint hasbits" where + // - if hasbit is unset, field is not present. + // - if hasbit is set, field is present if it is also nonempty. + if (internal::cpp::HasHasbit(field)) { + // TODO: b/112602698 - during Python textproto serialization, MapEntry + // messages may be generated from DynamicMessage on the fly. C++ + // implementations of MapEntry messages always have hasbits, but + // has_presence return values might be different depending on how field + // presence is set. For MapEntrys, has_presence returns true for + // explicit-presence (proto2) messages and returns false for + // implicit-presence (proto3) messages. + // + // In the case of implicit presence, there is a potential inconsistency in + // code behavior between C++ and Python: + // - If C++ implementation is linked, hasbits are always generated for + // MapEntry messages, and MapEntry messages will behave like explicit + // presence. + // - If C++ implementation is not linked, Python defaults to the + // DynamicMessage implementation for MapEntrys which traditionally does + // not assume the presence of hasbits, so the default Python behavior + // for MapEntry messages (by default C++ implementations are not linked) + // will fall back to the DynamicMessage implementation and behave like + // implicit presence. + // This is an inconsistency and this if-condition preserves it. + // + // Longer term, we want to get rid of this additional if-check of + // IsMapEntryField. It might take one or more breaking changes and more + // consensus gathering & clarification though. + if (!field->has_presence() && IsMapEntryField(field)) { + continue; + } + if (type_info->has_bits_offset == -1) { // At least one field in the message requires a hasbit, so allocate // hasbits. diff --git a/src/google/protobuf/generated_message_reflection.cc b/src/google/protobuf/generated_message_reflection.cc index baf77078f662c..6ce9db5684fd3 100644 --- a/src/google/protobuf/generated_message_reflection.cc +++ b/src/google/protobuf/generated_message_reflection.cc @@ -31,6 +31,7 @@ #include "absl/strings/str_format.h" #include "absl/strings/string_view.h" #include "absl/synchronization/mutex.h" +#include "google/protobuf/arena.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/descriptor.pb.h" #include "google/protobuf/descriptor_lite.h" @@ -79,6 +80,11 @@ namespace protobuf { namespace { bool IsMapFieldInApi(const FieldDescriptor* field) { return field->is_map(); } +bool IsMapEntry(const FieldDescriptor* field) { + return (field->containing_type() != nullptr && + field->containing_type()->options().map_entry()); +} + Message* MaybeForceCopy(Arena* arena, Message* msg) { if (arena != nullptr || msg == nullptr) return msg; @@ -1178,7 +1184,7 @@ void Reflection::SwapFieldsImpl( // oneof already. This has to be done after SwapField, because SwapField // may depend on the information in has bits. if (!field->is_repeated()) { - SwapHasBit(message1, message2, field); + NaiveSwapHasBit(message1, message2, field); if (field->cpp_type() == FieldDescriptor::CPPTYPE_STRING && field->cpp_string_type() == FieldDescriptor::CppStringType::kString && @@ -1749,7 +1755,8 @@ void Reflection::ListFields(const Message& message, } } else if (has_bits && has_bits_indices[i] != static_cast(-1)) { // Equivalent to: HasFieldSingular(message, field) - if (IsIndexInHasBitSet(has_bits, has_bits_indices[i])) { + if (IsFieldPresentGivenHasbits(message, field, has_bits, + has_bits_indices[i])) { append_to_output(field); } } else if (HasFieldSingular(message, field)) { @@ -2989,74 +2996,127 @@ void Reflection::SwapInlinedStringDonated(Message* lhs, Message* rhs, } } -// Simple accessors for manipulating has_bits_. +bool Reflection::IsSingularFieldNonEmpty(const Message& message, + const FieldDescriptor* field) const { + ABSL_DCHECK(IsMapEntry(field) || !field->has_presence()); + ABSL_DCHECK(!field->is_repeated()); + ABSL_DCHECK(!field->is_map()); + ABSL_DCHECK(field->cpp_type() != FieldDescriptor::CPPTYPE_MESSAGE); + // Scalar primitive (numeric or string/bytes) fields are present if + // their value is non-zero (numeric) or non-empty (string/bytes). N.B.: + // we must use this definition here, rather than the "scalar fields + // always present" in the proto3 docs, because MergeFrom() semantics + // require presence as "present on wire", and reflection-based merge + // (which uses HasField()) needs to be consistent with this. + switch (field->cpp_type()) { + case FieldDescriptor::CPPTYPE_BOOL: + return GetRaw(message, field) != false; + case FieldDescriptor::CPPTYPE_INT32: + return GetRaw(message, field) != 0; + case FieldDescriptor::CPPTYPE_INT64: + return GetRaw(message, field) != 0; + case FieldDescriptor::CPPTYPE_UINT32: + return GetRaw(message, field) != 0; + case FieldDescriptor::CPPTYPE_UINT64: + return GetRaw(message, field) != 0; + case FieldDescriptor::CPPTYPE_FLOAT: + static_assert(sizeof(uint32_t) == sizeof(float), + "Code assumes uint32_t and float are the same size."); + return GetRaw(message, field) != 0; + case FieldDescriptor::CPPTYPE_DOUBLE: + static_assert(sizeof(uint64_t) == sizeof(double), + "Code assumes uint64_t and double are the same size."); + return GetRaw(message, field) != 0; + case FieldDescriptor::CPPTYPE_ENUM: + return GetRaw(message, field) != 0; + case FieldDescriptor::CPPTYPE_STRING: + switch (field->cpp_string_type()) { + case FieldDescriptor::CppStringType::kCord: + return !GetField(message, field).empty(); + case FieldDescriptor::CppStringType::kView: + case FieldDescriptor::CppStringType::kString: { + if (IsInlined(field)) { + return !GetField(message, field) + .GetNoArena() + .empty(); + } + + return !GetField(message, field).Get().empty(); + } + default: + internal::Unreachable(); + } + case FieldDescriptor::CPPTYPE_MESSAGE: + default: + internal::Unreachable(); + } +} + +bool Reflection::IsFieldPresentGivenHasbits(const Message& message, + const FieldDescriptor* field, + const uint32_t* hasbits, + uint32_t hasbit_index) const { + // If hasbit exists but is not set, field is guaranteed to be missing. + if (!IsIndexInHasBitSet(hasbits, hasbit_index)) { + return false; + } + + // For explicit-presence fields, a set hasbit indicates a present field. + if (field->has_presence()) { + return true; + } + + // proto3: hasbits are present, but an additional zero check must be + // performed because hasbit can be set to true while field is zero. + + // Repeated fields do not have hasbits enabled in proto3. + ABSL_DCHECK(!field->is_repeated()) + << "repeated fields do not have hasbits in proto3."; + + // Handling map entries in proto3: + // Implicit presence map fields are represented as a native C++ map, but their + // corresponding MapEntry messages (e.g. if we want to access them as repeated + // MapEntry fields) will unconditionally be generated with hasbits. MapEntrys + // behave like explicit presence fields. That is, in MapEntry's C++ + // implementation... + // - key can be null, empty, or nonempty; + // - value can be null, empty, or nonempty. + if (IsMapEntry(field)) { + return true; + } + + // This is the vanilla case: for a non-repeated primitive or string field, + // returns if the field is nonzero (i.e. present in proto3 semantics). + return IsSingularFieldNonEmpty(message, field); +} + bool Reflection::HasFieldSingular(const Message& message, const FieldDescriptor* field) const { ABSL_DCHECK(!field->options().weak()); if (schema_.HasBitIndex(field) != static_cast(-1)) { - return IsIndexInHasBitSet(GetHasBits(message), schema_.HasBitIndex(field)); + return IsFieldPresentGivenHasbits(message, field, GetHasBits(message), + schema_.HasBitIndex(field)); } - // proto3: no has-bits. All fields present except messages, which are + // The python implementation traditionally assumes that proto3 messages don't + // have hasbits. As a result, proto3 objects created through dynamic message + // in Python won't have hasbits. We need the following code to preserve + // compatibility. + // NOTE: It would be nice to be able to remove it, but we need one + // or more breaking changes in order to do so. + // + // proto3 with no has-bits. All fields present except messages, which are // present only if their message-field pointer is non-null. if (field->cpp_type() == FieldDescriptor::CPPTYPE_MESSAGE) { return !schema_.IsDefaultInstance(message) && GetRaw(message, field) != nullptr; - } else { - // Non-message field (and non-oneof, since that was handled in HasField() - // before calling us), and singular (again, checked in HasField). So, this - // field must be a scalar. - - // Scalar primitive (numeric or string/bytes) fields are present if - // their value is non-zero (numeric) or non-empty (string/bytes). N.B.: - // we must use this definition here, rather than the "scalar fields - // always present" in the proto3 docs, because MergeFrom() semantics - // require presence as "present on wire", and reflection-based merge - // (which uses HasField()) needs to be consistent with this. - switch (field->cpp_type()) { - case FieldDescriptor::CPPTYPE_STRING: - switch (field->cpp_string_type()) { - case FieldDescriptor::CppStringType::kCord: - return !GetField(message, field).empty(); - case FieldDescriptor::CppStringType::kView: - case FieldDescriptor::CppStringType::kString: { - if (IsInlined(field)) { - return !GetField(message, field) - .GetNoArena() - .empty(); - } - - return GetField(message, field).Get().size() > 0; - } - } - internal::Unreachable(); - case FieldDescriptor::CPPTYPE_BOOL: - return GetRaw(message, field) != false; - case FieldDescriptor::CPPTYPE_INT32: - return GetRaw(message, field) != 0; - case FieldDescriptor::CPPTYPE_INT64: - return GetRaw(message, field) != 0; - case FieldDescriptor::CPPTYPE_UINT32: - return GetRaw(message, field) != 0; - case FieldDescriptor::CPPTYPE_UINT64: - return GetRaw(message, field) != 0; - case FieldDescriptor::CPPTYPE_FLOAT: - static_assert(sizeof(uint32_t) == sizeof(float), - "Code assumes uint32_t and float are the same size."); - return GetRaw(message, field) != 0; - case FieldDescriptor::CPPTYPE_DOUBLE: - static_assert(sizeof(uint64_t) == sizeof(double), - "Code assumes uint64_t and double are the same size."); - return GetRaw(message, field) != 0; - case FieldDescriptor::CPPTYPE_ENUM: - return GetRaw(message, field) != 0; - case FieldDescriptor::CPPTYPE_MESSAGE: - // handled above; avoid warning - break; - } - ABSL_LOG(FATAL) << "Reached impossible case in HasFieldSingular()."; - return false; } + + // Non-message field (and non-oneof, since that was handled in HasField() + // before calling us), and singular (again, checked in HasField). So, this + // field must be a scalar. + + return IsSingularFieldNonEmpty(message, field); } void Reflection::SetHasBit(Message* message, @@ -3077,23 +3137,31 @@ void Reflection::ClearHasBit(Message* message, ~(static_cast(1) << (index % 32)); } -void Reflection::SwapHasBit(Message* message1, Message* message2, - const FieldDescriptor* field) const { +void Reflection::NaiveSwapHasBit(Message* message1, Message* message2, + const FieldDescriptor* field) const { ABSL_DCHECK(!field->options().weak()); if (!schema_.HasHasbits()) { return; } - bool temp_is_present = HasFieldSingular(*message1, field); - if (HasFieldSingular(*message2, field)) { - SetHasBit(message1, field); - } else { - ClearHasBit(message1, field); - } - if (temp_is_present) { + const Reflection* r1 = message1->GetReflection(); + const Reflection* r2 = message2->GetReflection(); + + bool is_m1_hasbit_set = IsIndexInHasBitSet(r1->GetHasBits(*message1), + r1->schema_.HasBitIndex(field)); + bool is_m2_hasbit_set = IsIndexInHasBitSet(r2->GetHasBits(*message2), + r2->schema_.HasBitIndex(field)); + + if (is_m1_hasbit_set) { SetHasBit(message2, field); } else { ClearHasBit(message2, field); } + + if (is_m2_hasbit_set) { + SetHasBit(message1, field); + } else { + ClearHasBit(message1, field); + } } bool Reflection::HasOneof(const Message& message, diff --git a/src/google/protobuf/generated_message_reflection.h b/src/google/protobuf/generated_message_reflection.h index aa75cd0a9511d..ef08dc77de163 100644 --- a/src/google/protobuf/generated_message_reflection.h +++ b/src/google/protobuf/generated_message_reflection.h @@ -148,6 +148,10 @@ struct ReflectionSchema { sizeof(uint32_t)); } + // Returns true iff the field object has usable hasbit offset. + // Note that this is not necessarily correlated with *field presence* : + // Fields with implicit presence (i.e. ones that don't expose has_foo API) + // can still have hasbits in their underlying implementation. bool HasHasbits() const { return has_bits_offset_ != -1; } // Bit index within the bit array of hasbits. Bit order is low-to-high. diff --git a/src/google/protobuf/message.h b/src/google/protobuf/message.h index 92edb721bdcc6..ee79157d12bcb 100644 --- a/src/google/protobuf/message.h +++ b/src/google/protobuf/message.h @@ -1234,6 +1234,17 @@ class PROTOBUF_EXPORT Reflection final { return schema_.IsFieldInlined(field); } + // For "proto3 non-optional" primitive fields, aka implicit-presence fields, + // returns true if the field is populated, i.e., nonzero. False otherwise. + bool IsSingularFieldNonEmpty(const Message& message, + const FieldDescriptor* field) const; + // Returns whether the field is present if there are usable hasbits in the + // field schema. (Note that in some cases hasbits are merely a hint to + // indicate "possible presence", and another empty-check is required). + bool IsFieldPresentGivenHasbits(const Message& message, + const FieldDescriptor* field, + const uint32_t* hasbits, + uint32_t hasbit_index) const; // Returns true if the field is considered to be present. // Requires the input to be 'singular' i.e. non-extension, non-oneof, non-weak // field. @@ -1243,8 +1254,14 @@ class PROTOBUF_EXPORT Reflection final { const FieldDescriptor* field) const; void SetHasBit(Message* message, const FieldDescriptor* field) const; inline void ClearHasBit(Message* message, const FieldDescriptor* field) const; - inline void SwapHasBit(Message* message1, Message* message2, - const FieldDescriptor* field) const; + // Naively swaps the hasbit without checking for field existence. + // For explicit presence fields, the hasbit is swapped normally. + // For implicit presence fields, the hasbit is swapped without checking for + // field emptiness. That is, the destination message may have hasbit set even + // if the field is empty. This should still result in correct behaviour due to + // HasbitMode being set to kHintHasbits for implicit presence fields. + inline void NaiveSwapHasBit(Message* message1, Message* message2, + const FieldDescriptor* field) const; inline const uint32_t* GetInlinedStringDonatedArray( const Message& message) const;