From 06a520cdb78be66f506b88a8adba2763f2301dc0 Mon Sep 17 00:00:00 2001 From: Tony Liao Date: Tue, 30 Jul 2024 14:09:12 -0700 Subject: [PATCH] Add wire format parsing unit tests for implicit presence fields. This gist of the new test case coverage can be summarized as: - The wire does not distinguish between explicit and implicit fields. - For implicit presence fields, zeros on the wire can overwrite nonzero values (i.e. they are treated as a 'clear' operation). It's TBD whether we want to accept this behaviour going forward. Right now we are leaning towards keeping this behaviour, because: - If we receive zeros on the wire for implicit-presence fields, the protobuf wire format is "wrong" anyway. Well-behaved code should never generate zeros on the wire for implicit presence fields or serialize the same field multiple times. - There might be some value to enforce that "anything on the wire is accepted". This can make handling of wire format simpler. There are some drawbacks with this approach: - It might be somewhat surprising for users that zeros on the wire are always read, even for implicit-presence fields. - It might make the transition from implicit-presence to explicit-presence harder (or more unsafe) if user wants to migrate. - It leads to an inconsistency between what it means to "Merge". - Merging from a constructed object, with implicit presence and with field set to zero, will not overwrite. - Merging from the wire, with implicit presence and with zero explicitly present on the wire, WILL overwrite. I still need to add conformance tests to ensure that this is a consistent behavior across all languages, but for now let's at least add some coverage in C++ to ensure that this is a tested behaviour. PiperOrigin-RevId: 657724599 --- src/google/protobuf/BUILD.bazel | 2 + src/google/protobuf/no_field_presence_test.cc | 73 +++++++++++++++++++ .../protobuf/unittest_no_field_presence.proto | 8 +- 3 files changed, 82 insertions(+), 1 deletion(-) diff --git a/src/google/protobuf/BUILD.bazel b/src/google/protobuf/BUILD.bazel index 0321694748318..82a2dc6ea68d2 100644 --- a/src/google/protobuf/BUILD.bazel +++ b/src/google/protobuf/BUILD.bazel @@ -1657,6 +1657,8 @@ cc_test( deps = [ ":cc_test_protos", ":protobuf", + "@com_google_absl//absl/log:absl_check", + "@com_google_absl//absl/strings:string_view", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", ], diff --git a/src/google/protobuf/no_field_presence_test.cc b/src/google/protobuf/no_field_presence_test.cc index 56586731c75da..a5192ca5028a9 100644 --- a/src/google/protobuf/no_field_presence_test.cc +++ b/src/google/protobuf/no_field_presence_test.cc @@ -8,7 +8,10 @@ #include #include "google/protobuf/descriptor.pb.h" +#include #include +#include "absl/log/absl_check.h" +#include "absl/strings/string_view.h" #include "google/protobuf/descriptor.h" #include "google/protobuf/unittest.pb.h" #include "google/protobuf/unittest_no_field_presence.pb.h" @@ -17,6 +20,8 @@ namespace google { namespace protobuf { namespace { +using ::testing::StrEq; + // Helper: checks that all fields have default (zero/empty) values. void CheckDefaultValues( const proto2_nofieldpresence_unittest::TestAllTypes& m) { @@ -457,6 +462,74 @@ TEST(NoFieldPresenceTest, MergeFromIfNonzeroTest) { EXPECT_EQ("test2", dest.optional_string()); } +TEST(NoFieldPresenceTest, ExtraZeroesInWireParseTest) { + // check extra serialized zeroes on the wire are parsed into the object. + proto2_nofieldpresence_unittest::ForeignMessage dest; + dest.set_c(42); + ASSERT_EQ(42, dest.c()); + + // ExplicitForeignMessage has the same fields as ForeignMessage, but with + // explicit presence instead of implicit presence. + proto2_nofieldpresence_unittest::ExplicitForeignMessage source; + source.set_c(0); + std::string wire = source.SerializeAsString(); + ASSERT_THAT(wire, StrEq(absl::string_view{"\x08\x00", 2})); + + // The "parse" operation clears all fields before merging from wire. + ASSERT_TRUE(dest.ParseFromString(wire)); + EXPECT_EQ(0, dest.c()); + std::string dest_data; + EXPECT_TRUE(dest.SerializeToString(&dest_data)); + EXPECT_TRUE(dest_data.empty()); +} + +TEST(NoFieldPresenceTest, ExtraZeroesInWireMergeTest) { + // check explicit zeros on the wire are merged into an implicit one. + proto2_nofieldpresence_unittest::ForeignMessage dest; + dest.set_c(42); + ASSERT_EQ(42, dest.c()); + + // ExplicitForeignMessage has the same fields as ForeignMessage, but with + // explicit presence instead of implicit presence. + proto2_nofieldpresence_unittest::ExplicitForeignMessage source; + source.set_c(0); + std::string wire = source.SerializeAsString(); + ASSERT_THAT(wire, StrEq(absl::string_view{"\x08\x00", 2})); + + // TODO: b/356132170 -- Add conformance tests to ensure this behaviour is + // well-defined. + // As implemented, the C++ "merge" operation does not distinguish between + // implicit and explicit fields when reading from the wire. + ASSERT_TRUE(dest.MergeFromString(wire)); + // If zero is present on the wire, the original value is overwritten, even + // though this is specified as an "implicit presence" field. + EXPECT_EQ(0, dest.c()); + std::string dest_data; + EXPECT_TRUE(dest.SerializeToString(&dest_data)); + EXPECT_TRUE(dest_data.empty()); +} + +TEST(NoFieldPresenceTest, ExtraZeroesInWireLastWins) { + // check that, when the same field is present multiple times on the wire, we + // always take the last one -- even if it is a zero. + + absl::string_view wire{"\x08\x01\x08\x00", /*len=*/4}; // note the null-byte. + proto2_nofieldpresence_unittest::ForeignMessage dest; + + // TODO: b/356132170 -- Add conformance tests to ensure this behaviour is + // well-defined. + // As implemented, the C++ "merge" operation does not distinguish between + // implicit and explicit fields when reading from the wire. + ASSERT_TRUE(dest.MergeFromString(wire)); + // If the same field is present multiple times on the wire, "last one wins". + // i.e. -- the last seen field content will always overwrite, even if it's + // zero and the field is implicit presence. + EXPECT_EQ(0, dest.c()); + std::string dest_data; + EXPECT_TRUE(dest.SerializeToString(&dest_data)); + EXPECT_TRUE(dest_data.empty()); +} + TEST(NoFieldPresenceTest, IsInitializedTest) { // Check that IsInitialized works properly. proto2_nofieldpresence_unittest::TestProto2Required message; diff --git a/src/google/protobuf/unittest_no_field_presence.proto b/src/google/protobuf/unittest_no_field_presence.proto index f6880a58e9331..cc02acc913c0e 100644 --- a/src/google/protobuf/unittest_no_field_presence.proto +++ b/src/google/protobuf/unittest_no_field_presence.proto @@ -92,7 +92,7 @@ message TestAllTypes { repeated string repeated_string_piece = 54 [ctype = STRING_PIECE]; repeated string repeated_cord = 55 [ctype = CORD]; - repeated NestedMessage repeated_lazy_message = 57 ; + repeated NestedMessage repeated_lazy_message = 57; oneof oneof_field { uint32 oneof_uint32 = 111; @@ -112,6 +112,12 @@ message ForeignMessage { int32 c = 1; } +// Same as ForeignMessage, but all fields have explicit presence. +// It can be useful for testing explicit-implicit presence interop behaviour. +message ExplicitForeignMessage { + int32 c = 1 [features.field_presence = EXPLICIT]; +} + enum ForeignEnum { FOREIGN_FOO = 0; FOREIGN_BAR = 1;