From d0e49dfe3161714046a4ee2aeafba133b171e26f Mon Sep 17 00:00:00 2001 From: Mike Kruskal Date: Thu, 18 Jul 2024 10:41:30 -0700 Subject: [PATCH] Introduce FieldDescriptor::cpp_string_type() API to replace direct ctype inspection which will be removed in the next breaking change This should provide the roughly same result as ctype, except that it reflects actual behavior rather than the specification in the proto file. VIEW will now be visible, and some subtleties around CORD and PIECE will change. PiperOrigin-RevId: 653677655 --- src/google/protobuf/descriptor.cc | 23 +++++++ src/google/protobuf/descriptor.h | 19 +++--- src/google/protobuf/descriptor_lite.h | 11 ++++ src/google/protobuf/descriptor_unittest.cc | 70 ++++++++++++++++++++++ 4 files changed, 115 insertions(+), 8 deletions(-) diff --git a/src/google/protobuf/descriptor.cc b/src/google/protobuf/descriptor.cc index ed5f6af920f1f..4561431797bfa 100644 --- a/src/google/protobuf/descriptor.cc +++ b/src/google/protobuf/descriptor.cc @@ -3939,6 +3939,29 @@ bool FieldDescriptor::has_optional_keyword() const { is_optional() && !containing_oneof()); } +FieldDescriptor::CppStringType FieldDescriptor::cpp_string_type() const { + ABSL_DCHECK(cpp_type() == FieldDescriptor::CPPTYPE_STRING); + switch (features().GetExtension(pb::cpp).string_type()) { + case pb::CppFeatures::VIEW: + return CppStringType::kView; + case pb::CppFeatures::CORD: + // In open-source, protobuf CORD is only supported for singular bytes + // fields. + if (type() != FieldDescriptor::TYPE_BYTES || is_repeated() || + is_extension()) { + return CppStringType::kString; + } + return CppStringType::kCord; + case pb::CppFeatures::STRING: + return CppStringType::kString; + default: + // If features haven't been resolved, this is a dynamic build not for C++ + // codegen. Just use string type. + ABSL_DCHECK(!features().GetExtension(pb::cpp).has_string_type()); + return CppStringType::kString; + } +} + // Location methods =============================================== bool FileDescriptor::GetSourceLocation(const std::vector& path, diff --git a/src/google/protobuf/descriptor.h b/src/google/protobuf/descriptor.h index 28db34d95ee61..08a3d5c510279 100644 --- a/src/google/protobuf/descriptor.h +++ b/src/google/protobuf/descriptor.h @@ -875,6 +875,10 @@ class PROTOBUF_EXPORT FieldDescriptor : private internal::SymbolBase, const char* cpp_type_name() const; // Name of the C++ type. Label label() const; // optional/required/repeated +#ifndef SWIG + CppStringType cpp_string_type() const; // The C++ string type of this field. +#endif + bool is_required() const; // shorthand for label() == LABEL_REQUIRED bool is_optional() const; // shorthand for label() == LABEL_OPTIONAL bool is_repeated() const; // shorthand for label() == LABEL_REPEATED @@ -2932,22 +2936,21 @@ PROTOBUF_EXPORT bool HasPreservingUnknownEnumSemantics( PROTOBUF_EXPORT bool HasHasbit(const FieldDescriptor* field); +#ifndef SWIG // For a string field, returns the effective ctype. If the actual ctype is // not supported, returns the default of STRING. template typename FieldOpts::CType EffectiveStringCType(const FieldDesc* field) { - ABSL_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING); - // Open-source protobuf release only supports STRING ctype and CORD for - // sinuglar bytes. - if (field->type() == FieldDescriptor::TYPE_BYTES && !field->is_repeated() && - field->options().ctype() == FieldOpts::CORD && !field->is_extension()) { - return FieldOpts::CORD; + // TODO Replace this function with FieldDescriptor::string_type; + switch (field->cpp_string_type()) { + case FieldDescriptor::CppStringType::kCord: + return FieldOpts::CORD; + default: + return FieldOpts::STRING; } - return FieldOpts::STRING; } -#ifndef SWIG enum class Utf8CheckMode : uint8_t { kStrict = 0, // Parsing will fail if non UTF-8 data is in string fields. kVerify = 1, // Only log an error but parsing will succeed. diff --git a/src/google/protobuf/descriptor_lite.h b/src/google/protobuf/descriptor_lite.h index db5805affee3b..e1a90b7fa60d5 100644 --- a/src/google/protobuf/descriptor_lite.h +++ b/src/google/protobuf/descriptor_lite.h @@ -78,6 +78,17 @@ class FieldDescriptorLite { MAX_LABEL = 3, // Constant useful for defining lookup tables // indexed by Label. }; + + // Identifies the storage type of a C++ string field. This corresponds to + // pb.CppFeatures.StringType, but is compatible with ctype prior to Edition + // 2024. 0 is reserved for errors. +#ifndef SWIG + enum class CppStringType { + kView = 1, + kCord = 2, + kString = 3, + }; +#endif }; } // namespace internal diff --git a/src/google/protobuf/descriptor_unittest.cc b/src/google/protobuf/descriptor_unittest.cc index 0c13b3a2b46e4..f4d3137099f98 100644 --- a/src/google/protobuf/descriptor_unittest.cc +++ b/src/google/protobuf/descriptor_unittest.cc @@ -7594,6 +7594,9 @@ TEST_F(FeaturesTest, Proto2Features) { EXPECT_TRUE(message->FindFieldByName("req")->is_required()); EXPECT_TRUE(file->enum_type(0)->is_closed()); + EXPECT_EQ(message->FindFieldByName("str")->cpp_string_type(), + FieldDescriptor::CppStringType::kString); + // Check round-trip consistency. FileDescriptorProto proto; file->CopyTo(&proto); @@ -9710,6 +9713,73 @@ TEST_F(FeaturesTest, EnumFeatureHelpers) { EXPECT_FALSE(HasPreservingUnknownEnumSemantics(field_legacy_closed)); } +TEST_F(FeaturesTest, FieldCppStringType) { + BuildDescriptorMessagesInTestPool(); + const std::string file_contents = absl::Substitute( + R"pb( + name: "foo.proto" + syntax: "editions" + edition: EDITION_2024 + message_type { + name: "Foo" + field { + name: "view" + number: 1 + label: LABEL_OPTIONAL + type: TYPE_STRING + } + field { + name: "str" + number: 2 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { + features { + [pb.cpp] { string_type: STRING } + } + } + } + field { + name: "cord" + number: 3 + label: LABEL_OPTIONAL + type: TYPE_STRING + options { + features { + [pb.cpp] { string_type: CORD } + } + } + } + field { + name: "cord_bytes" + number: 4 + label: LABEL_OPTIONAL + type: TYPE_BYTES + options { + features { + [pb.cpp] { string_type: CORD } + } + } + } $0 + } + )pb", + "" + ); + const FileDescriptor* file = BuildFile(file_contents); + const Descriptor* message = file->message_type(0); + const FieldDescriptor* view = message->field(0); + const FieldDescriptor* str = message->field(1); + const FieldDescriptor* cord = message->field(2); + const FieldDescriptor* cord_bytes = message->field(3); + + EXPECT_EQ(view->cpp_string_type(), FieldDescriptor::CppStringType::kView); + EXPECT_EQ(str->cpp_string_type(), FieldDescriptor::CppStringType::kString); + EXPECT_EQ(cord_bytes->cpp_string_type(), + FieldDescriptor::CppStringType::kCord); + EXPECT_EQ(cord->cpp_string_type(), FieldDescriptor::CppStringType::kString); + +} + TEST_F(FeaturesTest, MergeFeatureValidationFailed) { BuildDescriptorMessagesInTestPool(); BuildFileInTestPool(pb::TestFeatures::descriptor()->file());