Skip to content

Commit

Permalink
Add name mangling to nested names that collide with known generated n…
Browse files Browse the repository at this point in the history
…ames, like

the `New` function under messages.

This allows C++ codegen to compile on .proto files where it did not work
before.

PiperOrigin-RevId: 691852276
  • Loading branch information
protobuf-github-bot authored and copybara-github committed Oct 31, 2024
1 parent f27532d commit f2cf85c
Show file tree
Hide file tree
Showing 7 changed files with 225 additions and 24 deletions.
6 changes: 5 additions & 1 deletion src/google/protobuf/compiler/cpp/enum.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,11 @@ absl::flat_hash_map<absl::string_view, std::string> EnumVars(
return {
{"DEPRECATED", enum_->options().deprecated() ? "[[deprecated]]" : ""},
{"Enum", std::string(enum_->name())},
{"Enum_", ResolveKeyword(enum_->name())},
{"Enum_", ResolveKnownNameCollisions(enum_->name(),
enum_->containing_type() != nullptr
? NameContext::kMessage
: NameContext::kFile,
NameKind::kType)},
{"Msg_Enum", classname},
{"::Msg_Enum", QualifiedClassName(enum_, options)},
{"Msg_Enum_",
Expand Down
6 changes: 5 additions & 1 deletion src/google/protobuf/compiler/cpp/extension.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ ExtensionGenerator::ExtensionGenerator(const FieldDescriptor* descriptor,
variables_["extendee"] =
QualifiedClassName(descriptor_->containing_type(), options_);
variables_["type_traits"] = type_traits_;
variables_["name"] = ResolveKeyword(descriptor_->name());
variables_["name"] = ResolveKnownNameCollisions(
descriptor_->name(),
descriptor_->extension_scope() != nullptr ? NameContext::kMessage
: NameContext::kFile,
NameKind::kValue);
variables_["constant_name"] = FieldConstantName(descriptor_);
variables_["field_type"] =
absl::StrCat(static_cast<int>(descriptor_->type()));
Expand Down
124 changes: 103 additions & 21 deletions src/google/protobuf/compiler/cpp/helpers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,50 @@ namespace {
constexpr absl::string_view kAnyMessageName = "Any";
constexpr absl::string_view kAnyProtoFile = "google/protobuf/any.proto";

const absl::flat_hash_set<absl::string_view>& FileScopeKnownNames() {
static constexpr const char* kValue[] = {
"swap",
};
static const auto* const methods = new absl::flat_hash_set<absl::string_view>(
std::begin(kValue), std::end(kValue));
return *methods;
}

const absl::flat_hash_set<absl::string_view>& MessageKnownMethodsCamelCase() {
static constexpr const char* kMessageKnownMethods[] = {
"GetDescriptor", "GetReflection", "default_instance",
"Swap", "UnsafeArenaSwap", "New",
"CopyFrom", "MergeFrom", "IsInitialized",
"GetMetadata", "Clear",
};
static const auto* const methods = new absl::flat_hash_set<absl::string_view>(
std::begin(kMessageKnownMethods), std::end(kMessageKnownMethods));
return *methods;
}

const absl::flat_hash_set<absl::string_view>&
MessageKnownNullaryMethodsSnakeCase() {
static constexpr const char* kMessageKnownMethods[] = {
"unknown_fields",
"mutable_unknown_fields",
"descriptor",
"default_instance",
};
static const auto* const methods = new absl::flat_hash_set<absl::string_view>(
std::begin(kMessageKnownMethods), std::end(kMessageKnownMethods));
return *methods;
}

const absl::flat_hash_set<absl::string_view>&
MessageKnownNonNullaryMethodsSnakeCase() {
static constexpr const char* kMessageKnownMethods[] = {
"swap",
};
static const auto* const methods = new absl::flat_hash_set<absl::string_view>(
std::begin(kMessageKnownMethods), std::end(kMessageKnownMethods));
return *methods;
}

static const char* const kKeywordList[] = {
// clang-format off
"NULL",
Expand Down Expand Up @@ -406,12 +450,14 @@ std::string ClassName(const Descriptor* descriptor) {
if (parent) absl::StrAppend(&res, ClassName(parent), "_");
absl::StrAppend(&res, descriptor->name());
if (IsMapEntryMessage(descriptor)) absl::StrAppend(&res, "_DoNotUse");
return ResolveKeyword(res);
// This is the mangled message name which always goes in file scope.
return ResolveKnownNameCollisions(res, NameContext::kFile, NameKind::kType);
}

std::string ClassName(const EnumDescriptor* enum_descriptor) {
if (enum_descriptor->containing_type() == nullptr) {
return ResolveKeyword(enum_descriptor->name());
return ResolveKnownNameCollisions(enum_descriptor->name(),
NameContext::kFile, NameKind::kType);
} else {
return absl::StrCat(ClassName(enum_descriptor->containing_type()), "_",
enum_descriptor->name());
Expand All @@ -436,9 +482,14 @@ std::string QualifiedClassName(const EnumDescriptor* d) {
}

std::string ExtensionName(const FieldDescriptor* d) {
if (const Descriptor* scope = d->extension_scope())
return absl::StrCat(ClassName(scope), "::", ResolveKeyword(d->name()));
return ResolveKeyword(d->name());
if (const Descriptor* scope = d->extension_scope()) {
return absl::StrCat(
ClassName(scope), "::",
ResolveKnownNameCollisions(d->name(), NameContext::kMessage,
NameKind::kValue));
}
return ResolveKnownNameCollisions(d->name(), NameContext::kFile,
NameKind::kValue);
}

std::string QualifiedExtensionName(const FieldDescriptor* d,
Expand All @@ -452,23 +503,53 @@ std::string QualifiedExtensionName(const FieldDescriptor* d) {
}

std::string ResolveKeyword(absl::string_view name) {
if (Keywords().count(name) > 0) {
if (Keywords().contains(name)) {
return absl::StrCat(name, "_");
}
return std::string(name);
}

std::string DotsToColons(absl::string_view name) {
std::vector<std::string> scope = absl::StrSplit(name, '.', absl::SkipEmpty());
for (auto& word : scope) {
word = ResolveKeyword(word);
std::string ResolveKnownNameCollisions(absl::string_view name,
NameContext name_context,
NameKind name_kind) {
const auto has_conflict = [&] {
if (Keywords().contains(name)) return true;

switch (name_kind) {
// We assume the style guide: types are CamelCase, fields are snake_case.
case NameKind::kType:
// Types can't overload names of existing functions.
return MessageKnownMethodsCamelCase().contains(name);
case NameKind::kValue:
if (name_context == NameContext::kFile) {
// At file scope we don't have the normal names, except a few.
return FileScopeKnownNames().contains(name);
}
// Values can't overload names of existing functions.
return MessageKnownNullaryMethodsSnakeCase().contains(name) ||
MessageKnownNonNullaryMethodsSnakeCase().contains(name);
case NameKind::kFunction:
// For functions, we can't overload existing nullary functions.
// Non-nullary functions are fine.
return MessageKnownNullaryMethodsSnakeCase().contains(name);
}
return false;
};
if (has_conflict()) {
return absl::StrCat(name, "_");
}
return absl::StrJoin(scope, "::");
return std::string(name);
}

std::string Namespace(absl::string_view package) {
if (package.empty()) return "";
return absl::StrCat("::", DotsToColons(package));

std::vector<std::string> scope =
absl::StrSplit(package, '.', absl::SkipEmpty());
for (auto& word : scope) {
word = ResolveKeyword(word);
}
return absl::StrCat("::", absl::StrJoin(scope, "::"));
}

std::string Namespace(const FileDescriptor* d) { return Namespace(d, {}); }
Expand Down Expand Up @@ -555,12 +636,17 @@ std::string SuperClassName(const Descriptor* descriptor,
}

std::string FieldName(const FieldDescriptor* field) {
if (field->containing_type() != nullptr &&
field->containing_type()->options().no_standard_descriptor_accessor() &&
field->name() == "descriptor") {
// Special case for `optional no_standard_descriptor_accessor = true;`
return "descriptor";
}
std::string result = std::string(field->name());
absl::AsciiStrToLower(&result);
if (Keywords().count(result) > 0) {
result.append("_");
}
return result;
ABSL_CHECK(field->containing_type() != nullptr);
return ResolveKnownNameCollisions(result, NameContext::kMessage,
NameKind::kFunction);
}

std::string FieldMemberName(const FieldDescriptor* field, bool split) {
Expand Down Expand Up @@ -589,11 +675,7 @@ std::string QualifiedOneofCaseConstantName(const FieldDescriptor* field) {
}

std::string EnumValueName(const EnumValueDescriptor* enum_value) {
std::string result = std::string(enum_value->name());
if (Keywords().count(result) > 0) {
result.append("_");
}
return result;
return ResolveKeyword(enum_value->name());
}

int EstimateAlignmentSize(const FieldDescriptor* field) {
Expand Down
18 changes: 18 additions & 0 deletions src/google/protobuf/compiler/cpp/helpers.h
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,24 @@ std::string FileDllExport(const FileDescriptor* file, const Options& options);
std::string SuperClassName(const Descriptor* descriptor,
const Options& options);

// Add an underscore if necessary to prevent conflicting with known names and
// keywords.
// We use the context and the kind of entity to try to determine if mangling is
// necessary or not.
// For example, a message named `New` at file scope is fine, but at message
// scope it needs mangling because it collides with the `New` function.
enum class NameContext {
kFile,
kMessage,
};
enum class NameKind {
kType,
kFunction,
kValue,
};
std::string ResolveKnownNameCollisions(absl::string_view name,
NameContext name_context,
NameKind name_kind);
// Adds an underscore if necessary to prevent conflicting with a keyword.
std::string ResolveKeyword(absl::string_view name);

Expand Down
5 changes: 4 additions & 1 deletion src/google/protobuf/compiler/cpp/message.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1976,7 +1976,10 @@ void MessageGenerator::GenerateClassDefinition(io::Printer* p) {
{
Sub{"nested_full_name", ClassName(nested_type, false)}
.AnnotatedAs(nested_type),
Sub{"nested_name", ResolveKeyword(nested_type->name())}
Sub{"nested_name",
ResolveKnownNameCollisions(nested_type->name(),
NameContext::kMessage,
NameKind::kType)}
.AnnotatedAs(nested_type),
},
R"cc(
Expand Down
55 changes: 55 additions & 0 deletions src/google/protobuf/compiler/cpp/test_bad_identifiers.proto
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,41 @@ message TestConflictingSymbolNames {
optional uint32 typedecl = 39;
optional uint32 auto = 40;

// Methods generated in the parent type
message BadKnownNamesFields {
optional int32 unknown_fields = 1;
optional int32 mutable_unknown_fields = 2;
optional int32 descriptor = 3;
optional int32 default_instance = 4;
optional int32 swap = 5;
}
message BadKnownNamesFieldsNoStandardDescriptor {
option no_standard_descriptor_accessor = true;
optional int32 descriptor = 3;
}
message BadKnownNamesTypes {
message GetDescriptor {}
message GetReflection {}
message Swap {}
message UnsafeArenaSwap {}
message New {}
message CopyFrom {}
message MergeFrom {}
message GetMetadata {}
message Clear {}
message IsInitialized {}
}
message BadKnownNamesValues { // NO_PROTO3
extensions 1 to 100; // NO_PROTO3
extend BadKnownNamesValues { // NO_PROTO3
optional int32 unknown_fields = 1; // NO_PROTO3
optional int32 mutable_unknown_fields = 2; // NO_PROTO3
optional int32 descriptor = 3; // NO_PROTO3
optional int32 default_instance = 4; // NO_PROTO3
optional int32 swap = 5; // NO_PROTO3
} // NO_PROTO3
} // NO_PROTO3

// The generator used to #define a macro called "DO" inside the .cc file.
message DO {}
optional DO do = 32;
Expand All @@ -137,6 +172,26 @@ message TestConflictingSymbolNames {
extensions 1000 to max [verification = UNVERIFIED]; // NO_PROTO3
}

// Special names as above, but at file scope.
message GetDescriptor {}
message GetReflection {}
message Swap {}
message UnsafeArenaSwap {}
message New {}
message CopyFrom {}
message MergeFrom {}
message GetMetadata {}
message Clear {}
message IsInitialized {}

extend TestConflictingSymbolNames.BadKnownNamesValues { // NO_PROTO3
optional int32 unknown_fields = 11; // NO_PROTO3
optional int32 mutable_unknown_fields = 12; // NO_PROTO3
optional int32 descriptor = 13; // NO_PROTO3
optional int32 default_instance = 14; // NO_PROTO3
optional int32 swap = 15; // NO_PROTO3
} // NO_PROTO3

message TestConflictingSymbolNamesExtension { // NO_PROTO3
extend TestConflictingSymbolNames { // NO_PROTO3
repeated int32 repeated_int32_ext = 20423638 [packed = true]; // NO_PROTO3
Expand Down
35 changes: 35 additions & 0 deletions src/google/protobuf/compiler/cpp/unittest.cc
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,41 @@ TEST(GENERATED_MESSAGE_TEST_NAME, TestConflictingSymbolNames) {
EXPECT_EQ(123, message.GetExtension(ExtensionMessage::repeated_int32_ext, 0));
}

TEST(GENERATED_MESSAGE_TEST_NAME, TestSwapNameIsNotMangledForFields) {
// For backwards compatibility we do not mangle `swap`. It works thanks to
// overload resolution.
int v [[maybe_unused]] =
protobuf_unittest::TestConflictingSymbolNames::BadKnownNamesFields().swap();

// But we do mangle `swap` for extensions because there is no overloading
// there.
v = protobuf_unittest::TestConflictingSymbolNames::BadKnownNamesValues()
.GetExtension(protobuf_unittest::TestConflictingSymbolNames::
BadKnownNamesValues::swap_);
}

TEST(GENERATED_MESSAGE_TEST_NAME, TestNoStandardDescriptorOption) {
// When no_standard_descriptor_accessor = true, we should not mangle fields
// named `descriptor`.
int v [[maybe_unused]] =
protobuf_unittest::TestConflictingSymbolNames::BadKnownNamesFields()
.descriptor_();
v = protobuf_unittest::TestConflictingSymbolNames::
BadKnownNamesFieldsNoStandardDescriptor()
.descriptor();
}

TEST(GENERATED_MESSAGE_TEST_NAME, TestFileVsMessageScope) {
// Special names at message scope are mangled,
int v [[maybe_unused]] =
protobuf_unittest::TestConflictingSymbolNames::BadKnownNamesValues()
.GetExtension(protobuf_unittest::TestConflictingSymbolNames::
BadKnownNamesValues::unknown_fields_);
// But not at file scope.
v = protobuf_unittest::TestConflictingSymbolNames::BadKnownNamesValues()
.GetExtension(protobuf_unittest::unknown_fields);
}

TEST(GENERATED_MESSAGE_TEST_NAME, TestConflictingEnumNames) {
protobuf_unittest::TestConflictingEnumNames message;
message.set_conflicting_enum(
Expand Down

0 comments on commit f2cf85c

Please sign in to comment.