Skip to content

Commit

Permalink
Merge with main to pull in a fix to an unrelated test.
Browse files Browse the repository at this point in the history
  • Loading branch information
deannagarcia committed Nov 2, 2023
2 parents cf02e0b + f78f9c5 commit c46f0e4
Show file tree
Hide file tree
Showing 29 changed files with 465 additions and 132 deletions.
49 changes: 31 additions & 18 deletions .github/workflows/test_cpp.yml
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,8 @@ jobs:
# for Apple Silicon to detect issues there.
bazel: build --cpu=darwin_arm64 //src/...
- name: Windows
os: windows-2019
cache_key: windows-2019
os: windows-2022
cache_key: windows-2022
bazel: test //src/... @com_google_protobuf_examples//... --test_tag_filters=-conformance --build_tag_filters=-conformance
name: ${{ matrix.name }} Bazel
runs-on: ${{ matrix.os }}
Expand All @@ -351,44 +351,46 @@ jobs:
flags: -DCMAKE_CXX_STANDARD=14
cache-prefix: macos-cmake
- name: Windows CMake
os: windows-2019
flags: >-
-G Ninja -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_CONFORMANCE=OFF
-Dprotobuf_BUILD_SHARED_LIBS=OFF
-Dprotobuf_BUILD_EXAMPLES=ON
vsversion: '2019'
cache-prefix: windows-2019-cmake
- name: Windows CMake 2022
os: windows-2022
flags: >-
-G Ninja -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_CONFORMANCE=OFF
-Dprotobuf_BUILD_SHARED_LIBS=OFF
-Dprotobuf_BUILD_EXAMPLES=ON
vsversion: '2022'
cache-prefix: windows-2022-cmake
- name: Windows CMake 32-bit
- name: Windows CMake 2019
os: windows-2019
flags: >-
-G Ninja -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_CONFORMANCE=OFF
-Dprotobuf_BUILD_SHARED_LIBS=OFF
-Dprotobuf_BUILD_EXAMPLES=ON
vsversion: '2019'
cache-prefix: windows-2019-cmake
# windows-2019 has python3.7 installed, which is incompatible with the latest gcloud
python-version: '3.8'
- name: Windows CMake 32-bit
os: windows-2022
flags: >-
-G Ninja -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_CONFORMANCE=OFF
vsversion: '2022'
windows-arch: 'win32'
cache-prefix: windows-2019-win32-cmake
cache-prefix: windows-2022-win32-cmake
- name: Windows CMake Shared
os: windows-2019
os: windows-2022
flags: >-
-G Ninja -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_CONFORMANCE=OFF
-Dprotobuf_BUILD_SHARED_LIBS=ON
vsversion: '2019'
cache-prefix: windows-2019-cmake
vsversion: '2022'
cache-prefix: windows-2022-cmake
- name: Windows CMake Install
os: windows-2019
os: windows-2022
install-flags: -G Ninja -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_CONFORMANCE=OFF -Dprotobuf_BUILD_TESTS=OFF
flags: >-
-G Ninja -Dprotobuf_WITH_ZLIB=OFF -Dprotobuf_BUILD_CONFORMANCE=OFF
-Dprotobuf_REMOVE_INSTALLED_HEADERS=ON
-Dprotobuf_BUILD_PROTOBUF_BINARIES=OFF
vsversion: '2019'
cache-prefix: windows-2019-cmake
vsversion: '2022'
cache-prefix: windows-2022-cmake
name: ${{ matrix.name }}
runs-on: ${{ matrix.os }}
steps:
Expand All @@ -405,6 +407,17 @@ jobs:
arch: ${{ matrix.windows-arch || 'x64' }}
vsversion: ${{ matrix.vsversion }}

# Workaround for incompatibility between gcloud and windows-2019 runners.
- name: Install Python
if: ${{ matrix.python-version }}
uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ matrix.python-version }}
- name: Use custom python for gcloud
if: ${{ matrix.python-version }}
run: echo "CLOUDSDK_PYTHON=${Python3_ROOT_DIR}\\python3" >> $GITHUB_ENV
shell: bash

- name: Setup sccache
uses: protocolbuffers/protobuf-ci/sccache@v2
with:
Expand Down
1 change: 1 addition & 0 deletions .github/workflows/test_python.yml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@ jobs:
with:
python-version: ${{ matrix.version }}
cache: pip
cache-dependency-path: 'python/requirements.txt'

- name: Validate version
run: python3 --version | grep ${{ matrix.version }} || (echo "Invalid Python version - $(python3 --version)" && exit 1)
Expand Down
14 changes: 11 additions & 3 deletions .github/workflows/test_upb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,16 @@ jobs:
strategy:
fail-fast: false # Don't cancel all jobs if one fails.
name: Windows
runs-on: windows-2019
runs-on: windows-2022
steps:
- name: Checkout pending changes
uses: protocolbuffers/protobuf-ci/checkout@v2
with:
ref: ${{ inputs.safe-checkout }}
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
cache: pip
cache-dependency-path: 'python/requirements.txt'
- name: Run tests
uses: protocolbuffers/protobuf-ci/bazel@v2
with:
Expand All @@ -92,6 +96,10 @@ jobs:
uses: protocolbuffers/protobuf-ci/checkout@v2
with:
ref: ${{ inputs.safe-checkout }}
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
cache: pip
cache-dependency-path: 'python/requirements.txt'
- name: Run tests
uses: protocolbuffers/protobuf-ci/bazel@v2
with:
Expand Down Expand Up @@ -195,7 +203,7 @@ jobs:
with:
name: requirements
path: requirements
- uses: actions/setup-python@v2
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ matrix.python-version }}
architecture: ${{ matrix.architecture }}
Expand Down Expand Up @@ -250,7 +258,7 @@ jobs:
path: wheels
- name: Delete Binary Wheels
run: find wheels -type f | grep -v none-any | xargs rm
- uses: actions/setup-python@v2
- uses: actions/setup-python@61a6322f88396a6271a6ee3565807d608ecaddd1 # v4.7.0
with:
python-version: ${{ matrix.python-version }}
- name: Setup Python venv
Expand Down
1 change: 1 addition & 0 deletions docs/design/editions/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -36,3 +36,4 @@ The following topics are in this repository:
* [Edition Naming](edition-naming.md)
* [Editions Feature Visibility](editions-feature-visibility.md)
* [Legacy Syntax Editions](legacy-syntax-editions.md)
* [Editions: Feature Extension Layout](editions-feature-extension-layout.md)
150 changes: 150 additions & 0 deletions docs/design/editions/editions-feature-extension-layout.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
# Editions: Feature Extension Layout

**Author:** [@mkruskal-google](https://github.com/mkruskal-google),
[@zhangskz](https://github.com/zhangskz)

**Approved:** 2023-08-23

## Background

"[What are Protobuf Editions](what-are-protobuf-editions.md)" lays out a plan
for allowing for more targeted features not owned by the protobuf team. It uses
extensions of the global features proto to implement this. One thing that was
left a bit ambiguous was *who* should own these extensions. Language, code
generator, and runtime implementations are all similar but not identical
distinctions.

"Editions Zero Feature: utf8_validation" (not available externally, though a
later version,
"[Editions Zero: utf8_validation Without Problematic Options](editions-zero-utf8_validation.md)"
is) is a recent plan to add a new set of generator features for utf8 validation.
While the sole feature we had originally created (`legacy_closed_enum` in Java
and C++) didn't have any ambiguity here, this one did. Specifically in Python,
the current behaviors across proto2/proto3 are distinct for all 3
implementations: pure python, Python/C++, Python/upb.

## Overview

In meetings, we've discussed various alternatives, captured below. The original
plan was to make feature extensions runtime implementation-specific (e.g. C++,
Java, Python, upb). There are some notable complications that came up though:

1. **Polyglot** - it's not clear how upb or C++ runtimes should behave in
multi-language situations. Which feature sets do they consider for runtime
behaviors? *Note: this is already a serious issue today, where all proto2
strings and many proto3 strings are completely unsafe across languages.*

2. **Shared Implementations** - Runtimes like upb and C++ are used as backing
implementations of multiple other languages (e.g. Python, Rust, Ruby, PHP).
If we have a single set of `upb` or `cpp` features, migrating to those
shared implementations would be more difficult (since there's no independent
switches per-language). *Note: this is already the situation we're in today,
where switching the runtime implementation can cause subtle and dangerous
behavior changes.*

Given that we only have two behaviors, and one of them is unambiguous, it seems
reasonable to punt on this decision until we have more information. We may
encounter more edge cases that require feature extensions (and give us more
information) during the rollout of edition zero. We also have a lot of freedom
to re-model features in later editions, so keeping the initial implementation as
simple as possible seems best (i.e. Alternative 2).

## Alternatives

### Alternative 1: Runtime Implementation Features

Features would be per-runtime implementation as originally described in
"Editions Zero Feature: utf8_validation." For example, Protobuf Python users
would set different features depending on the backing implementation (e.g.
`features.(pb.cpp).<feature>`, `features.(pb.upb).<feature>`).

#### Pros

* Most consistent with range of behaviors expressible pre-Editions

#### Cons

* Implementation may / should not be obvious to users.
* Lack of levers specifically for language / implementation combos. For
example, there is no way to set Python-C++ behavior independently of C++
behavior which may make migration harder from other Python implementations.

### Alternative 2: Generator Features

Features would be per-generator only (i.e. each protoc plugin would own one set
of features). This was the second decision we made in later discussions, and
while very similar to the above alternative, it's more inline with our goal of
making features primarily for codegen.

For example, all Python implementations would share the same set of features
(e.g. `features.(pb.python).<feature>`). However, certain features could be
targeted to specific implementations (e.g.
`features.(pb.python).upb_utf8_validation` would only be used by Python/upb).

#### Pros

* Allows independent controls of shared implementations in different target
languages (e.g. Python's upb feature won't affect PHP).

#### Cons

* Possible complexity in upb to understand which language's features to
respect. UPB is not currently aware of what language it is being used for.
* Limits in-process sharing across languages with shared implementations (e.g.
Python upb, PHP upb) in the case of conflicting behaviors.
* Additional checks may be needed.

### Alternative 3: Migrate to bytes

Since this whole discussion revolves around the utf8 validation feature, one
option would be to just remove it from edition zero. Instead of adding a new
toggle for UTF8 behavior, we could simply migrate everyone who doesn't enforce
utf8 today to `bytes`. This would likely need another new *codegen* feature for
generating byte getters/setters as strings, but that wouldn't have any of the
ambiguity we're seeing today.

Unfortunately, this doesn't seem feasible because of all the different behaviors
laid out in "Editions Zero Feature: utf8_validation." UTF8 validation isn't
really a binary on/off decision, and it can vary widely between languages. There
are many cases where UTF8 is validated in **some** languages but not others, and
there's also the C++ "hint" behavior that logs errors but allows invalid UTF8.

**Note:** This could still be partially done in a follow-up LSC by targeting
specific combinations of the new feature that disable validation in all relevant
languages.

#### Pros

* Punts on the issue, we wouldn't need any upb features and C++ features would
all be code-gen only
* Simplifies the situation, avoids adding a very complicated feature in
edition zero

#### Cons

* Not really possible given the current complexity
* There are O(10M) proto2 string fields that would be blindly changed to bytes

### Alternative 4: Nested Features

Another option is to allow for shared feature set messages. For example, upb
would define a feature message, but *not* make it an extension of the global
`FeatureSet`. Instead, languages with upb implementations would have a field of
this type to allow for finer-grained controls. C++ would both extend the global
`FeatureSet` and also be allowed as a field in other languages.

For example, python utf8 validation could be specified as:

We could have checks during feature validation that enforce that impossible
combinations aren't specified. For example, with our current implementation
`features.(pb.python).cpp` should always be identical to `features.(pb.cpp)`,
since we don't have any mechanism for distinguishing them.

#### Pros

* Much more explicit than options 1 and 2

#### Cons

* Maybe too explicit? Proto owners would be forced to duplicate a lot of
features
5 changes: 5 additions & 0 deletions docs/options.md
Original file line number Diff line number Diff line change
Expand Up @@ -486,3 +486,8 @@ with info about your project (name and website) so we can add an entry for you.

* Website: https://square.github.io/wire/
* Extensions: 1185

1. Protons

* Website: https://github.com/ipfs/protons
* Extensions: 1186
1 change: 1 addition & 0 deletions python/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -203,5 +203,6 @@ py_extension(
"//upb/util:compare",
"//upb/util:def_to_proto",
"//upb/util:required_fields",
"@utf8_range",
],
)
32 changes: 20 additions & 12 deletions python/convert.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
#include "upb/message/map.h"
#include "upb/reflection/message.h"
#include "upb/util/compare.h"
#include "utf8_range.h"

// Must be last.
#include "upb/port/def.inc"
Expand Down Expand Up @@ -259,20 +260,27 @@ bool PyUpb_PyToUpb(PyObject* obj, const upb_FieldDef* f, upb_MessageValue* val,
}
case kUpb_CType_String: {
Py_ssize_t size;
const char* ptr;
PyObject* unicode = NULL;
if (PyBytes_Check(obj)) {
unicode = obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
if (!obj) return false;
// Use the object's bytes if they are valid UTF-8.
char* ptr;
if (PyBytes_AsStringAndSize(obj, &ptr, &size) < 0) return false;
if (utf8_range2((const unsigned char*)ptr, size) != 0) {
// Invalid UTF-8. Try to convert the message to a Python Unicode
// object, even though we know this will fail, just to get the
// idiomatic Python error message.
obj = PyUnicode_FromEncodedObject(obj, "utf-8", NULL);
assert(!obj);
return false;
}
*val = PyUpb_MaybeCopyString(ptr, size, arena);
return true;
} else {
const char* ptr;
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
if (PyErr_Occurred()) return false;
*val = PyUpb_MaybeCopyString(ptr, size, arena);
return true;
}
ptr = PyUnicode_AsUTF8AndSize(obj, &size);
if (PyErr_Occurred()) {
Py_XDECREF(unicode);
return false;
}
*val = PyUpb_MaybeCopyString(ptr, size, arena);
Py_XDECREF(unicode);
return true;
}
case kUpb_CType_Message:
PyErr_Format(PyExc_ValueError, "Message objects may not be assigned");
Expand Down
1 change: 0 additions & 1 deletion python/google/protobuf/internal/message_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@

warnings.simplefilter('error', DeprecationWarning)


@_parameterized.named_parameters(('_proto2', unittest_pb2),
('_proto3', unittest_proto3_arena_pb2))
@testing_refleaks.TestCase
Expand Down
Loading

0 comments on commit c46f0e4

Please sign in to comment.