Skip to content

Commit

Permalink
Improve performace of repeated packed fixedSize fields (#19667)
Browse files Browse the repository at this point in the history
## Changes
Improve performance of packed RepeatedField that has fixed size, by coping `LEN` bytes from serialzied buffer to RepeatedField array (like c memcpy).

## Relevant Information
As the docs [here](https://protobuf.dev/programming-guides/encoding/#cheat-sheet) notes, fixed size value is `memcpy of the equivalent C types (u?int64_t, double)`

You can see benchmark code and results [here](https://github.com/YarinOmesi/protobuf/blob/feature-with-benchmark/csharp/src/Benchmark)

## Summarized result:
| Method               | Job                          | Size   |         Mean |    Ratio | Allocated |
|----------------------|------------------------------|--------|-------------:|---------:|----------:|
| Parse_RepeatedFloats | 3.29.0                       | 100000 | 416,582.7 ns | baseline |  400297 B |
| Parse_RepeatedFloats | improved-fixed-size-repeated | 100000 | 134,671.7 ns |     -68% |  400776 B |
| Write_RepeatedFloats | 3.29.0                       | 100000 | 340.549 us | baseline | 391.57 KB |
| Write_RepeatedFloats | improved-fixed-size-repeated | 100000 | 106.577 us |     -69% |  391.5 KB |

---
I am pretty new to this repo to tell me what to you think about this.

Closes #19667

COPYBARA_INTEGRATE_REVIEW=#19667 from YarinOmesi:improve-performance-repeated-packed-fixed-field de11524
PiperOrigin-RevId: 729380114
  • Loading branch information
YarinOmesi authored and copybara-github committed Feb 21, 2025
1 parent b666009 commit 85507b9
Show file tree
Hide file tree
Showing 2 changed files with 76 additions and 11 deletions.
58 changes: 51 additions & 7 deletions csharp/src/Google.Protobuf/Collections/RepeatedField.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,13 @@
#endregion

using System;
using System.Buffers;
using System.Collections;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Linq;
using System.Runtime.InteropServices;
using System.Security;
#if NET5_0_OR_GREATER
using System.Runtime.CompilerServices;
Expand Down Expand Up @@ -116,12 +118,24 @@ public void AddEntriesFrom(ref ParseContext ctx, FieldCodec<T> codec)
{
EnsureSize(count + (length / codec.FixedSize));

while (!SegmentedBufferHelper.IsReachedLimit(ref ctx.state))
// if littleEndian treat array as bytes and directly copy from buffer for improved performance
if(TryGetArrayAsSpanPinnedUnsafe(codec, out Span<byte> span, out GCHandle handle))
{
span = span.Slice(count * codec.FixedSize);
Debug.Assert(span.Length >= length);
ParsingPrimitives.ReadPackedFieldLittleEndian(ref ctx.buffer, ref ctx.state, length, span);
count += length / codec.FixedSize;
handle.Free();
}
else
{
// Only FieldCodecs with a fixed size can reach here, and they are all known
// types that don't allow the user to specify a custom reader action.
// reader action will never return null.
array[count++] = reader(ref ctx);
while (!SegmentedBufferHelper.IsReachedLimit(ref ctx.state))
{
// Only FieldCodecs with a fixed size can reach here, and they are all known
// types that don't allow the user to specify a custom reader action.
// reader action will never return null.
array[count++] = reader(ref ctx);
}
}
}
else
Expand Down Expand Up @@ -241,9 +255,21 @@ public void WriteTo(ref WriteContext ctx, FieldCodec<T> codec)
int size = CalculatePackedDataSize(codec);
ctx.WriteTag(tag);
ctx.WriteLength(size);
for (int i = 0; i < count; i++)

// if littleEndian and elements has fixed size, treat array as bytes (and write it as bytes to buffer) for improved performance
if(TryGetArrayAsSpanPinnedUnsafe(codec, out Span<byte> span, out GCHandle handle))
{
writer(ref ctx, array[i]);
span = span.Slice(0, Count * codec.FixedSize);

WritingPrimitives.WriteRawBytes(ref ctx.buffer, ref ctx.state, span);
handle.Free();
}
else
{
for (int i = 0; i < count; i++)
{
writer(ref ctx, array[i]);
}
}
}
else
Expand Down Expand Up @@ -679,6 +705,24 @@ internal void SetCount(int targetCount)
count = targetCount;
}

[SecuritySafeCritical]
private unsafe bool TryGetArrayAsSpanPinnedUnsafe(FieldCodec<T> codec, out Span<byte> span, out GCHandle handle)
{
// 1. protobuf wire bytes is LittleEndian only
// 2. validate that size of csharp element T is matching the size of protobuf wire size
// NOTE: cannot use bool with this span because csharp marshal it as 4 bytes
if (BitConverter.IsLittleEndian && (codec.FixedSize > 0 && Marshal.SizeOf(typeof(T)) == codec.FixedSize))
{
handle = GCHandle.Alloc(array, GCHandleType.Pinned);
span = new Span<byte>(handle.AddrOfPinnedObject().ToPointer(), array.Length * codec.FixedSize);
return true;
}

span = default;
handle = default;
return false;
}

#region Explicit interface implementation for IList and ICollection.
bool IList.IsFixedSize => false;

Expand Down
29 changes: 25 additions & 4 deletions csharp/src/Google.Protobuf/ParsingPrimitives.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
using System.Buffers;
using System.Buffers.Binary;
using System.Collections.Generic;
using System.Diagnostics;
using System.IO;
using System.Runtime.CompilerServices;
using System.Runtime.InteropServices;
Expand Down Expand Up @@ -45,7 +46,7 @@ public static int ParseLength(ref ReadOnlySpan<byte> buffer, ref ParserInternalS

/// <summary>
/// Parses the next tag.
/// If the end of logical stream was reached, an invalid tag of 0 is returned.
/// If the end of logical stream was reached, an invalid tag of 0 is returned.
/// </summary>
public static uint ParseTag(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
{
Expand Down Expand Up @@ -382,7 +383,7 @@ public static float ParseFloat(ref ReadOnlySpan<byte> buffer, ref ParserInternal
// ReadUnaligned uses processor architecture for endianness.
float result = Unsafe.ReadUnaligned<float>(ref MemoryMarshal.GetReference(buffer.Slice(state.bufferPos, length)));
state.bufferPos += length;
return result;
return result;
}

private static unsafe float ParseFloatSlow(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state)
Expand Down Expand Up @@ -737,7 +738,7 @@ public static uint ReadRawVarint32(Stream input)
/// </summary>
/// <remarks>
/// ZigZag encodes signed integers into values that can be efficiently
/// encoded with varint. (Otherwise, negative values must be
/// encoded with varint. (Otherwise, negative values must be
/// sign-extended to 32 bits to be varint encoded, thus always taking
/// 5 bytes on the wire.)
/// </remarks>
Expand All @@ -751,7 +752,7 @@ public static int DecodeZigZag32(uint n)
/// </summary>
/// <remarks>
/// ZigZag encodes signed integers into values that can be efficiently
/// encoded with varint. (Otherwise, negative values must be
/// encoded with varint. (Otherwise, negative values must be
/// sign-extended to 64 bits to be varint encoded, thus always taking
/// 10 bytes on the wire.)
/// </remarks>
Expand Down Expand Up @@ -810,5 +811,25 @@ private static void ReadRawBytesIntoSpan(ref ReadOnlySpan<byte> buffer, ref Pars
state.bufferPos += unreadSpan.Length;
}
}

/// <summary>
/// Read LittleEndian packed field from buffer of specified length into a span.
/// The amount of data available and the current limit should be checked before calling this method.
/// </summary>
internal static void ReadPackedFieldLittleEndian(ref ReadOnlySpan<byte> buffer, ref ParserInternalState state, int length, Span<byte> outBuffer)
{
Debug.Assert(BitConverter.IsLittleEndian);

if (length <= state.bufferSize - state.bufferPos)
{
buffer.Slice(state.bufferPos, length).CopyTo(outBuffer);
state.bufferPos += length;
}
else
{
ReadRawBytesIntoSpan(ref buffer, ref state, length, outBuffer);
}
}

}
}

0 comments on commit 85507b9

Please sign in to comment.