Rune extensions micro-optimizations (#3910)

* Add benchmarks for potentially optimizable RuneExtensions

* Add new RuneExtensions.DecodeSurrogatePair benchmark implementation

Avoids intermediate heap array allocations, which is especially beneficial when the rune is not a surrogate pair because heap array allocations are then avoided completely.

* Enable nullable reference types in RuneExtensions

* Make RuneExtensions.MaxUnicodeCodePoint readonly

Makes sure no one can accidentally change the value. Ideally it would be a const value.

* Optimize RuneExtensions.DecodeSurrogatePair

* Remove duplicate Rune.GetUnicodeCategory call

* Add new RuneExtensions.IsSurrogatePair benchmark implementation

Avoids intermediate heap allocations by using a stack-allocated buffer.

* Optimize RuneExtensions.IsSurrogatePair

* Add RuneExtensions.GetEncodingLength tests

* Optimize RuneExtensions.GetEncodingLength

* Optimize RuneExtensions.Encode

* Print encoding name in benchmark results

* Rename variable to better match return description

* Add RuneExtensions.EncodeSurrogatePair benchmark

---------

Co-authored-by: Tig <tig@users.noreply.github.com>
This commit is contained in:
Tonttu
2025-02-25 18:42:32 +02:00
committed by GitHub
parent ff353fc57c
commit e24bd67658
10 changed files with 417 additions and 23 deletions

View File

@@ -0,0 +1,20 @@
<Project Sdk="Microsoft.NET.Sdk">
<!-- Benchmark host project: a console executable that runs BenchmarkDotNet benchmarks against Terminal.Gui. -->
<PropertyGroup>
<!-- Built as an executable so the benchmarks can be launched from the command line. -->
<OutputType>Exe</OutputType>
<TargetFramework>net8.0</TargetFramework>
<!-- Not intended to be packed as a NuGet package. -->
<IsPackable>false</IsPackable>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
<!-- Root namespace is "Terminal.Gui." followed by the project name with spaces replaced by underscores. -->
<RootNamespace>Terminal.Gui.$(MSBuildProjectName.Replace(" ", "_"))</RootNamespace>
</PropertyGroup>
<ItemGroup>
<!-- Benchmark harness used by the benchmark classes and the program entry point. -->
<PackageReference Include="BenchmarkDotNet" Version="0.14.0" />
</ItemGroup>
<ItemGroup>
<!-- The library whose members are being benchmarked. -->
<ProjectReference Include="..\Terminal.Gui\Terminal.Gui.csproj" />
</ItemGroup>
</Project>

20
Benchmarks/Program.cs Normal file
View File

@@ -0,0 +1,20 @@
using BenchmarkDotNet.Configs;
using BenchmarkDotNet.Running;
namespace Terminal.Gui.Benchmarks;
class Program
{
    /// <summary>
    /// Entry point of the benchmark executable. Forwards the command line arguments to
    /// <see cref="BenchmarkSwitcher"/> so benchmarks declared in this assembly can be run.
    /// </summary>
    /// <param name="args">Command line arguments passed through to the benchmark switcher.</param>
    static void Main (string [] args)
    {
        IConfig benchmarkConfig = DefaultConfig.Instance;

        // Uncomment for faster but less accurate intermediate iteration.
        // Final benchmarks should be run with at least the default run length.
        //benchmarkConfig = benchmarkConfig.AddJob (BenchmarkDotNet.Jobs.Job.ShortRun);

        BenchmarkSwitcher switcher = BenchmarkSwitcher.FromAssembly (typeof (Program).Assembly);
        switcher.Run (args, benchmarkConfig);
    }
}

View File

@@ -0,0 +1,66 @@
using System.Text;
using BenchmarkDotNet.Attributes;
using Tui = Terminal.Gui;
namespace Terminal.Gui.Benchmarks.Text.RuneExtensions;
/// <summary>
/// Benchmarks used to fine-tune the performance of <see cref="Tui.RuneExtensions.DecodeSurrogatePair"/>.
/// </summary>
[MemoryDiagnoser]
[BenchmarkCategory (nameof (Tui.RuneExtensions))]
public class DecodeSurrogatePair
{
    /// <summary>
    /// Benchmark for the previous implementation.
    /// </summary>
    /// <param name="rune">Rune to decode; supplied by <see cref="DataSource"/>.</param>
    /// <returns>
    /// The chars of the surrogate pair, or <see langword="null"/> when <paramref name="rune"/> is not a surrogate pair.
    /// </returns>
    [Benchmark]
    [ArgumentsSource (nameof (DataSource))]
    public char []? Previous (Rune rune)
    {
        RuneToStringToCharArray (rune, out char []? decoded);

        return decoded;
    }

    /// <summary>
    /// Benchmark for the current implementation.
    ///
    /// The current implementation relies on Rune methods that accept a Span together with a stack allocated
    /// intermediate buffer, which avoids an intermediate heap array allocation.
    /// When the rune is not a surrogate pair there is no heap allocation at all.
    ///
    /// The final surrogate pair array allocation cannot be avoided with the current method signature.
    /// Changing the signature, or offering an alternative method that takes a destination Span, would allow
    /// callers to reuse a buffer across consecutive calls and enable further optimizations.
    /// </summary>
    /// <param name="rune">Rune to decode; supplied by <see cref="DataSource"/>.</param>
    /// <returns>The <c>chars</c> output of <see cref="Tui.RuneExtensions.DecodeSurrogatePair"/>.</returns>
    [Benchmark (Baseline = true)]
    [ArgumentsSource (nameof (DataSource))]
    public char []? Current (Rune rune)
    {
        Tui.RuneExtensions.DecodeSurrogatePair (rune, out char []? decoded);

        return decoded;
    }

    /// <summary>
    /// Previous implementation, which goes through an intermediate string allocation.
    ///
    /// At the time, the IsSurrogatePair implementation also had a hidden extra string allocation, so there were
    /// intermediate heap allocations even when the rune was not a surrogate pair. Note that this helper calls
    /// whatever <c>IsSurrogatePair</c> extension is currently in scope.
    /// </summary>
    /// <param name="rune">Rune to decode.</param>
    /// <param name="chars">Receives the chars of the rune's string form, or <see langword="null"/>.</param>
    /// <returns><see langword="true"/> when the rune is a surrogate pair; otherwise <see langword="false"/>.</returns>
    private static bool RuneToStringToCharArray (Rune rune, out char []? chars)
    {
        if (!rune.IsSurrogatePair ())
        {
            chars = null;

            return false;
        }

        chars = rune.ToString ().ToCharArray ();

        return true;
    }

    /// <summary>
    /// Benchmark arguments: one rune created from the char 'a' and one rune taken from a two-char string.
    /// </summary>
    /// <returns>The runes passed to each benchmark method.</returns>
    public static IEnumerable<object> DataSource ()
    {
        return new object []
        {
            new Rune ('a'),
            "𝔹".EnumerateRunes ().Single ()
        };
    }
}

View File

@@ -0,0 +1,72 @@
using System.Text;
using BenchmarkDotNet.Attributes;
using Tui = Terminal.Gui;
namespace Terminal.Gui.Benchmarks.Text.RuneExtensions;
/// <summary>
/// Benchmarks used to fine-tune the performance of <see cref="Tui.RuneExtensions.Encode"/>.
/// </summary>
[MemoryDiagnoser]
[BenchmarkCategory (nameof (Tui.RuneExtensions))]
public class Encode
{
    /// <summary>
    /// Benchmark for the previous implementation.
    /// </summary>
    /// <param name="rune">Rune to encode.</param>
    /// <param name="destination">Buffer that receives the encoded bytes.</param>
    /// <param name="start">Index in <paramref name="destination"/> where writing begins.</param>
    /// <param name="count">Number of bytes to process, or -1 to process all encoded bytes.</param>
    /// <returns>The <paramref name="destination"/> buffer.</returns>
    [Benchmark]
    [ArgumentsSource (nameof (DataSource))]
    public byte [] Previous (Rune rune, byte [] destination, int start, int count)
    {
        StringEncodingGetBytes (rune, destination, start, count);

        return destination;
    }

    /// <summary>
    /// Benchmark for the current implementation.
    ///
    /// Avoids intermediate heap allocations with a stack allocated intermediate buffer.
    /// </summary>
    /// <param name="rune">Rune to encode.</param>
    /// <param name="destination">Buffer that receives the encoded bytes.</param>
    /// <param name="start">Index in <paramref name="destination"/> where writing begins.</param>
    /// <param name="count">Forwarded unchanged to <see cref="Tui.RuneExtensions.Encode"/>.</param>
    /// <returns>The <paramref name="destination"/> buffer.</returns>
    [Benchmark (Baseline = true)]
    [ArgumentsSource (nameof (DataSource))]
    public byte [] Current (Rune rune, byte [] destination, int start, int count)
    {
        Tui.RuneExtensions.Encode (rune, destination, start, count);

        return destination;
    }

    /// <summary>
    /// Previous implementation, which allocates an intermediate string and an intermediate byte array.
    /// </summary>
    /// <param name="rune">Rune to encode as UTF-8.</param>
    /// <param name="dest">Buffer that receives the encoded bytes.</param>
    /// <param name="start">Index in <paramref name="dest"/> where writing begins.</param>
    /// <param name="count">Number of bytes to process, or -1 to process all encoded bytes.</param>
    /// <returns>The number of bytes copied into <paramref name="dest"/>.</returns>
    private static int StringEncodingGetBytes (Rune rune, byte [] dest, int start = 0, int count = -1)
    {
        byte [] encoded = Encoding.UTF8.GetBytes (rune.ToString ());
        var copied = 0;

        // Copying stops at the requested limit or at the first zero byte, whichever comes first.
        while (copied < (count == -1 ? encoded.Length : count) && encoded [copied] != 0)
        {
            dest [start + copied] = encoded [copied];
            copied++;
        }

        return copied;
    }

    /// <summary>
    /// Benchmark arguments: each rune is paired with a fresh 16-byte buffer, a start offset and a count of -1.
    /// </summary>
    /// <returns>Argument arrays in the order (rune, destination, start, count).</returns>
    public static IEnumerable<object []> DataSource ()
    {
        Rune [] runes = [new Rune ('a'), "𝔞".EnumerateRunes ().Single ()];
        int [] startOffsets = [0, 8];

        foreach (Rune rune in runes)
        {
            foreach (int startOffset in startOffsets)
            {
                yield return [rune, new byte [16], startOffset, -1];
            }

            // Does not work in original implementation
            //yield return new object [] { rune, new byte [16], 8, 8 };
        }
    }
}

View File

@@ -0,0 +1,36 @@
using System.Text;
using BenchmarkDotNet.Attributes;
using Tui = Terminal.Gui;
namespace Terminal.Gui.Benchmarks.Text.RuneExtensions;
/// <summary>
/// Benchmarks used to fine-tune the performance of <see cref="Tui.RuneExtensions.EncodeSurrogatePair"/>.
/// </summary>
[MemoryDiagnoser]
[BenchmarkCategory (nameof (Tui.RuneExtensions))]
public class EncodeSurrogatePair
{
    /// <summary>
    /// Benchmark for the current implementation.
    /// </summary>
    /// <param name="highSurrogate">First char of the pair.</param>
    /// <param name="lowSurrogate">Second char of the pair.</param>
    /// <returns>The rune output by <see cref="Tui.RuneExtensions.EncodeSurrogatePair"/>.</returns>
    [Benchmark (Baseline = true)]
    [ArgumentsSource (nameof (DataSource))]
    public Rune Current (char highSurrogate, char lowSurrogate)
    {
        Tui.RuneExtensions.EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune encoded);

        return encoded;
    }

    /// <summary>
    /// Benchmark arguments: the two chars of every sample string that consists of exactly two chars.
    /// </summary>
    /// <returns>Argument arrays in the order (high, low).</returns>
    public static IEnumerable<object []> DataSource ()
    {
        foreach (string symbol in new [] { "🍕", "🧠", "🌹" })
        {
            // Only strings made of exactly two chars yield an argument pair.
            if (symbol.Length != 2)
            {
                continue;
            }

            yield return new object [] { symbol [0], symbol [1] };
        }
    }
}

View File

@@ -0,0 +1,74 @@
using System.Text;
using BenchmarkDotNet.Attributes;
using Tui = Terminal.Gui;
namespace Terminal.Gui.Benchmarks.Text.RuneExtensions;
/// <summary>
/// Benchmarks used to fine-tune the performance of <see cref="Tui.RuneExtensions.GetEncodingLength"/>.
/// </summary>
[MemoryDiagnoser]
[BenchmarkCategory (nameof (Tui.RuneExtensions))]
public class GetEncodingLength
{
    /// <summary>
    /// Benchmark for the previous implementation.
    /// </summary>
    /// <param name="rune">Rune whose encoded length is measured.</param>
    /// <param name="encoding">Encoding wrapper; converted implicitly to <see cref="Encoding"/>.</param>
    /// <returns>The length computed by the previous implementation.</returns>
    [Benchmark]
    [ArgumentsSource (nameof (DataSource))]
    public int Previous (Rune rune, PrettyPrintedEncoding encoding) => WithEncodingGetBytesArray (rune, encoding);

    /// <summary>
    /// Benchmark for the current implementation.
    /// </summary>
    /// <param name="rune">Rune whose encoded length is measured.</param>
    /// <param name="encoding">Encoding wrapper; converted implicitly to <see cref="Encoding"/>.</param>
    /// <returns>The value returned by <see cref="Tui.RuneExtensions.GetEncodingLength"/>.</returns>
    [Benchmark (Baseline = true)]
    [ArgumentsSource (nameof (DataSource))]
    public int Current (Rune rune, PrettyPrintedEncoding encoding) => Tui.RuneExtensions.GetEncodingLength (rune, encoding);

    /// <summary>
    /// Previous implementation, which allocates an intermediate string, char array and byte array.
    /// </summary>
    /// <param name="rune">Rune whose encoded length is measured.</param>
    /// <param name="encoding">Encoding to use; UTF-8 when <see langword="null"/>.</param>
    /// <returns>The number of encoded bytes, minus one when the last byte is zero.</returns>
    private static int WithEncodingGetBytesArray (Rune rune, Encoding? encoding = null)
    {
        Encoding effectiveEncoding = encoding ?? Encoding.UTF8;
        byte [] encoded = effectiveEncoding.GetBytes (rune.ToString ().ToCharArray ());

        // A zero byte at the very end is not counted.
        return encoded [^1] == 0 ? encoded.Length - 1 : encoded.Length;
    }

    /// <summary>
    /// Benchmark arguments: every combination of the sample encodings and sample runes,
    /// grouped by encoding.
    /// </summary>
    /// <returns>Argument arrays in the order (rune, encoding).</returns>
    public static IEnumerable<object []> DataSource ()
    {
        PrettyPrintedEncoding [] encodings = [new (Encoding.UTF8), new (Encoding.Unicode), new (Encoding.UTF32)];
        Rune [] runes = [new Rune ('a'), "𝔹".EnumerateRunes ().Single ()];

        return encodings.SelectMany (_ => runes, (encoding, rune) => new object [] { rune, encoding });
    }

    /// <summary>
    /// Wrapper around <see cref="System.Text.Encoding"/> whose string form is the encoding's header name,
    /// so that benchmark results display a proper encoding name.
    /// </summary>
    /// <param name="Encoding">The wrapped encoding.</param>
    public record PrettyPrintedEncoding (Encoding Encoding)
    {
        /// <summary>Unwraps the underlying <see cref="System.Text.Encoding"/>.</summary>
        /// <param name="ppe">Wrapper to unwrap.</param>
        public static implicit operator Encoding (PrettyPrintedEncoding ppe)
        {
            return ppe.Encoding;
        }

        /// <summary>Returns the header name of the wrapped encoding.</summary>
        public override string ToString ()
        {
            return Encoding.HeaderName;
        }
    }
}

View File

@@ -0,0 +1,50 @@
using System.Text;
using BenchmarkDotNet.Attributes;
using Tui = Terminal.Gui;
namespace Terminal.Gui.Benchmarks.Text.RuneExtensions;
/// <summary>
/// Benchmarks used to fine-tune the performance of <see cref="Tui.RuneExtensions.IsSurrogatePair"/>.
/// </summary>
[MemoryDiagnoser]
[BenchmarkCategory (nameof (Tui.RuneExtensions))]
public class IsSurrogatePair
{
    /// <summary>
    /// Benchmark for the previous implementation.
    /// </summary>
    /// <param name="rune">Rune to test; supplied by <see cref="DataSource"/>.</param>
    /// <returns>The result of the previous implementation.</returns>
    [Benchmark]
    [ArgumentsSource (nameof (DataSource))]
    public bool Previous (Rune rune) => WithToString (rune);

    /// <summary>
    /// Benchmark for the current implementation.
    ///
    /// Avoids intermediate heap allocations by using a stack allocated buffer.
    /// </summary>
    /// <param name="rune">Rune to test; supplied by <see cref="DataSource"/>.</param>
    /// <returns>The value returned by <see cref="Tui.RuneExtensions.IsSurrogatePair"/>.</returns>
    [Benchmark (Baseline = true)]
    [ArgumentsSource (nameof (DataSource))]
    public bool Current (Rune rune) => Tui.RuneExtensions.IsSurrogatePair (rune);

    /// <summary>
    /// Previous implementation, which allocates an intermediate string.
    /// </summary>
    /// <param name="rune">Rune to test.</param>
    /// <returns>
    /// <see langword="true"/> when the rune's string form starts with a surrogate pair; otherwise <see langword="false"/>.
    /// </returns>
    private static bool WithToString (Rune rune)
    {
        string text = rune.ToString ();

        return char.IsSurrogatePair (text, 0);
    }

    /// <summary>
    /// Benchmark arguments: one rune created from the char 'a' and one rune taken from a two-char string.
    /// </summary>
    /// <returns>The runes passed to each benchmark method.</returns>
    public static IEnumerable<object> DataSource ()
    {
        Rune singleCharRune = new ('a');
        Rune twoCharRune = "𝔹".EnumerateRunes ().Single ();

        yield return singleCharRune;
        yield return twoCharRune;
    }
}