diff --git a/Benchmarks/Benchmarks.csproj b/Benchmarks/Benchmarks.csproj new file mode 100644 index 000000000..62f1be76c --- /dev/null +++ b/Benchmarks/Benchmarks.csproj @@ -0,0 +1,20 @@ + + + + Exe + net8.0 + false + enable + enable + Terminal.Gui.$(MSBuildProjectName.Replace(" ", "_")) + + + + + + + + + + + diff --git a/Benchmarks/Program.cs b/Benchmarks/Program.cs new file mode 100644 index 000000000..3f017d29d --- /dev/null +++ b/Benchmarks/Program.cs @@ -0,0 +1,20 @@ +using BenchmarkDotNet.Configs; +using BenchmarkDotNet.Running; + +namespace Terminal.Gui.Benchmarks; + +class Program +{ + static void Main (string [] args) + { + var config = DefaultConfig.Instance; + + // Uncomment for faster but less accurate intermediate iteration. + // Final benchmarks should be run with at least the default run length. + //config = config.AddJob (BenchmarkDotNet.Jobs.Job.ShortRun); + + BenchmarkSwitcher + .FromAssembly (typeof (Program).Assembly) + .Run(args, config); + } +} diff --git a/Benchmarks/Text/RuneExtensions/DecodeSurrogatePair.cs b/Benchmarks/Text/RuneExtensions/DecodeSurrogatePair.cs new file mode 100644 index 000000000..5cce34424 --- /dev/null +++ b/Benchmarks/Text/RuneExtensions/DecodeSurrogatePair.cs @@ -0,0 +1,66 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using Tui = Terminal.Gui; + +namespace Terminal.Gui.Benchmarks.Text.RuneExtensions; + +/// +/// Benchmarks for performance fine-tuning. +/// +[MemoryDiagnoser] +[BenchmarkCategory (nameof (Tui.RuneExtensions))] +public class DecodeSurrogatePair +{ + /// + /// Benchmark for previous implementation. + /// + /// + /// + [Benchmark] + [ArgumentsSource (nameof (DataSource))] + public char []? Previous (Rune rune) + { + _ = RuneToStringToCharArray (rune, out char []? chars); + return chars; + } + + /// + /// Benchmark for current implementation. 
+ /// + /// Utilizes Rune methods that take a Span argument, avoiding intermediate heap array allocation when combined with a stack-allocated intermediate buffer. + /// When the rune is not a surrogate pair there is no heap allocation. + /// + /// The final surrogate pair array allocation cannot be avoided due to the current method signature design. + /// Changing the method signature, or providing an alternative method, to take a destination Span would allow further optimizations by allowing the caller to reuse a buffer for consecutive calls. + /// + [Benchmark (Baseline = true)] + [ArgumentsSource (nameof (DataSource))] + public char []? Current (Rune rune) + { + _ = Tui.RuneExtensions.DecodeSurrogatePair (rune, out char []? chars); + return chars; + } + + /// + /// Previous implementation with intermediate string allocation. + /// + /// The IsSurrogatePair implementation at the time had a hidden extra string allocation, so there were intermediate heap allocations even if the rune was not a surrogate pair. + /// + private static bool RuneToStringToCharArray (Rune rune, out char []? chars) + { + if (rune.IsSurrogatePair ()) + { + chars = rune.ToString ().ToCharArray (); + return true; + } + + chars = null; + return false; + } + + public static IEnumerable DataSource () + { + yield return new Rune ('a'); + yield return "𝔹".EnumerateRunes ().Single (); + } +} diff --git a/Benchmarks/Text/RuneExtensions/Encode.cs b/Benchmarks/Text/RuneExtensions/Encode.cs new file mode 100644 index 000000000..6f88ac554 --- /dev/null +++ b/Benchmarks/Text/RuneExtensions/Encode.cs @@ -0,0 +1,72 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using Tui = Terminal.Gui; + +namespace Terminal.Gui.Benchmarks.Text.RuneExtensions; + +/// +/// Benchmarks for performance fine-tuning. +/// +[MemoryDiagnoser] +[BenchmarkCategory (nameof (Tui.RuneExtensions))] +public class Encode +{ + /// + /// Benchmark for previous implementation. 
+ /// + [Benchmark] + [ArgumentsSource (nameof (DataSource))] + public byte [] Previous (Rune rune, byte [] destination, int start, int count) + { + _ = StringEncodingGetBytes (rune, destination, start, count); + return destination; + } + + /// + /// Benchmark for current implementation. + /// + /// Avoids intermediate heap allocations with a stack-allocated intermediate buffer. + /// + [Benchmark (Baseline = true)] + [ArgumentsSource (nameof (DataSource))] + public byte [] Current (Rune rune, byte [] destination, int start, int count) + { + _ = Tui.RuneExtensions.Encode (rune, destination, start, count); + return destination; + } + + /// + /// Previous implementation with intermediate byte array and string allocation. + /// + private static int StringEncodingGetBytes (Rune rune, byte [] dest, int start = 0, int count = -1) + { + byte [] bytes = Encoding.UTF8.GetBytes (rune.ToString ()); + var length = 0; + + for (var i = 0; i < (count == -1 ? bytes.Length : count); i++) + { + if (bytes [i] == 0) + { + break; + } + + dest [start + i] = bytes [i]; + length++; + } + + return length; + } + + public static IEnumerable DataSource () + { + Rune[] runes = [ new Rune ('a'),"𝔞".EnumerateRunes().Single() ]; + + foreach (var rune in runes) + { + yield return new object [] { rune, new byte [16], 0, -1 }; + yield return new object [] { rune, new byte [16], 8, -1 }; + // Does not work in the original implementation (count exceeds the encoded byte length, so the loop indexes past the intermediate byte array) + //yield return new object [] { rune, new byte [16], 8, 8 }; + } + } +} diff --git a/Benchmarks/Text/RuneExtensions/EncodeSurrogatePair.cs b/Benchmarks/Text/RuneExtensions/EncodeSurrogatePair.cs new file mode 100644 index 000000000..7319fdba4 --- /dev/null +++ b/Benchmarks/Text/RuneExtensions/EncodeSurrogatePair.cs @@ -0,0 +1,36 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using Tui = Terminal.Gui; + +namespace Terminal.Gui.Benchmarks.Text.RuneExtensions; + +/// +/// Benchmarks for performance fine-tuning. 
+/// +[MemoryDiagnoser] +[BenchmarkCategory (nameof (Tui.RuneExtensions))] +public class EncodeSurrogatePair +{ + /// + /// Benchmark for current implementation. + /// + [Benchmark (Baseline = true)] + [ArgumentsSource (nameof (DataSource))] + public Rune Current (char highSurrogate, char lowSurrogate) + { + _ = Tui.RuneExtensions.EncodeSurrogatePair (highSurrogate, lowSurrogate, out Rune rune); + return rune; + } + + public static IEnumerable DataSource () + { + string[] runeStrings = ["🍕", "🧠", "🌹"]; + foreach (string symbol in runeStrings) + { + if (symbol is [char high, char low]) + { + yield return [high, low]; + } + } + } +} diff --git a/Benchmarks/Text/RuneExtensions/GetEncodingLength.cs b/Benchmarks/Text/RuneExtensions/GetEncodingLength.cs new file mode 100644 index 000000000..4682940de --- /dev/null +++ b/Benchmarks/Text/RuneExtensions/GetEncodingLength.cs @@ -0,0 +1,74 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using Tui = Terminal.Gui; + +namespace Terminal.Gui.Benchmarks.Text.RuneExtensions; + +/// +/// Benchmarks for performance fine-tuning. +/// +[MemoryDiagnoser] +[BenchmarkCategory (nameof (Tui.RuneExtensions))] +public class GetEncodingLength +{ + /// + /// Benchmark for previous implementation. + /// + [Benchmark] + [ArgumentsSource (nameof (DataSource))] + public int Previous (Rune rune, PrettyPrintedEncoding encoding) + { + return WithEncodingGetBytesArray (rune, encoding); + } + + /// + /// Benchmark for current implementation. + /// + [Benchmark (Baseline = true)] + [ArgumentsSource (nameof (DataSource))] + public int Current (Rune rune, PrettyPrintedEncoding encoding) + { + return Tui.RuneExtensions.GetEncodingLength (rune, encoding); + } + + /// + /// Previous implementation with intermediate byte array, string, and char array allocation. + /// + private static int WithEncodingGetBytesArray (Rune rune, Encoding? 
encoding = null) + { + encoding ??= Encoding.UTF8; + byte [] bytes = encoding.GetBytes (rune.ToString ().ToCharArray ()); + var offset = 0; + + if (bytes [^1] == 0) + { + offset++; + } + + return bytes.Length - offset; + } + + public static IEnumerable DataSource () + { + PrettyPrintedEncoding[] encodings = [ new(Encoding.UTF8), new(Encoding.Unicode), new(Encoding.UTF32) ]; + Rune[] runes = [ new Rune ('a'), "𝔹".EnumerateRunes ().Single () ]; + + foreach (var encoding in encodings) + { + foreach (Rune rune in runes) + { + yield return [rune, encoding]; + } + } + } + + /// + /// Wrapper to display the proper encoding name in benchmark results. + /// + public record PrettyPrintedEncoding (Encoding Encoding) + { + public static implicit operator Encoding (PrettyPrintedEncoding ppe) => ppe.Encoding; + + public override string ToString () => Encoding.HeaderName; + } +} diff --git a/Benchmarks/Text/RuneExtensions/IsSurrogatePair.cs b/Benchmarks/Text/RuneExtensions/IsSurrogatePair.cs new file mode 100644 index 000000000..865f075a3 --- /dev/null +++ b/Benchmarks/Text/RuneExtensions/IsSurrogatePair.cs @@ -0,0 +1,50 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using Tui = Terminal.Gui; + +namespace Terminal.Gui.Benchmarks.Text.RuneExtensions; + +/// +/// Benchmarks for performance fine-tuning. +/// +[MemoryDiagnoser] +[BenchmarkCategory (nameof (Tui.RuneExtensions))] +public class IsSurrogatePair +{ + /// + /// Benchmark for previous implementation. + /// + /// + [Benchmark] + [ArgumentsSource (nameof (DataSource))] + public bool Previous (Rune rune) + { + return WithToString (rune); + } + + /// + /// Benchmark for current implementation. + /// + /// Avoids intermediate heap allocations by using a stack-allocated buffer. + /// + [Benchmark (Baseline = true)] + [ArgumentsSource (nameof (DataSource))] + public bool Current (Rune rune) + { + return Tui.RuneExtensions.IsSurrogatePair (rune); + } + + /// + /// Previous implementation with intermediate string allocation. 
+ /// + private static bool WithToString (Rune rune) + { + return char.IsSurrogatePair (rune.ToString (), 0); + } + + public static IEnumerable DataSource () + { + yield return new Rune ('a'); + yield return "𝔹".EnumerateRunes ().Single (); + } +} diff --git a/Terminal.Gui/Text/RuneExtensions.cs b/Terminal.Gui/Text/RuneExtensions.cs index 835dba679..f5097316f 100644 --- a/Terminal.Gui/Text/RuneExtensions.cs +++ b/Terminal.Gui/Text/RuneExtensions.cs @@ -1,4 +1,6 @@ -using System.Globalization; +#nullable enable + +using System.Globalization; using Wcwidth; namespace Terminal.Gui; @@ -7,7 +9,7 @@ namespace Terminal.Gui; public static class RuneExtensions { /// Maximum Unicode code point. - public static int MaxUnicodeCodePoint = 0x10FFFF; + public static readonly int MaxUnicodeCodePoint = 0x10FFFF; /// Reports if the provided array of bytes can be encoded as UTF-8. /// The byte array to probe. @@ -32,17 +34,25 @@ public static class RuneExtensions /// The rune to decode. /// The chars if the rune is a surrogate pair. Null otherwise. /// if the rune is a valid surrogate pair; otherwise. - public static bool DecodeSurrogatePair (this Rune rune, out char [] chars) + public static bool DecodeSurrogatePair (this Rune rune, out char []? chars) { - if (rune.IsSurrogatePair ()) + bool isSingleUtf16CodeUnit = rune.IsBmp; + if (isSingleUtf16CodeUnit) { - chars = rune.ToString ().ToCharArray (); + chars = null; + return false; + } + const int maxCharsPerRune = 2; + Span charBuffer = stackalloc char[maxCharsPerRune]; + int charsWritten = rune.EncodeToUtf16 (charBuffer); + if (charsWritten >= 2 && char.IsSurrogatePair (charBuffer [0], charBuffer [1])) + { + chars = charBuffer [..charsWritten].ToArray (); return true; } chars = null; - return false; } @@ -55,21 +65,24 @@ public static class RuneExtensions /// he number of bytes written into the destination buffer. 
public static int Encode (this Rune rune, byte [] dest, int start = 0, int count = -1) { - byte [] bytes = Encoding.UTF8.GetBytes (rune.ToString ()); - var length = 0; + const int maxUtf8BytesPerRune = 4; + Span bytes = stackalloc byte[maxUtf8BytesPerRune]; + int writtenBytes = rune.EncodeToUtf8 (bytes); - for (var i = 0; i < (count == -1 ? bytes.Length : count); i++) + int bytesToCopy = count == -1 + ? writtenBytes + : Math.Min (count, writtenBytes); + int bytesWritten = 0; + for (int i = 0; i < bytesToCopy; i++) { - if (bytes [i] == 0) + if (bytes [i] == '\0') { break; } - dest [start + i] = bytes [i]; - length++; + bytesWritten++; } - - return length; + return bytesWritten; } /// Attempts to encode (as UTF-16) a surrogate pair. @@ -105,18 +118,26 @@ public static class RuneExtensions /// The rune to probe. /// The encoding used; the default is UTF8. /// The number of bytes required. - public static int GetEncodingLength (this Rune rune, Encoding encoding = null) + public static int GetEncodingLength (this Rune rune, Encoding? encoding = null) { encoding ??= Encoding.UTF8; - byte [] bytes = encoding.GetBytes (rune.ToString ().ToCharArray ()); - var offset = 0; - if (bytes [^1] == 0) + const int maxCharsPerRune = 2; + // Get characters with UTF-16 to keep that part independent of the selected encoding. + Span charBuffer = stackalloc char[maxCharsPerRune]; + int charsWritten = rune.EncodeToUtf16(charBuffer); + Span chars = charBuffer[..charsWritten]; + + int maxEncodedLength = encoding.GetMaxByteCount (charsWritten); + Span byteBuffer = stackalloc byte[maxEncodedLength]; + int bytesEncoded = encoding.GetBytes (chars, byteBuffer); + ReadOnlySpan encodedBytes = byteBuffer[..bytesEncoded]; + + if (encodedBytes [^1] == '\0') { - offset++; + return encodedBytes.Length - 1; } - - return bytes.Length - offset; + return encodedBytes.Length; } /// Returns if the rune is a combining character. 
@@ -127,7 +148,7 @@ public static class RuneExtensions { UnicodeCategory category = Rune.GetUnicodeCategory (rune); - return Rune.GetUnicodeCategory (rune) == UnicodeCategory.NonSpacingMark + return category == UnicodeCategory.NonSpacingMark || category == UnicodeCategory.SpacingCombiningMark || category == UnicodeCategory.EnclosingMark; } @@ -136,7 +157,19 @@ public static class RuneExtensions /// This is a Terminal.Gui extension method to to support TUI text manipulation. /// The rune to probe. /// if the rune is a surrogate code point; otherwise. - public static bool IsSurrogatePair (this Rune rune) { return char.IsSurrogatePair (rune.ToString (), 0); } + public static bool IsSurrogatePair (this Rune rune) + { + bool isSingleUtf16CodeUnit = rune.IsBmp; + if (isSingleUtf16CodeUnit) + { + return false; + } + + const int maxCharsPerRune = 2; + Span charBuffer = stackalloc char[maxCharsPerRune]; + int charsWritten = rune.EncodeToUtf16 (charBuffer); + return charsWritten >= 2 && char.IsSurrogatePair (charBuffer [0], charBuffer [1]); + } /// /// Ensures the rune is not a control character and can be displayed by translating characters below 0x20 to diff --git a/Terminal.sln b/Terminal.sln index 5f2eda9e8..7cb9d0367 100644 --- a/Terminal.sln +++ b/Terminal.sln @@ -48,6 +48,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "SelfContained", "SelfContai EndProject Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "NativeAot", "NativeAot\NativeAot.csproj", "{E6D716C6-AC94-4150-B10A-44AE13F79344}" EndProject +Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Benchmarks", "Benchmarks\Benchmarks.csproj", "{242FBD3E-2EC6-4274-BD40-8E62AF9327B2}" +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Any CPU = Debug|Any CPU @@ -86,6 +88,10 @@ Global {E6D716C6-AC94-4150-B10A-44AE13F79344}.Debug|Any CPU.Build.0 = Debug|Any CPU {E6D716C6-AC94-4150-B10A-44AE13F79344}.Release|Any CPU.ActiveCfg = Release|Any CPU 
{E6D716C6-AC94-4150-B10A-44AE13F79344}.Release|Any CPU.Build.0 = Release|Any CPU + {242FBD3E-2EC6-4274-BD40-8E62AF9327B2}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {242FBD3E-2EC6-4274-BD40-8E62AF9327B2}.Debug|Any CPU.Build.0 = Debug|Any CPU + {242FBD3E-2EC6-4274-BD40-8E62AF9327B2}.Release|Any CPU.ActiveCfg = Release|Any CPU + {242FBD3E-2EC6-4274-BD40-8E62AF9327B2}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/UnitTests/Text/RuneTests.cs b/UnitTests/Text/RuneTests.cs index 95a4f53f3..0ede7d3ba 100644 --- a/UnitTests/Text/RuneTests.cs +++ b/UnitTests/Text/RuneTests.cs @@ -902,6 +902,23 @@ public class RuneTests Assert.Equal (3, splitOnComma.Length); } + [Theory] + [InlineData ("a", "utf-8", 1)] + [InlineData ("a", "utf-16", 1)] + [InlineData ("a", "utf-32", 3)] + [InlineData ("𝔹", "utf-8", 4)] + [InlineData ("𝔹", "utf-16", 4)] + [InlineData ("𝔹", "utf-32", 3)] + public void GetEncodingLength_ReturnsLengthBasedOnSelectedEncoding (string runeStr, string encodingName, int expectedLength) + { + Rune rune = runeStr.EnumerateRunes ().Single (); + var encoding = Encoding.GetEncoding (encodingName); + + int actualLength = rune.GetEncodingLength (encoding); + + Assert.Equal (expectedLength, actualLength); + } + private int CountLettersInString (string s) { var letterCount = 0;