From 6f63dca5910aa2832a993ea22f4334aa129ccdd2 Mon Sep 17 00:00:00 2001 From: Tonttu <15074459+TheTonttu@users.noreply.github.com> Date: Fri, 14 Mar 2025 22:27:22 +0200 Subject: [PATCH] Rewrite TextFormatter.StripCRLF Uses StringBuilder and char span indexof search to reduce intermediate allocations. The new implementation behaves slightly different compared to old implementation. In synthetic LFCR scenario it is correctly removed while the old implementation left the CR, which seems like an off-by-one error. --- Benchmarks/Text/TextFormatter/StripCRLF.cs | 102 ++++++++++++++++++ Terminal.Gui/Terminal.Gui.csproj | 3 +- Terminal.Gui/Text/TextFormatter.cs | 86 +++++++++------ .../Text/TextFormatterTests.cs | 6 +- 4 files changed, 159 insertions(+), 38 deletions(-) create mode 100644 Benchmarks/Text/TextFormatter/StripCRLF.cs diff --git a/Benchmarks/Text/TextFormatter/StripCRLF.cs b/Benchmarks/Text/TextFormatter/StripCRLF.cs new file mode 100644 index 000000000..069a23d92 --- /dev/null +++ b/Benchmarks/Text/TextFormatter/StripCRLF.cs @@ -0,0 +1,102 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using Tui = Terminal.Gui; + +namespace Terminal.Gui.Benchmarks.Text.TextFormatter; + +[MemoryDiagnoser] +public class StripCRLF +{ + /// + /// Benchmark for previous implementation. + /// + /// + /// + /// + [Benchmark] + [ArgumentsSource (nameof (DataSource))] + public string Previous (string str, bool keepNewLine) + { + return RuneListToString (str, keepNewLine); + } + + /// + /// Benchmark for current implementation with StringBuilder and char span index of search. + /// + [Benchmark (Baseline = true)] + [ArgumentsSource (nameof (DataSource))] + public string Current (string str, bool keepNewLine) + { + return Tui.TextFormatter.StripCRLF (str, keepNewLine); + } + + /// + /// Previous implementation with intermediate list allocation. + /// + private static string RuneListToString (string str, bool keepNewLine = false) + { + List runes = str.ToRuneList (); + + for (var i = 0; i < runes.Count; i++) + { + switch ((char)runes [i].Value) + { + case '\n': + if (!keepNewLine) + { + runes.RemoveAt (i); + } + + break; + + case '\r': + if (i + 1 < runes.Count && runes [i + 1].Value == '\n') + { + runes.RemoveAt (i); + + if (!keepNewLine) + { + runes.RemoveAt (i); + } + + i++; + } + else + { + if (!keepNewLine) + { + runes.RemoveAt (i); + } + } + + break; + } + } + + return StringExtensions.ToString (runes); + } + + public IEnumerable DataSource () + { + string textSource = + """ + Ĺόŕéḿ íṕśúḿ d́όĺόŕ śít́ áḿét́, ćόńśéćt́ét́úŕ ád́íṕíśćíńǵ éĺít́. Ṕŕáéśéńt́ q́úíś ĺúćt́úś éĺít́. Íńt́éǵéŕ út́ áŕćú éǵét́ d́όĺόŕ śćéĺéŕíśq́úé ḿát́t́íś áć ét́ d́íáḿ. + Ṕéĺĺéńt́éśq́úé śéd́ d́áṕíb́úś ḿáśśá, v́éĺ t́ŕíśt́íq́úé d́úí. Śéd́ v́ít́áé ńéq́úé éú v́éĺít́ όŕńáŕé áĺíq́úét́. Út́ q́úíś όŕćí t́éḿṕόŕ, t́éḿṕόŕ t́úŕṕíś íd́, t́éḿṕúś ńéq́úé. + Ṕŕáéśéńt́ śáṕíéń t́úŕṕíś, όŕńáŕé v́éĺ ḿáúŕíś át́, v́áŕíúś śúśćíṕít́ áńt́é. Út́ ṕúĺv́íńáŕ t́úŕṕíś ḿáśśá, q́úíś ćúŕśúś áŕćú f́áúćíb́úś íń. + Óŕćí v́áŕíúś ńát́όq́úé ṕéńát́íb́úś ét́ ḿáǵńíś d́íś ṕáŕt́úŕíéńt́ ḿόńt́éś, ńáśćét́úŕ ŕíd́íćúĺúś ḿúś. F́úśćé át́ éx́ b́ĺáńd́ít́, ćόńv́áĺĺíś q́úáḿ ét́, v́úĺṕút́át́é ĺáćúś. + Śúśṕéńd́íśśé śít́ áḿét́ áŕćú út́ áŕćú f́áúćíb́úś v́áŕíúś. V́ív́áḿúś śít́ áḿét́ ḿáx́íḿúś d́íáḿ. Ńáḿ éx́ ĺéό, ṕh́áŕét́ŕá éú ĺόb́όŕt́íś át́, t́ŕíśt́íq́úé út́ f́éĺíś. + """; + // Consistent line endings between systems keeps performance evaluation more consistent. + textSource = textSource.ReplaceLineEndings ("\r\n"); + + bool[] permutations = [true, false]; + foreach (bool keepNewLine in permutations) + { + yield return [textSource [..1], keepNewLine]; + yield return [textSource [..10], keepNewLine]; + yield return [textSource [..100], keepNewLine]; + yield return [textSource [..(textSource.Length / 2)], keepNewLine]; + yield return [textSource, keepNewLine]; + } + } +} diff --git a/Terminal.Gui/Terminal.Gui.csproj b/Terminal.Gui/Terminal.Gui.csproj index e19e3521f..8fd390881 100644 --- a/Terminal.Gui/Terminal.Gui.csproj +++ b/Terminal.Gui/Terminal.Gui.csproj @@ -78,9 +78,10 @@ - + + diff --git a/Terminal.Gui/Text/TextFormatter.cs b/Terminal.Gui/Text/TextFormatter.cs index 82bef1f0d..5e48c128c 100644 --- a/Terminal.Gui/Text/TextFormatter.cs +++ b/Terminal.Gui/Text/TextFormatter.cs @@ -1,4 +1,5 @@ #nullable enable +using System.Buffers; using System.Diagnostics; namespace Terminal.Gui; @@ -9,6 +10,9 @@ namespace Terminal.Gui; /// public class TextFormatter { + // Utilized in CRLF related helper methods for faster newline char index search. + private static readonly SearchValues NewLineSearchValues = SearchValues.Create(['\r', '\n']); + private Key _hotKey = new (); private int _hotKeyPos = -1; private List _lines = new (); @@ -1187,45 +1191,59 @@ public class TextFormatter // TODO: Move to StringExtensions? internal static string StripCRLF (string str, bool keepNewLine = false) { - List runes = str.ToRuneList (); + StringBuilder stringBuilder = new(); - for (var i = 0; i < runes.Count; i++) + ReadOnlySpan remaining = str.AsSpan (); + while (remaining.Length > 0) { - switch ((char)runes [i].Value) + int nextLineBreakIndex = remaining.IndexOfAny (NewLineSearchValues); + if (nextLineBreakIndex == -1) { - case '\n': - if (!keepNewLine) - { - runes.RemoveAt (i); - } - - break; - - case '\r': - if (i + 1 < runes.Count && runes [i + 1].Value == '\n') - { - runes.RemoveAt (i); - - if (!keepNewLine) - { - runes.RemoveAt (i); - } - - i++; - } - else - { - if (!keepNewLine) - { - runes.RemoveAt (i); - } - } - - break; + if (str.Length == remaining.Length) + { + return str; + } + stringBuilder.Append (remaining); + break; } - } - return StringExtensions.ToString (runes); + ReadOnlySpan slice = remaining.Slice (0, nextLineBreakIndex); + stringBuilder.Append (slice); + + // Evaluate how many line break characters to preserve. + int stride; + char lineBreakChar = remaining [nextLineBreakIndex]; + if (lineBreakChar == '\n') + { + stride = 1; + if (keepNewLine) + { + stringBuilder.Append ('\n'); + } + } + else // '\r' + { + bool crlf = (nextLineBreakIndex + 1) < remaining.Length && remaining [nextLineBreakIndex + 1] == '\n'; + if (crlf) + { + stride = 2; + if (keepNewLine) + { + stringBuilder.Append ('\n'); + } + } + else + { + stride = 1; + if (keepNewLine) + { + stringBuilder.Append ('\r'); + } + } + } + remaining = remaining.Slice (slice.Length + stride); + } + return stringBuilder.ToString (); } // TODO: Move to StringExtensions? diff --git a/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs b/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs index a4dbeafc4..388714d00 100644 --- a/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs +++ b/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs @@ -2895,9 +2895,9 @@ public class TextFormatterTests [InlineData ("This has crlf\r\nin the middle", "This has crlfin the middle")] [InlineData ("This has crlf in the end\r\n", "This has crlf in the end")] // LFCR - [InlineData ("\n\rThis has lfcr in the beginning", "\rThis has lfcr in the beginning")] - [InlineData ("This has lfcr\n\rin the middle", "This has lfcr\rin the middle")] - [InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end\r")] + [InlineData ("\n\rThis has lfcr in the beginning", "This has lfcr in the beginning")] + [InlineData ("This has lfcr\n\rin the middle", "This has lfcrin the middle")] + [InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end")] // CR [InlineData ("\rThis has cr in the beginning", "This has cr in the beginning")] [InlineData ("This has cr\rin the middle", "This has crin the middle")]