diff --git a/Benchmarks/Text/TextFormatter/StripCRLF.cs b/Benchmarks/Text/TextFormatter/StripCRLF.cs new file mode 100644 index 000000000..069a23d92 --- /dev/null +++ b/Benchmarks/Text/TextFormatter/StripCRLF.cs @@ -0,0 +1,102 @@ +using System.Text; +using BenchmarkDotNet.Attributes; +using Tui = Terminal.Gui; + +namespace Terminal.Gui.Benchmarks.Text.TextFormatter; + +[MemoryDiagnoser] +public class StripCRLF +{ + /// + /// Benchmark for previous implementation. + /// + /// + /// + /// + [Benchmark] + [ArgumentsSource (nameof (DataSource))] + public string Previous (string str, bool keepNewLine) + { + return RuneListToString (str, keepNewLine); + } + + /// + /// Benchmark for current implementation with StringBuilder and char span index of search. + /// + [Benchmark (Baseline = true)] + [ArgumentsSource (nameof (DataSource))] + public string Current (string str, bool keepNewLine) + { + return Tui.TextFormatter.StripCRLF (str, keepNewLine); + } + + /// + /// Previous implementation with intermediate list allocation. + /// + private static string RuneListToString (string str, bool keepNewLine = false) + { + List runes = str.ToRuneList (); + + for (var i = 0; i < runes.Count; i++) + { + switch ((char)runes [i].Value) + { + case '\n': + if (!keepNewLine) + { + runes.RemoveAt (i); + } + + break; + + case '\r': + if (i + 1 < runes.Count && runes [i + 1].Value == '\n') + { + runes.RemoveAt (i); + + if (!keepNewLine) + { + runes.RemoveAt (i); + } + + i++; + } + else + { + if (!keepNewLine) + { + runes.RemoveAt (i); + } + } + + break; + } + } + + return StringExtensions.ToString (runes); + } + + public IEnumerable DataSource () + { + string textSource = + """ + Ĺόŕéḿ íṕśúḿ d́όĺόŕ śít́ áḿét́, ćόńśéćt́ét́úŕ ád́íṕíśćíńǵ éĺít́. Ṕŕáéśéńt́ q́úíś ĺúćt́úś éĺít́. Íńt́éǵéŕ út́ áŕćú éǵét́ d́όĺόŕ śćéĺéŕíśq́úé ḿát́t́íś áć ét́ d́íáḿ. + Ṕéĺĺéńt́éśq́úé śéd́ d́áṕíb́úś ḿáśśá, v́éĺ t́ŕíśt́íq́úé d́úí. Śéd́ v́ít́áé ńéq́úé éú v́éĺít́ όŕńáŕé áĺíq́úét́. Út́ q́úíś όŕćí t́éḿṕόŕ, t́éḿṕόŕ t́úŕṕíś íd́, t́éḿṕúś ńéq́úé. + Ṕŕáéśéńt́ śáṕíéń t́úŕṕíś, όŕńáŕé v́éĺ ḿáúŕíś át́, v́áŕíúś śúśćíṕít́ áńt́é. Út́ ṕúĺv́íńáŕ t́úŕṕíś ḿáśśá, q́úíś ćúŕśúś áŕćú f́áúćíb́úś íń. + Óŕćí v́áŕíúś ńát́όq́úé ṕéńát́íb́úś ét́ ḿáǵńíś d́íś ṕáŕt́úŕíéńt́ ḿόńt́éś, ńáśćét́úŕ ŕíd́íćúĺúś ḿúś. F́úśćé át́ éx́ b́ĺáńd́ít́, ćόńv́áĺĺíś q́úáḿ ét́, v́úĺṕút́át́é ĺáćúś. + Śúśṕéńd́íśśé śít́ áḿét́ áŕćú út́ áŕćú f́áúćíb́úś v́áŕíúś. V́ív́áḿúś śít́ áḿét́ ḿáx́íḿúś d́íáḿ. Ńáḿ éx́ ĺéό, ṕh́áŕét́ŕá éú ĺόb́όŕt́íś át́, t́ŕíśt́íq́úé út́ f́éĺíś. + """; + // Consistent line endings between systems keeps performance evaluation more consistent. + textSource = textSource.ReplaceLineEndings ("\r\n"); + + bool[] permutations = [true, false]; + foreach (bool keepNewLine in permutations) + { + yield return [textSource [..1], keepNewLine]; + yield return [textSource [..10], keepNewLine]; + yield return [textSource [..100], keepNewLine]; + yield return [textSource [..(textSource.Length / 2)], keepNewLine]; + yield return [textSource, keepNewLine]; + } + } +} diff --git a/Terminal.Gui/Terminal.Gui.csproj b/Terminal.Gui/Terminal.Gui.csproj index e19e3521f..8fd390881 100644 --- a/Terminal.Gui/Terminal.Gui.csproj +++ b/Terminal.Gui/Terminal.Gui.csproj @@ -78,9 +78,10 @@ - + + diff --git a/Terminal.Gui/Text/TextFormatter.cs b/Terminal.Gui/Text/TextFormatter.cs index 82bef1f0d..5e48c128c 100644 --- a/Terminal.Gui/Text/TextFormatter.cs +++ b/Terminal.Gui/Text/TextFormatter.cs @@ -1,4 +1,5 @@ #nullable enable +using System.Buffers; using System.Diagnostics; namespace Terminal.Gui; @@ -9,6 +10,9 @@ namespace Terminal.Gui; /// public class TextFormatter { + // Utilized in CRLF related helper methods for faster newline char index search. + private static readonly SearchValues NewLineSearchValues = SearchValues.Create(['\r', '\n']); + private Key _hotKey = new (); private int _hotKeyPos = -1; private List _lines = new (); @@ -1187,45 +1191,59 @@ public class TextFormatter // TODO: Move to StringExtensions? internal static string StripCRLF (string str, bool keepNewLine = false) { - List runes = str.ToRuneList (); + StringBuilder stringBuilder = new(); - for (var i = 0; i < runes.Count; i++) + ReadOnlySpan remaining = str.AsSpan (); + while (remaining.Length > 0) { - switch ((char)runes [i].Value) + int nextLineBreakIndex = remaining.IndexOfAny (NewLineSearchValues); + if (nextLineBreakIndex == -1) { - case '\n': - if (!keepNewLine) - { - runes.RemoveAt (i); - } - - break; - - case '\r': - if (i + 1 < runes.Count && runes [i + 1].Value == '\n') - { - runes.RemoveAt (i); - - if (!keepNewLine) - { - runes.RemoveAt (i); - } - - i++; - } - else - { - if (!keepNewLine) - { - runes.RemoveAt (i); - } - } - - break; + if (str.Length == remaining.Length) + { + return str; + } + stringBuilder.Append (remaining); + break; } - } - return StringExtensions.ToString (runes); + ReadOnlySpan slice = remaining.Slice (0, nextLineBreakIndex); + stringBuilder.Append (slice); + + // Evaluate how many line break characters to preserve. + int stride; + char lineBreakChar = remaining [nextLineBreakIndex]; + if (lineBreakChar == '\n') + { + stride = 1; + if (keepNewLine) + { + stringBuilder.Append ('\n'); + } + } + else // '\r' + { + bool crlf = (nextLineBreakIndex + 1) < remaining.Length && remaining [nextLineBreakIndex + 1] == '\n'; + if (crlf) + { + stride = 2; + if (keepNewLine) + { + stringBuilder.Append ('\n'); + } + } + else + { + stride = 1; + if (keepNewLine) + { + stringBuilder.Append ('\r'); + } + } + } + remaining = remaining.Slice (slice.Length + stride); + } + return stringBuilder.ToString (); } // TODO: Move to StringExtensions? diff --git a/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs b/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs index a4dbeafc4..388714d00 100644 --- a/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs +++ b/Tests/UnitTestsParallelizable/Text/TextFormatterTests.cs @@ -2895,9 +2895,9 @@ public class TextFormatterTests [InlineData ("This has crlf\r\nin the middle", "This has crlfin the middle")] [InlineData ("This has crlf in the end\r\n", "This has crlf in the end")] // LFCR - [InlineData ("\n\rThis has lfcr in the beginning", "\rThis has lfcr in the beginning")] - [InlineData ("This has lfcr\n\rin the middle", "This has lfcr\rin the middle")] - [InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end\r")] + [InlineData ("\n\rThis has lfcr in the beginning", "This has lfcr in the beginning")] + [InlineData ("This has lfcr\n\rin the middle", "This has lfcrin the middle")] + [InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end")] // CR [InlineData ("\rThis has cr in the beginning", "This has cr in the beginning")] [InlineData ("This has cr\rin the middle", "This has crin the middle")]