Rewrite TextFormatter.StripCRLF

Uses StringBuilder and char span indexof search to reduce intermediate allocations.

The new implementation behaves slightly different compared to old implementation. In synthetic LFCR scenario it is correctly removed while the old implementation left the CR, which seems like an off-by-one error.
This commit is contained in:
Tonttu
2025-03-14 22:27:22 +02:00
committed by Tig
parent c4502b0741
commit 6f63dca591
4 changed files with 159 additions and 38 deletions

View File

@@ -0,0 +1,102 @@
using System.Text;
using BenchmarkDotNet.Attributes;
using Tui = Terminal.Gui;
namespace Terminal.Gui.Benchmarks.Text.TextFormatter;
[MemoryDiagnoser]
public class StripCRLF
{
/// <summary>
/// Benchmark for previous implementation.
/// </summary>
/// <param name="str"></param>
/// <param name="keepNewLine"></param>
/// <returns></returns>
[Benchmark]
[ArgumentsSource (nameof (DataSource))]
public string Previous (string str, bool keepNewLine)
{
return RuneListToString (str, keepNewLine);
}
/// <summary>
/// Benchmark for current implementation with StringBuilder and char span index of search.
/// </summary>
[Benchmark (Baseline = true)]
[ArgumentsSource (nameof (DataSource))]
public string Current (string str, bool keepNewLine)
{
return Tui.TextFormatter.StripCRLF (str, keepNewLine);
}
/// <summary>
/// Previous implementation with intermediate list allocation.
/// </summary>
private static string RuneListToString (string str, bool keepNewLine = false)
{
List<Rune> runes = str.ToRuneList ();
for (var i = 0; i < runes.Count; i++)
{
switch ((char)runes [i].Value)
{
case '\n':
if (!keepNewLine)
{
runes.RemoveAt (i);
}
break;
case '\r':
if (i + 1 < runes.Count && runes [i + 1].Value == '\n')
{
runes.RemoveAt (i);
if (!keepNewLine)
{
runes.RemoveAt (i);
}
i++;
}
else
{
if (!keepNewLine)
{
runes.RemoveAt (i);
}
}
break;
}
}
return StringExtensions.ToString (runes);
}
public IEnumerable<object []> DataSource ()
{
string textSource =
"""
Ĺόŕéḿ íśúḿ d́όĺόŕ śí áḿé, ćόńśéćt́ét́úŕ ád́íṕíśćíńǵ éĺí. ŕáéśéń q́úíś ĺúćt́úś éĺí. Íńt́éǵéŕ ú áŕćú éǵé d́όĺόŕ śćéĺéŕíśq́úé ḿát́t́íś áć é d́íáḿ.
éĺĺéńt́éśq́úé śé d́áṕíb́úś ḿáśśá, v́éĺ t́ŕíśt́íq́úé d́úí. Śéd́ v́ít́áé ńéq́úé éú v́éĺít́ όŕńáŕé áĺíq́úét́. Ú q́úíś όŕćí t́éḿṕόŕ, t́éḿṕόŕ t́úŕṕíś í, t́éḿṕúś ńéq́úé.
ŕáéśéń śáíéń t́úŕṕíś, όŕńáŕé v́éĺ ḿáúŕíś á, v́áŕíúś śúśćíí áńt́é. Ú úĺv́íńáŕ t́úŕṕíś ḿáśśá, q́úíś ćúŕśúś áŕćú f́áúćíb́úś íń.
Óŕćí v́áŕíúś ńát́όq́úé éńát́íb́úś é ḿáǵńíś d́íś áŕt́úŕíéńt́ ḿόńt́éś, ńáśćét́úŕ ŕíd́íćúĺúś ḿúś. F́úśćé á é b́ĺáńd́ít́, ćόńv́áĺĺíś q́úáḿ é, v́úĺṕút́át́é ĺáćúś.
Śúśṕéńd́íśśé śí áḿé áŕćú ú áŕćú f́áúćíb́úś v́áŕíúś. V́ív́áḿúś śí áḿé ḿáx́íḿúś d́íáḿ. Ńáḿ é ĺéό, h́áŕét́ŕá éú ĺόb́όŕt́íś á, t́ŕíśt́íq́úé ú f́éĺíś.
""";
// Consistent line endings between systems keeps performance evaluation more consistent.
textSource = textSource.ReplaceLineEndings ("\r\n");
bool[] permutations = [true, false];
foreach (bool keepNewLine in permutations)
{
yield return [textSource [..1], keepNewLine];
yield return [textSource [..10], keepNewLine];
yield return [textSource [..100], keepNewLine];
yield return [textSource [..(textSource.Length / 2)], keepNewLine];
yield return [textSource, keepNewLine];
}
}
}

View File

@@ -78,9 +78,10 @@
<Using Include="Terminal.Gui.EnumExtensions" /> <Using Include="Terminal.Gui.EnumExtensions" />
</ItemGroup> </ItemGroup>
<!-- =================================================================== --> <!-- =================================================================== -->
<!-- Assembliy names for which internal items are visible --> <!-- Assembly names for which internal items are visible -->
<!-- =================================================================== --> <!-- =================================================================== -->
<ItemGroup> <ItemGroup>
<InternalsVisibleTo Include="Benchmarks"/>
<InternalsVisibleTo Include="UnitTests" /> <InternalsVisibleTo Include="UnitTests" />
<InternalsVisibleTo Include="UnitTests.Parallelizable" /> <InternalsVisibleTo Include="UnitTests.Parallelizable" />
<InternalsVisibleTo Include="StressTests" /> <InternalsVisibleTo Include="StressTests" />

View File

@@ -1,4 +1,5 @@
#nullable enable #nullable enable
using System.Buffers;
using System.Diagnostics; using System.Diagnostics;
namespace Terminal.Gui; namespace Terminal.Gui;
@@ -9,6 +10,9 @@ namespace Terminal.Gui;
/// </summary> /// </summary>
public class TextFormatter public class TextFormatter
{ {
// Utilized in CRLF related helper methods for faster newline char index search.
private static readonly SearchValues<char> NewLineSearchValues = SearchValues.Create(['\r', '\n']);
private Key _hotKey = new (); private Key _hotKey = new ();
private int _hotKeyPos = -1; private int _hotKeyPos = -1;
private List<string> _lines = new (); private List<string> _lines = new ();
@@ -1187,45 +1191,59 @@ public class TextFormatter
// TODO: Move to StringExtensions? // TODO: Move to StringExtensions?
internal static string StripCRLF (string str, bool keepNewLine = false) internal static string StripCRLF (string str, bool keepNewLine = false)
{ {
List<Rune> runes = str.ToRuneList (); StringBuilder stringBuilder = new();
for (var i = 0; i < runes.Count; i++) ReadOnlySpan<char> remaining = str.AsSpan ();
while (remaining.Length > 0)
{ {
switch ((char)runes [i].Value) int nextLineBreakIndex = remaining.IndexOfAny (NewLineSearchValues);
if (nextLineBreakIndex == -1)
{ {
case '\n': if (str.Length == remaining.Length)
if (!keepNewLine) {
{ return str;
runes.RemoveAt (i); }
} stringBuilder.Append (remaining);
break;
break;
case '\r':
if (i + 1 < runes.Count && runes [i + 1].Value == '\n')
{
runes.RemoveAt (i);
if (!keepNewLine)
{
runes.RemoveAt (i);
}
i++;
}
else
{
if (!keepNewLine)
{
runes.RemoveAt (i);
}
}
break;
} }
}
return StringExtensions.ToString (runes); ReadOnlySpan<char> slice = remaining.Slice (0, nextLineBreakIndex);
stringBuilder.Append (slice);
// Evaluate how many line break characters to preserve.
int stride;
char lineBreakChar = remaining [nextLineBreakIndex];
if (lineBreakChar == '\n')
{
stride = 1;
if (keepNewLine)
{
stringBuilder.Append ('\n');
}
}
else // '\r'
{
bool crlf = (nextLineBreakIndex + 1) < remaining.Length && remaining [nextLineBreakIndex + 1] == '\n';
if (crlf)
{
stride = 2;
if (keepNewLine)
{
stringBuilder.Append ('\n');
}
}
else
{
stride = 1;
if (keepNewLine)
{
stringBuilder.Append ('\r');
}
}
}
remaining = remaining.Slice (slice.Length + stride);
}
return stringBuilder.ToString ();
} }
// TODO: Move to StringExtensions? // TODO: Move to StringExtensions?

View File

@@ -2895,9 +2895,9 @@ public class TextFormatterTests
[InlineData ("This has crlf\r\nin the middle", "This has crlfin the middle")] [InlineData ("This has crlf\r\nin the middle", "This has crlfin the middle")]
[InlineData ("This has crlf in the end\r\n", "This has crlf in the end")] [InlineData ("This has crlf in the end\r\n", "This has crlf in the end")]
// LFCR // LFCR
[InlineData ("\n\rThis has lfcr in the beginning", "\rThis has lfcr in the beginning")] [InlineData ("\n\rThis has lfcr in the beginning", "This has lfcr in the beginning")]
[InlineData ("This has lfcr\n\rin the middle", "This has lfcr\rin the middle")] [InlineData ("This has lfcr\n\rin the middle", "This has lfcrin the middle")]
[InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end\r")] [InlineData ("This has lfcr in the end\n\r", "This has lfcr in the end")]
// CR // CR
[InlineData ("\rThis has cr in the beginning", "This has cr in the beginning")] [InlineData ("\rThis has cr in the beginning", "This has cr in the beginning")]
[InlineData ("This has cr\rin the middle", "This has crin the middle")] [InlineData ("This has cr\rin the middle", "This has crin the middle")]