Rewrite TextFormatter.StripCRLF

Uses a StringBuilder and a char-span IndexOf search to reduce intermediate allocations.

The new implementation behaves slightly differently from the old implementation. In a synthetic LFCR scenario the CR is now correctly removed, whereas the old implementation left the CR in place, which looks like an off-by-one error.
This commit is contained in:
Tonttu
2025-03-14 22:27:22 +02:00
committed by Tig
parent c4502b0741
commit 6f63dca591
4 changed files with 159 additions and 38 deletions

View File

@@ -0,0 +1,102 @@
using System.Text;
using BenchmarkDotNet.Attributes;
using Tui = Terminal.Gui;
namespace Terminal.Gui.Benchmarks.Text.TextFormatter;
/// <summary>
///     Benchmarks <c>Tui.TextFormatter.StripCRLF</c> against a copy of the previous rune-list based
///     implementation that is kept privately in this class.
/// </summary>
[MemoryDiagnoser]
public class StripCRLF
{
    /// <summary>
    ///     Benchmark entry for the previous implementation (<see cref="RuneListToString"/>).
    /// </summary>
    /// <param name="str">Input text supplied by <see cref="DataSource"/>.</param>
    /// <param name="keepNewLine">Flag supplied by <see cref="DataSource"/>; forwarded unchanged.</param>
    /// <returns>The string returned by <see cref="RuneListToString"/>.</returns>
    [Benchmark]
    [ArgumentsSource (nameof (DataSource))]
    public string Previous (string str, bool keepNewLine) => RuneListToString (str, keepNewLine);

    /// <summary>
    ///     Benchmark entry for the current implementation; this is the benchmark marked as baseline.
    /// </summary>
    /// <param name="str">Input text supplied by <see cref="DataSource"/>.</param>
    /// <param name="keepNewLine">Flag supplied by <see cref="DataSource"/>; forwarded unchanged.</param>
    /// <returns>The string returned by <c>Tui.TextFormatter.StripCRLF</c>.</returns>
    [Benchmark (Baseline = true)]
    [ArgumentsSource (nameof (DataSource))]
    public string Current (string str, bool keepNewLine) => Tui.TextFormatter.StripCRLF (str, keepNewLine);

    /// <summary>
    ///     Copy of the previous implementation, which edits an intermediate rune list in place.
    /// </summary>
    /// <remarks>
    ///     This method is the reference side of the benchmark, so its behavior is reproduced as-is,
    ///     including the fact that the rune following a removed rune is never examined.
    /// </remarks>
    /// <param name="str">Text to process.</param>
    /// <param name="keepNewLine">
    ///     When <see langword="false"/>, '\n' runes and lone '\r' runes are removed as well; when
    ///     <see langword="true"/>, only the '\r' of a "\r\n" pair is removed.
    /// </param>
    /// <returns>The string built from the runes that remain in the list.</returns>
    private static string RuneListToString (string str, bool keepNewLine = false)
    {
        List<Rune> buffer = str.ToRuneList ();
        var index = 0;

        // Count is re-read on every pass because the list shrinks while it is being walked.
        while (index < buffer.Count)
        {
            // NOTE(review): in an unchecked context this cast keeps only the low 16 bits of the rune
            // value, so a rune such as U+1000A would compare equal to '\n' here.
            var current = (char)buffer [index].Value;

            if (current == '\n')
            {
                if (!keepNewLine)
                {
                    // After the removal the next rune sits at 'index'; the increment at the bottom
                    // of the loop then steps past it without looking at it.
                    buffer.RemoveAt (index);
                }
            }
            else if (current == '\r')
            {
                bool lineFeedFollows = index + 1 < buffer.Count && buffer [index + 1].Value == '\n';

                if (lineFeedFollows)
                {
                    // Drop the CR; the LF is now at 'index'.
                    buffer.RemoveAt (index);

                    if (!keepNewLine)
                    {
                        // Drop the LF as well.
                        buffer.RemoveAt (index);
                    }

                    // Extra step on top of the regular increment below.
                    index++;
                }
                else if (!keepNewLine)
                {
                    // Lone CR.
                    buffer.RemoveAt (index);
                }
            }

            index++;
        }

        return StringExtensions.ToString (buffer);
    }

    /// <summary>
    ///     Supplies the benchmark arguments as <c>[text, keepNewLine]</c> pairs.
    /// </summary>
    /// <returns>
    ///     For <c>keepNewLine</c> = <see langword="true"/> and then <see langword="false"/>: the first 1, 10 and
    ///     100 chars of the sample text, its first half, and the whole text, in that order.
    /// </returns>
    public IEnumerable<object []> DataSource ()
    {
        string textSource =
            """
            Ĺόŕéḿ íśúḿ d́όĺόŕ śí áḿé, ćόńśéćt́ét́úŕ ád́íṕíśćíńǵ éĺí. ŕáéśéń q́úíś ĺúćt́úś éĺí. Íńt́éǵéŕ ú áŕćú éǵé d́όĺόŕ śćéĺéŕíśq́úé ḿát́t́íś áć é d́íáḿ.
            éĺĺéńt́éśq́úé śé d́áṕíb́úś ḿáśśá, v́éĺ t́ŕíśt́íq́úé d́úí. Śéd́ v́ít́áé ńéq́úé éú v́éĺít́ όŕńáŕé áĺíq́úét́. Ú q́úíś όŕćí t́éḿṕόŕ, t́éḿṕόŕ t́úŕṕíś í, t́éḿṕúś ńéq́úé.
            ŕáéśéń śáíéń t́úŕṕíś, όŕńáŕé v́éĺ ḿáúŕíś á, v́áŕíúś śúśćíí áńt́é. Ú úĺv́íńáŕ t́úŕṕíś ḿáśśá, q́úíś ćúŕśúś áŕćú f́áúćíb́úś íń.
            Óŕćí v́áŕíúś ńát́όq́úé éńát́íb́úś é ḿáǵńíś d́íś áŕt́úŕíéńt́ ḿόńt́éś, ńáśćét́úŕ ŕíd́íćúĺúś ḿúś. F́úśćé á é b́ĺáńd́ít́, ćόńv́áĺĺíś q́úáḿ é, v́úĺṕút́át́é ĺáćúś.
            Śúśṕéńd́íśśé śí áḿé áŕćú ú áŕćú f́áúćíb́úś v́áŕíúś. V́ív́áḿúś śí áḿé ḿáx́íḿúś d́íáḿ. Ńáḿ é ĺéό, h́áŕét́ŕá éú ĺόb́όŕt́íś á, t́ŕíśt́íq́úé ú f́éĺíś.
            """;

        // Force CRLF line endings so that every system benchmarks the same input.
        textSource = textSource.ReplaceLineEndings ("\r\n");

        int[] prefixLengths = [1, 10, 100, textSource.Length / 2, textSource.Length];

        foreach (bool keepNewLine in new[] { true, false })
        {
            foreach (int length in prefixLengths)
            {
                // Slicing happens here, per yielded pair, just before the pair is handed out.
                yield return new object [] { textSource [..length], keepNewLine };
            }
        }
    }
}