From 48b82ace1bc9e8ef7f0f94f41d963b520d9492a8 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 11:52:58 -0500 Subject: [PATCH 1/8] treat control sequences as 0 width --- comparison/go.mod | 2 +- comparison/go.sum | 4 ++-- go.mod | 2 +- go.sum | 4 ++-- width.go | 14 +++++++++-- width_test.go | 61 +++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 79 insertions(+), 8 deletions(-) diff --git a/comparison/go.mod b/comparison/go.mod index 5f3d35e..2320be6 100644 --- a/comparison/go.mod +++ b/comparison/go.mod @@ -10,7 +10,7 @@ require ( require ( github.com/clipperhouse/stringish v0.1.1 // indirect - github.com/clipperhouse/uax29/v2 v2.5.0 // indirect + github.com/clipperhouse/uax29/v2 v2.6.0 // indirect ) replace github.com/clipperhouse/displaywidth => ../ diff --git a/comparison/go.sum b/comparison/go.sum index 85a494e..f75b5c5 100644 --- a/comparison/go.sum +++ b/comparison/go.sum @@ -1,7 +1,7 @@ github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs= github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA= -github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U= -github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= +github.com/clipperhouse/uax29/v2 v2.6.0 h1:z0cDbUV+aPASdFb2/ndFnS9ts/WNXgTNNGFoKXuhpos= +github.com/clipperhouse/uax29/v2 v2.6.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ= diff --git a/go.mod b/go.mod index 2b63438..80dbfda 100644 --- a/go.mod +++ b/go.mod @@ -2,6 +2,6 @@ module github.com/clipperhouse/displaywidth go 1.18 -require github.com/clipperhouse/uax29/v2 v2.5.0 +require github.com/clipperhouse/uax29/v2 
v2.6.0 require github.com/clipperhouse/stringish v0.1.1 diff --git a/go.sum b/go.sum index 8b59062..7638f9e 100644 --- a/go.sum +++ b/go.sum @@ -1,4 +1,4 @@ github.com/clipperhouse/stringish v0.1.1 h1:+NSqMOr3GR6k1FdRhhnXrLfztGzuG+VuFDfatpWHKCs= github.com/clipperhouse/stringish v0.1.1/go.mod h1:v/WhFtE1q0ovMta2+m+UbpZ+2/HEXNWYXQgCt4hdOzA= -github.com/clipperhouse/uax29/v2 v2.5.0 h1:x7T0T4eTHDONxFJsL94uKNKPHrclyFI0lm7+w94cO8U= -github.com/clipperhouse/uax29/v2 v2.5.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= +github.com/clipperhouse/uax29/v2 v2.6.0 h1:z0cDbUV+aPASdFb2/ndFnS9ts/WNXgTNNGFoKXuhpos= +github.com/clipperhouse/uax29/v2 v2.6.0/go.mod h1:Wn1g7MK6OoeDT0vL+Q0SQLDz/KpfsVRgg6W7ihQeh4g= diff --git a/width.go b/width.go index 1f8cdb1..4cff960 100644 --- a/width.go +++ b/width.go @@ -12,12 +12,13 @@ import ( // characters are treated as width 1. When EastAsianWidth is true, ambiguous // East Asian characters are treated as width 2. type Options struct { - EastAsianWidth bool + EastAsianWidth bool + IgnoreControlSequences bool } // DefaultOptions is the default options for the display width // calculation, which is EastAsianWidth: false. 
-var DefaultOptions = Options{EastAsianWidth: false} +var DefaultOptions = Options{EastAsianWidth: false, IgnoreControlSequences: false} // String calculates the display width of a string, // by iterating over grapheme clusters in the string @@ -43,6 +44,8 @@ func (options Options) String(s string) int { // Not ASCII, use grapheme parsing g := graphemes.FromString(s[pos:]) + g.AnsiEscapeSequences = options.IgnoreControlSequences + start := pos for g.Next() { @@ -91,6 +94,8 @@ func (options Options) Bytes(s []byte) int { // Not ASCII, use grapheme parsing g := graphemes.FromBytes(s[pos:]) + g.AnsiEscapeSequences = options.IgnoreControlSequences + start := pos for g.Next() { @@ -231,6 +236,11 @@ func graphemeWidth[T stringish.Interface](s T, options Options) int { return asciiWidth(s[0]) } + // Multi-byte grapheme clusters led by a C0 control (0x00-0x1F) + if s[0] <= 0x1F { + return 0 + } + p, sz := lookup(s) prop := property(p) diff --git a/width_test.go b/width_test.go index d008c05..c760f0e 100644 --- a/width_test.go +++ b/width_test.go @@ -105,6 +105,67 @@ func TestStringWidth(t *testing.T) { } } +var ignoreControlSequences = Options{IgnoreControlSequences: true} + +func TestAnsiEscapeSequences(t *testing.T) { + tests := []struct { + name string + input string + options Options + expected int + }{ + // ANSI escape sequences (ECMA-48) should be zero width when parsed as single graphemes + {"SGR red", "\x1b[31m", ignoreControlSequences, 0}, + {"SGR reset", "\x1b[0m", ignoreControlSequences, 0}, + {"SGR bold", "\x1b[1m", ignoreControlSequences, 0}, + {"SGR 256-color", "\x1b[38;5;196m", ignoreControlSequences, 0}, + {"SGR truecolor", "\x1b[38;2;255;0;0m", ignoreControlSequences, 0}, + {"cursor up", "\x1b[A", ignoreControlSequences, 0}, + {"cursor position", "\x1b[10;20H", ignoreControlSequences, 0}, + {"erase in display", "\x1b[2J", ignoreControlSequences, 0}, + + // ANSI escape sequences mixed with visible text + {"red hello", "\x1b[31mhello\x1b[0m", 
ignoreControlSequences, 5}, + {"bold world", "\x1b[1mworld\x1b[0m", ignoreControlSequences, 5}, + {"colored CJK", "\x1b[31m中文\x1b[0m", ignoreControlSequences, 4}, + {"colored emoji", "\x1b[31m😀\x1b[0m", ignoreControlSequences, 2}, + {"nested SGR", "\x1b[1m\x1b[31mhi\x1b[0m", ignoreControlSequences, 2}, + + // CR+LF as a multi-byte C0-led grapheme (zero width) + {"CRLF", "\r\n", ignoreControlSequences, 0}, + {"text with CRLF", "hello\r\nworld", ignoreControlSequences, 10}, + + // Without IgnoreControlSequences, ESC is still zero width as a standalone byte + {"bare ESC default options", "\x1b", defaultOptions, 0}, + + // IgnoreControlSequences should not regress width for strings with no escape sequences + {"plain ASCII with option", "hello", ignoreControlSequences, 5}, + {"plain ASCII spaces with option", "hello world", ignoreControlSequences, 11}, + {"CJK with option", "中文", ignoreControlSequences, 4}, + {"emoji with option", "😀", ignoreControlSequences, 2}, + {"flag with option", "🇺🇸", ignoreControlSequences, 2}, + {"mixed with option", "hello中文😀", ignoreControlSequences, 5 + 4 + 2}, + {"ambiguous with option", "★", ignoreControlSequences, 1}, + {"combining mark with option", "é", ignoreControlSequences, 1}, + {"control chars with option", "\t\n", ignoreControlSequences, 0}, + {"empty with option", "", ignoreControlSequences, 0}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result := tt.options.String(tt.input) + if result != tt.expected { + t.Errorf("String(%q) = %d, want %d", tt.input, result, tt.expected) + } + + result = tt.options.Bytes([]byte(tt.input)) + if result != tt.expected { + t.Errorf("Bytes(%q) = %d, want %d", tt.input, result, tt.expected) + } + }) + } +} + func TestRuneWidth(t *testing.T) { tests := []struct { name string From d3448978859a964836f806c4dca9564021c74315 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 12:13:14 -0500 Subject: [PATCH 2/8] IgnoreControlSequences for Truncate --- width.go | 
4 ++++ width_test.go | 14 ++++++++++++++ 2 files changed, 18 insertions(+) diff --git a/width.go b/width.go index 4cff960..19ecec7 100644 --- a/width.go +++ b/width.go @@ -166,6 +166,8 @@ func (options Options) TruncateString(s string, maxWidth int, tail string) strin var pos, total int g := graphemes.FromString(s) + g.AnsiEscapeSequences = options.IgnoreControlSequences + for g.Next() { gw := graphemeWidth(g.Value(), options) if total+gw <= maxWidthWithoutTail { @@ -199,6 +201,8 @@ func (options Options) TruncateBytes(s []byte, maxWidth int, tail []byte) []byte var pos, total int g := graphemes.FromBytes(s) + g.AnsiEscapeSequences = options.IgnoreControlSequences + for g.Next() { gw := graphemeWidth(g.Value(), options) if total+gw <= maxWidthWithoutTail { diff --git a/width_test.go b/width_test.go index c760f0e..256ee95 100644 --- a/width_test.go +++ b/width_test.go @@ -987,6 +987,20 @@ func TestTruncateString(t *testing.T) { {"complex mixed", "Go 🇺🇸🚀", 3, "...", defaultOptions, "..."}, {"complex mixed fits", "Go 🇺🇸🚀", 7, "...", defaultOptions, "Go 🇺🇸🚀"}, + // IgnoreControlSequences (ANSI escape sequences): truncation by visible width only. + // Semantics: we only truncate when cumulative visible width strictly exceeds maxWidth + // (total > maxWidth). So if visible(s) <= maxWidth we return s unchanged. When we + // truncate, result = s[:pos]+tail where pos is the last grapheme end such that + // visible(s[:pos]) <= maxWidth - visible(tail). ANSI sequences are zero-width + // graphemes when IgnoreControlSequences is true. 
+ {"IgnoreControlSequences plain no truncation", "hello", 5, "...", ignoreControlSequences, "hello"}, + {"IgnoreControlSequences ANSI wrapped no truncation", "\x1b[31mhello\x1b[0m", 8, "...", ignoreControlSequences, "\x1b[31mhello\x1b[0m"}, + {"IgnoreControlSequences ANSI wrapped truncate", "\x1b[31mhello\x1b[0m", 4, "...", ignoreControlSequences, "\x1b[31mh..."}, + {"IgnoreControlSequences ANSI in middle truncate", "hello\x1b[31mworld", 5, "...", ignoreControlSequences, "he..."}, + {"IgnoreControlSequences CJK truncate", "\x1b[31m中文\x1b[0m", 2, "...", ignoreControlSequences, "..."}, + {"IgnoreControlSequences CJK no truncation", "\x1b[31m中文\x1b[0m", 7, "...", ignoreControlSequences, "\x1b[31m中文\x1b[0m"}, + {"IgnoreControlSequences CJK one wide then tail", "\x1b[31m中文xx\x1b[0m", 5, "...", ignoreControlSequences, "\x1b[31m中..."}, + // East Asian Width option {"ambiguous EAW fits", "★", 2, "...", eawOptions, "★"}, {"ambiguous EAW truncate", "★", 1, "...", eawOptions, "..."}, From 20794ed22296f5d04272eab16336f56e297f73a8 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 12:18:17 -0500 Subject: [PATCH 3/8] add comment --- width.go | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/width.go b/width.go index 19ecec7..97db647 100644 --- a/width.go +++ b/width.go @@ -8,11 +8,21 @@ import ( ) // Options allows you to specify the treatment of ambiguous East Asian -// characters. When EastAsianWidth is false (default), ambiguous East Asian -// characters are treated as width 1. When EastAsianWidth is true, ambiguous -// East Asian characters are treated as width 2. +// characters and ANSI escape sequences. type Options struct { - EastAsianWidth bool + // EastAsianWidth specifies whether to treat ambiguous East Asian characters + // as width 1 or 2. When false (default), ambiguous East Asian characters + // are treated as width 1. When true, they are width 2. 
+ EastAsianWidth bool + + // IgnoreControlSequences specifies whether to ignore ECMA-48 escape sequences + // when calculating the display width. When false (default), ANSI escape + // sequences are treated as just a series of characters. When true, they are + // treated as a single zero-width unit. + // + // Note that this option is about *sequences*. Individual control characters + // are already treated as zero-width. With this option, ANSI sequences such as + // "\x1b[31m" and "\x1b[0m" do not count towards the width of a string. IgnoreControlSequences bool } From 66a3b63a31343baf3b50a02e0532933a1398adf9 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 12:38:24 -0500 Subject: [PATCH 4/8] Support IgnoreControlSequences in *Graphemes funcs --- graphemes.go | 16 ++++++++-------- width_test.go | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+), 8 deletions(-) diff --git a/graphemes.go b/graphemes.go index e9cc79a..3d11eec 100644 --- a/graphemes.go +++ b/graphemes.go @@ -44,10 +44,10 @@ func StringGraphemes(s string) Graphemes[string] { // Iterate using the Next method, and get the width of the current grapheme // using the Width method. func (options Options) StringGraphemes(s string) Graphemes[string] { - return Graphemes[string]{ - iter: graphemes.FromString(s), - options: options, - } + g := graphemes.FromString(s) + g.AnsiEscapeSequences = options.IgnoreControlSequences + + return Graphemes[string]{iter: g, options: options} } // BytesGraphemes returns an iterator over grapheme clusters for the given @@ -65,8 +65,8 @@ func BytesGraphemes(s []byte) Graphemes[[]byte] { // Iterate using the Next method, and get the width of the current grapheme // using the Width method. 
func (options Options) BytesGraphemes(s []byte) Graphemes[[]byte] { - return Graphemes[[]byte]{ - iter: graphemes.FromBytes(s), - options: options, - } + g := graphemes.FromBytes(s) + g.AnsiEscapeSequences = options.IgnoreControlSequences + + return Graphemes[[]byte]{iter: g, options: options} } diff --git a/width_test.go b/width_test.go index 256ee95..bbf565e 100644 --- a/width_test.go +++ b/width_test.go @@ -884,6 +884,52 @@ func TestBytesGraphemes(t *testing.T) { } } +func TestGraphemesIgnoreControlSequences(t *testing.T) { + tests := []struct { + name string + input string + options Options + }{ + // IgnoreControlSequences true: ANSI sequences are one zero-width grapheme each; visible width only + {"IgnoreControlSequences ANSI wrapped", "\x1b[31mhello\x1b[0m", ignoreControlSequences}, + {"IgnoreControlSequences ANSI only", "\x1b[0m", ignoreControlSequences}, + {"IgnoreControlSequences plain text", "hi", ignoreControlSequences}, + {"IgnoreControlSequences ANSI mid", "a\x1b[31mb\x1b[0mc", ignoreControlSequences}, + // Default options: sum of grapheme widths must still match String/Bytes + {"default ANSI wrapped", "\x1b[31mhello\x1b[0m", defaultOptions}, + {"default plain", "hello", defaultOptions}, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // StringGraphemes: option must be passed through; sum of Width() matches String() + expected := tt.options.String(tt.input) + iter := tt.options.StringGraphemes(tt.input) + got := 0 + for iter.Next() { + got += iter.Width() + } + if got != expected { + t.Errorf("StringGraphemes(%q) sum Width() = %d, want %d (String)", + tt.input, got, expected) + } + + // BytesGraphemes: same option and outcome for []byte + b := []byte(tt.input) + expectedBytes := tt.options.Bytes(b) + iterBytes := tt.options.BytesGraphemes(b) + gotBytes := 0 + for iterBytes.Next() { + gotBytes += iterBytes.Width() + } + if gotBytes != expectedBytes { + t.Errorf("BytesGraphemes(%q) sum Width() = %d, want %d (Bytes)", + b, 
gotBytes, expectedBytes) + } + }) + } +} + func TestAsciiWidth(t *testing.T) { tests := []struct { name string From 29a4112b301012fcc4cb7e0d6f88dddff46c6dda Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 12:45:05 -0500 Subject: [PATCH 5/8] Update AGENTS.md --- AGENTS.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/AGENTS.md b/AGENTS.md index 853e291..ce8d7c0 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -18,13 +18,15 @@ by running `go generate` from the top package directory. ## Pull Requests and branches -For PRs (pull requests), you can use the gh CLI tool to retrieve details, -or post comments. Then, compare the current branch with main. Reviewing a PR -and reviewing a branch are about the same, but the PR may add context. +For PRs (pull requests), you can use the gh CLI tool. Compare the current branch with main. Reviewing a PR and reviewing a branch are about the same, but the PR may add context. -Look for bugs. Think like GitHub Copilot or Cursor BugBot. +Understand the goals of the PR. Note any API changes, especially breaking changes. -Offer to post a brief summary of the review to the PR, via the gh CLI tool. +Look for thoroughness of tests, as well as GoDoc comments. + +Retrieve and consider the comments on the PR, which may have come from GitHub Copilot or Cursor BugBot. Think like GitHub Copilot or Cursor BugBot. + +Offer to optionally post a brief summary of the review to the PR, via the gh CLI tool. 
## Comparisons to go-runewidth From b5ee0b6a7c8a08e33ccc6c9edaec99647e97c106 Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 13:12:28 -0500 Subject: [PATCH 6/8] docs & comments --- README.md | 46 +++++++++++++++++++++++++--------------------- width.go | 2 +- 2 files changed, 26 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 44b5143..7290d13 100644 --- a/README.md +++ b/README.md @@ -61,8 +61,28 @@ func main() { ### Options -There is one option, `displaywidth.Options.EastAsianWidth`, which defines -how [East Asian Ambiguous characters](https://www.unicode.org/reports/tr11/#Ambiguous) +Create the options you need, and then use methods on the options struct. + +```go +var myOptions = displaywidth.Options{ + EastAsianWidth: true, + IgnoreControlSequences: true, +} + +width := myOptions.String("Hello, 世界!") +``` + +#### IgnoreControlSequences + +`IgnoreControlSequences` specifies whether to ignore ECMA-48 escape sequences +when calculating the display width. When `false` (default), ANSI escape +sequences are treated as just a series of characters. When `true`, they are +treated as a single zero-width unit. + +#### EastAsianWidth + +`EastAsianWidth` defines how +[East Asian Ambiguous characters](https://www.unicode.org/reports/tr11/#Ambiguous) are treated. When `false` (default), East Asian Ambiguous characters are treated as width 1. @@ -70,26 +90,8 @@ When `true`, they are treated as width 2. You may wish to configure this based on environment variables or locale. `go-runewidth`, for example, does so - [during package initialization](https://github.com/mattn/go-runewidth/blob/master/runewidth.go#L26C1-L45C2). + [during package initialization](https://github.com/mattn/go-runewidth/blob/master/runewidth.go#L26C1-L45C2). `displaywidth` does not do this automatically; we prefer to leave it to you. -`displaywidth` does not do this automatically, we prefer to leave it to you. 
-You might do something like: - -```go -var width displaywidth.Options // zero value is default - -func init() { - if os.Getenv("EAST_ASIAN_WIDTH") == "true" { - width = displaywidth.Options{EastAsianWidth: true} - } - // or check locale, or any other logic you want -} - -// use it in your logic -func myApp() { - fmt.Println(width.String("Hello, 世界!")) -} -``` ## Technical standards and compatibility @@ -101,6 +103,8 @@ and [regional indicator pairs](https://en.wikipedia.org/wiki/Regional_indicator_ for emojis. We are keeping an eye on [emerging standards](https://www.jeffquast.com/post/state-of-terminal-emulation-2025/). +For control sequences, we implement the [ECMA-48](https://ecma-international.org/publications-and-standards/standards/ecma-48/) standard for 7-bit ASCII control sequences. + `clipperhouse/displaywidth`, `mattn/go-runewidth`, and `rivo/uniseg` will give the same outputs for most real-world text. Extensive details are in the [compatibility analysis](comparison/COMPATIBILITY_ANALYSIS.md). diff --git a/width.go b/width.go index 97db647..ece9489 100644 --- a/width.go +++ b/width.go @@ -27,7 +27,7 @@ type Options struct { } // DefaultOptions is the default options for the display width -// calculation, which is EastAsianWidth: false. +// calculation, which is EastAsianWidth false and IgnoreControlSequences false. 
var DefaultOptions = Options{EastAsianWidth: false, IgnoreControlSequences: false} // String calculates the display width of a string, From f3c7b6d341ad02d6591dc42216db578fd97aee7c Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 13:12:42 -0500 Subject: [PATCH 7/8] more tests --- width_test.go | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/width_test.go b/width_test.go index bbf565e..2358b8b 100644 --- a/width_test.go +++ b/width_test.go @@ -135,8 +135,10 @@ func TestAnsiEscapeSequences(t *testing.T) { {"CRLF", "\r\n", ignoreControlSequences, 0}, {"text with CRLF", "hello\r\nworld", ignoreControlSequences, 10}, - // Without IgnoreControlSequences, ESC is still zero width as a standalone byte + // Without IgnoreControlSequences, ESC is zero width but the rest of the sequence is visible {"bare ESC default options", "\x1b", defaultOptions, 0}, + {"SGR red default options", "\x1b[31m", defaultOptions, 4}, + {"red hello default options", "\x1b[31mhello\x1b[0m", defaultOptions, 12}, // IgnoreControlSequences should not regress width for strings with no escape sequences {"plain ASCII with option", "hello", ignoreControlSequences, 5}, From f1ccf242a368a47e128c1cc1181d12c8504b6f9e Mon Sep 17 00:00:00 2001 From: Matt Sherman Date: Sun, 8 Feb 2026 13:49:47 -0500 Subject: [PATCH 8/8] more fuzzing --- .github/workflows/gofuzz.yml | 2 +- fuzz_test.go | 146 ++++++++++++++++++++++++++++++++++- 2 files changed, 143 insertions(+), 5 deletions(-) diff --git a/.github/workflows/gofuzz.yml b/.github/workflows/gofuzz.yml index 1e29c99..ecccce9 100644 --- a/.github/workflows/gofuzz.yml +++ b/.github/workflows/gofuzz.yml @@ -11,7 +11,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - fuzzer: [FuzzBytesAndString, FuzzRune, FuzzTruncateStringAndBytes] + fuzzer: [FuzzBytesAndString, FuzzRune, FuzzTruncateStringAndBytes, FuzzControlSequences] steps: - name: Check out code uses: actions/checkout@v4 diff --git a/fuzz_test.go b/fuzz_test.go index 
7a510b2..d092652 100644 --- a/fuzz_test.go +++ b/fuzz_test.go @@ -99,8 +99,10 @@ func FuzzBytesAndString(f *testing.F) { // Test with different options combinations options := []Options{ - {EastAsianWidth: false}, // default + {EastAsianWidth: false}, {EastAsianWidth: true}, + {IgnoreControlSequences: true}, + {EastAsianWidth: true, IgnoreControlSequences: true}, } for _, option := range options { @@ -188,10 +190,13 @@ func FuzzRune(f *testing.F) { } } - // Test with different options + // Test with different options (Rune is per-rune, IgnoreControlSequences + // doesn't affect single runes, but we include it for completeness) options := []Options{ - {EastAsianWidth: false}, // default + {EastAsianWidth: false}, {EastAsianWidth: true}, + {IgnoreControlSequences: true}, + {EastAsianWidth: true, IgnoreControlSequences: true}, } for _, option := range options { @@ -308,8 +313,10 @@ func FuzzTruncateStringAndBytes(f *testing.F) { // Test with different options options := []Options{ - {EastAsianWidth: false}, // default + {EastAsianWidth: false}, {EastAsianWidth: true}, + {IgnoreControlSequences: true}, + {EastAsianWidth: true, IgnoreControlSequences: true}, } for _, option := range options { @@ -327,3 +334,134 @@ func FuzzTruncateStringAndBytes(f *testing.F) { } }) } + +// FuzzControlSequences fuzzes strings containing ANSI/ECMA-48 escape sequences +// across all option combinations (EastAsianWidth x IgnoreControlSequences). 
+func FuzzControlSequences(f *testing.F) { + if testing.Short() { + f.Skip("skipping fuzz test in short mode") + } + + // Seed with ANSI escape sequences + f.Add([]byte("\x1b[31m")) // SGR red + f.Add([]byte("\x1b[0m")) // SGR reset + f.Add([]byte("\x1b[1m")) // SGR bold + f.Add([]byte("\x1b[38;5;196m")) // SGR 256-color + f.Add([]byte("\x1b[38;2;255;0;0m")) // SGR truecolor + f.Add([]byte("\x1b[A")) // cursor up + f.Add([]byte("\x1b[10;20H")) // cursor position + f.Add([]byte("\x1b[2J")) // erase in display + f.Add([]byte("\x1b[31mhello\x1b[0m")) // red text + f.Add([]byte("\x1b[1m\x1b[31mhi\x1b[0m")) // nested SGR + f.Add([]byte("hello\x1b[31mworld\x1b[0m")) // ANSI mid-string + f.Add([]byte("\x1b[31m中文\x1b[0m")) // colored CJK + f.Add([]byte("\x1b[31m😀\x1b[0m")) // colored emoji + f.Add([]byte("\x1b[31m🇺🇸\x1b[0m")) // colored flag + f.Add([]byte("a\x1b[31mb\x1b[32mc\x1b[33md\x1b[0m")) // multiple colors + f.Add([]byte("\x1b[31m\x1b[42m\x1b[1mbold on red\x1b[0m")) // stacked SGR + f.Add([]byte("\r\n")) // CR+LF + f.Add([]byte("hello\r\nworld")) // text with CRLF + f.Add([]byte("\x1b")) // bare ESC + f.Add([]byte("\x1b[")) // incomplete sequence + f.Add([]byte("\x1b[31")) // incomplete SGR + f.Add([]byte("")) // empty + f.Add([]byte("hello")) // plain ASCII + f.Add([]byte("中文")) // plain CJK + f.Add([]byte("😀")) // plain emoji + + // Seed with multi-lingual text + file, err := testdata.Sample() + if err != nil { + f.Fatal(err) + } + chunks := bytes.Split(file, []byte("\n")) + for _, chunk := range chunks { + f.Add(chunk) + } + + allOptions := []Options{ + {}, + {EastAsianWidth: true}, + {IgnoreControlSequences: true}, + {EastAsianWidth: true, IgnoreControlSequences: true}, + } + + f.Fuzz(func(t *testing.T, text []byte) { + for _, opt := range allOptions { + wb := opt.Bytes(text) + ws := opt.String(string(text)) + + // Invariant: width is never negative + if wb < 0 { + t.Errorf("Bytes() with %+v returned negative width %d for %q", opt, wb, text) + } + + // 
Invariant: String and Bytes agree + if wb != ws { + t.Errorf("Bytes()=%d != String()=%d with %+v for %q", wb, ws, opt, text) + } + + // Invariant: empty input is always 0 + if len(text) == 0 && wb != 0 { + t.Errorf("non-zero width %d for empty input with %+v", wb, opt) + } + + // Invariant: sum of grapheme widths equals total width + gIter := opt.BytesGraphemes(text) + gSum := 0 + for gIter.Next() { + gw := gIter.Width() + if gw < 0 { + t.Errorf("grapheme Width() < 0 with %+v for %q", opt, text) + } + gSum += gw + } + if gSum != wb { + t.Errorf("sum of grapheme widths %d != Bytes() %d with %+v for %q", gSum, wb, opt, text) + } + + // Same for StringGraphemes + sgIter := opt.StringGraphemes(string(text)) + sgSum := 0 + for sgIter.Next() { + sgSum += sgIter.Width() + } + if sgSum != ws { + t.Errorf("sum of StringGraphemes widths %d != String() %d with %+v for %q", sgSum, ws, opt, text) + } + + // Invariant: IgnoreControlSequences width <= default width + // (escape sequences become 0 instead of their visible char widths) + if opt.IgnoreControlSequences { + noIgnore := Options{EastAsianWidth: opt.EastAsianWidth} + wDefault := noIgnore.Bytes(text) + if wb > wDefault { + t.Errorf("IgnoreControlSequences width %d > default width %d with %+v for %q", wb, wDefault, opt, text) + } + } + + // Invariant: truncation respects maxWidth (accounting for the tail, + // which is always appended and may itself exceed maxWidth) + tail := "..." 
+ tailWidth := opt.String(tail) + for _, maxWidth := range []int{0, 1, 3, 5, 10, 20} { + ts := opt.TruncateString(string(text), maxWidth, tail) + tsWidth := opt.String(ts) + limit := maxWidth + if tailWidth > limit { + limit = tailWidth + } + if tsWidth > limit { + t.Errorf("TruncateString() width %d > max(maxWidth, tailWidth) %d with %+v for %q -> %q", + tsWidth, limit, opt, text, ts) + } + + tb := opt.TruncateBytes(text, maxWidth, []byte(tail)) + if !bytes.Equal(tb, []byte(ts)) { + t.Errorf("TruncateBytes() != TruncateString() with %+v for %q: %q != %q", + opt, text, tb, ts) + } + } + } + }) +}