From fc726fae9fbcbb86ff404e356998177e0b302adf Mon Sep 17 00:00:00 2001 From: forgejo-backport-action Date: Sat, 12 Jul 2025 19:00:07 +0200 Subject: [PATCH] [v12.0/forgejo] fix(code-search): HighlightSearchResultCode should count the number of bytes and not the number of runes (#8498) **Backport:** https://codeberg.org/forgejo/forgejo/pulls/8492 fixes incorrect handling of unicode in the matched line Co-authored-by: Shiny Nematoda Reviewed-on: https://codeberg.org/forgejo/forgejo/pulls/8498 Co-authored-by: forgejo-backport-action Co-committed-by: forgejo-backport-action --- modules/indexer/code/search.go | 21 +++-- modules/indexer/code/search_test.go | 122 ++++++++++++++++++++++++++++ 2 files changed, 134 insertions(+), 9 deletions(-) create mode 100644 modules/indexer/code/search_test.go diff --git a/modules/indexer/code/search.go b/modules/indexer/code/search.go index adf51a76d7..499b9117c4 100644 --- a/modules/indexer/code/search.go +++ b/modules/indexer/code/search.go @@ -97,7 +97,7 @@ func HighlightSearchResultCode(filename string, lineNums []int, highlightRanges conv := hcd.ConvertToPlaceholders(string(hl)) convLines := strings.Split(conv, "\n") - // each highlightRange is of the form [line number, start pos, end pos] + // each highlightRange is of the form [line number, start byte offset, end byte offset] for _, highlightRange := range highlightRanges { ln, start, end := highlightRange[0], highlightRange[1], highlightRange[2] line := convLines[ln] @@ -105,15 +105,18 @@ func HighlightSearchResultCode(filename string, lineNums []int, highlightRanges continue } + sr := strings.NewReader(line) sb := strings.Builder{} count := -1 isOpen := false - for _, r := range line { + for r, size, err := sr.ReadRune(); err == nil; r, size, err = sr.ReadRune() { if token, ok := hcd.PlaceholderTokenMap[r]; // token was not found - !ok || - // token was marked as used - token == "" || + !ok { + count += size + } else if + // token was marked as used + token == "" || // the token is not an valid html tag emitted by chroma !(len(token) > 6 && (token[0:5] == "= end: // if tag is not open, no need to close if !isOpen { break } sb.WriteRune(endTag) isOpen = false - case start: + case count >= start: // if tag is open, do not open again if isOpen { break @@ -161,7 +164,7 @@ func HighlightSearchResultCode(filename string, lineNums []int, highlightRanges highlightedLines := strings.Split(hcd.Recover(conv), "\n") // The lineNums outputted by highlight.Code might not match the original lineNums, because "highlight" removes the last `\n` lines := make([]ResultLine, min(len(highlightedLines), len(lineNums))) - for i := 0; i < len(lines); i++ { + for i := range len(lines) { lines[i].Num = lineNums[i] lines[i].FormattedContent = template.HTML(highlightedLines[i]) } diff --git a/modules/indexer/code/search_test.go b/modules/indexer/code/search_test.go new file mode 100644 index 0000000000..e542b38c24 --- /dev/null +++ b/modules/indexer/code/search_test.go @@ -0,0 +1,122 @@ +// Copyright 2025 The Forgejo Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package code + +import ( + "html/template" + "strings" + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestHighlightSearchResultCode(t *testing.T) { + opts := []struct { + Title string + File string + Lines []int + Range [][3]int + Code string + Result []template.HTML + }{ + { + Title: "One Match Text", + File: "test.txt", + Range: [][3]int{{1, 5, 9}}, + Code: "First Line\nMark this only\nThe End", + Result: []template.HTML{ + "First Line", + "Mark this only", + "The End", + }, + }, + { + Title: "Two Match Text", + File: "test.txt", + Range: [][3]int{ + {1, 5, 9}, + {2, 5, 9}, + }, + Code: "First Line\nMark this only\nMark this too\nThe End", + Result: []template.HTML{ + "First Line", + "Mark this only", + "Mark this too", + "The End", + }, + }, + { + Title: "Unicode Before", + File: "test.txt", + Range: [][3]int{{1, 10, 14}}, + Code: "First Line\nMark 👉 this only\nThe End", + Result: []template.HTML{ + "First Line", + "Mark 👉 this only", + "The End", + }, + }, + { + Title: "Unicode Between", + File: "test.txt", + Range: [][3]int{{1, 5, 14}}, + Code: "First Line\nMark this 😊 only\nThe End", + Result: []template.HTML{ + "First Line", + "Mark this 😊 only", + "The End", + }, + }, + { + Title: "Unicode Before And Between", + File: "test.txt", + Range: [][3]int{{1, 10, 19}}, + Code: "First Line\nMark 👉 this 😊 only\nThe End", + Result: []template.HTML{ + "First Line", + "Mark 👉 this 😊 only", + "The End", + }, + }, + { + Title: "Golang", + File: "test.go", + Range: [][3]int{{1, 14, 23}}, + Code: "func main() {\n\tfmt.Println(\"mark this\")\n}", + Result: []template.HTML{ + "func main() {", + "\tfmt.Println("mark this")", + "}", + }, + }, + { + Title: "Golang Unicode", + File: "test.go", + Range: [][3]int{{1, 14, 28}}, + Code: "func main() {\n\tfmt.Println(\"mark this 😊\")\n}", + Result: []template.HTML{ + "func main() {", + "\tfmt.Println("mark this 😊")", + "}", + }, + }, + } + for _, o := range opts { + t.Run(o.Title, func(t *testing.T) { + lines := []int{} + for i := range strings.Count(strings.TrimSuffix(o.Code, "\n"), "\n") + 1 { + lines = append(lines, i+1) + } + res := HighlightSearchResultCode(o.File, lines, o.Range, o.Code) + assert.Len(t, res, len(o.Result)) + assert.Len(t, res, len(lines)) + + for i, r := range res { + require.Equal(t, lines[i], r.Num) + require.Equal(t, o.Result[i], r.FormattedContent) + } + }) + } +}