Fix a panic in NotifyCreateIssueComment (caused by string truncation) (#17928)

* Fix a panic in NotifyCreateIssueComment (caused by string truncation) * more unit tests * refactor * fix some edge cases * use SplitStringAtByteN for comment content
2021-12-09 13:41:17 +08:00 · 2021-12-09 13:41:17 +08:00 · c7e23401a3
commit c7e23401a3
parent 183175263d
4 changed files with 104 additions and 17 deletions
--- a/models/repo_archiver.go
+++ b/models/repo_archiver.go
@ -6,7 +6,6 @@ package models

 import (
 	"code.gitea.io/gitea/models/db"
-
 	repo_model "code.gitea.io/gitea/models/repo"
 )

--- a/modules/notification/action/action.go
+++ b/modules/notification/action/action.go
@ -15,6 +15,7 @@ import (
 	"code.gitea.io/gitea/modules/log"
 	"code.gitea.io/gitea/modules/notification/base"
 	"code.gitea.io/gitea/modules/repository"
+	"code.gitea.io/gitea/modules/util"
 )

 type actionNotifier struct {
@ -100,14 +101,15 @@ func (a *actionNotifier) NotifyCreateIssueComment(doer *user_model.User, repo *m
 		IsPrivate: issue.Repo.IsPrivate,
 	}

-	content := ""
-
-	if len(comment.Content) > 200 {
-		content = comment.Content[:strings.LastIndex(comment.Content[0:200], " ")] + "…"
-	} else {
-		content = comment.Content
+	truncatedContent, truncatedRight := util.SplitStringAtByteN(comment.Content, 200)
+	if truncatedRight != "" {
+		// in case the content is in a Latin family language, we remove the last broken word.
+		lastSpaceIdx := strings.LastIndex(truncatedContent, " ")
+		if lastSpaceIdx != -1 && (len(truncatedContent)-lastSpaceIdx < 15) {
+			truncatedContent = truncatedContent[:lastSpaceIdx] + "…"
+		}
 	}
-	act.Content = fmt.Sprintf("%d|%s", issue.Index, content)
+	act.Content = fmt.Sprintf("%d|%s", issue.Index, truncatedContent)

 	if issue.IsPull {
 		act.OpType = models.ActionCommentPull
--- a/modules/util/truncate.go
+++ b/modules/util/truncate.go
@ -6,20 +6,23 @@ package util

 import "unicode/utf8"

+// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
+const utf8Ellipsis = "…"
+const asciiEllipsis = "..."
+
 // SplitStringAtByteN splits a string at byte n accounting for rune boundaries. (Combining characters are not accounted for.)
 func SplitStringAtByteN(input string, n int) (left, right string) {
 	if len(input) <= n {
-		left = input
-		return
+		return input, ""
 	}

 	if !utf8.ValidString(input) {
-		left = input[:n-3] + "..."
-		right = "..." + input[n-3:]
-		return
+		if n-3 < 0 {
+			return input, ""
+		}
+		return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
 	}

-	// in UTF8 "…" is 3 bytes so doesn't really gain us anything...
 	end := 0
 	for end <= n-3 {
 		_, size := utf8.DecodeRuneInString(input[end:])
@ -29,7 +32,29 @@ func SplitStringAtByteN(input string, n int) (left, right string) {
 		end += size
 	}

-	left = input[:end] + "…"
-	right = "…" + input[end:]
-	return
+	return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
+}
+
+// SplitStringAtRuneN splits a string at rune n accounting for rune boundaries. (Combining characters are not accounted for.)
+func SplitStringAtRuneN(input string, n int) (left, right string) {
+	if !utf8.ValidString(input) {
+		if len(input) <= n || n-3 < 0 {
+			return input, ""
+		}
+		return input[:n-3] + asciiEllipsis, asciiEllipsis + input[n-3:]
+	}
+
+	if utf8.RuneCountInString(input) <= n {
+		return input, ""
+	}
+
+	count := 0
+	end := 0
+	for count < n-1 {
+		_, size := utf8.DecodeRuneInString(input[end:])
+		end += size
+		count++
+	}
+
+	return input[:end] + utf8Ellipsis, utf8Ellipsis + input[end:]
 }
--- a/modules/util/truncate_test.go
+++ b/modules/util/truncate_test.go
@ -0,0 +1,61 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package util
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestSplitString(t *testing.T) {
+	type testCase struct {
+		input    string
+		n        int
+		leftSub  string
+		ellipsis string
+	}
+
+	test := func(tc []*testCase, f func(input string, n int) (left, right string)) {
+		for _, c := range tc {
+			l, r := f(c.input, c.n)
+			if c.ellipsis != "" {
+				assert.Equal(t, c.leftSub+c.ellipsis, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
+				assert.Equal(t, c.ellipsis+c.input[len(c.leftSub):], r, "test split %s at %d, expected rightSub: %q", c.input, c.n, c.input[len(c.leftSub):])
+			} else {
+				assert.Equal(t, c.leftSub, l, "test split %q at %d, expected leftSub: %q", c.input, c.n, c.leftSub)
+				assert.Equal(t, "", r, "test split %q at %d, expected rightSub: %q", c.input, c.n, "")
+			}
+		}
+	}
+
+	tc := []*testCase{
+		{"abc123xyz", 0, "", utf8Ellipsis},
+		{"abc123xyz", 1, "", utf8Ellipsis},
+		{"abc123xyz", 4, "a", utf8Ellipsis},
+		{"啊bc123xyz", 4, "", utf8Ellipsis},
+		{"啊bc123xyz", 6, "啊", utf8Ellipsis},
+		{"啊bc", 5, "啊bc", ""},
+		{"啊bc", 6, "啊bc", ""},
+		{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
+		{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
+		{"\xef\x03", 1, "\xef\x03", ""},
+	}
+	test(tc, SplitStringAtByteN)
+
+	tc = []*testCase{
+		{"abc123xyz", 0, "", utf8Ellipsis},
+		{"abc123xyz", 1, "", utf8Ellipsis},
+		{"abc123xyz", 4, "abc", utf8Ellipsis},
+		{"啊bc123xyz", 4, "啊bc", utf8Ellipsis},
+		{"啊bc123xyz", 6, "啊bc12", utf8Ellipsis},
+		{"啊bc", 3, "啊bc", ""},
+		{"啊bc", 4, "啊bc", ""},
+		{"abc\xef\x03\xfe", 3, "", asciiEllipsis},
+		{"abc\xef\x03\xfe", 4, "a", asciiEllipsis},
+		{"\xef\x03", 1, "\xef\x03", ""},
+	}
+	test(tc, SplitStringAtRuneN)
+}