diff --git a/models/issues/pull.go b/models/issues/pull.go index 23b938f95ab7..2acc2b4226e0 100644 --- a/models/issues/pull.go +++ b/models/issues/pull.go @@ -920,7 +920,7 @@ func PullRequestCodeOwnersReview(ctx context.Context, pull *Issue, pr *PullReque var data string for _, file := range files { if blob, err := commit.GetBlobByPath(file); err == nil { - data, err = blob.GetBlobContent() + data, err = blob.GetBlobContent(setting.UI.MaxDisplayFileSize) if err == nil { break } diff --git a/modules/git/blob.go b/modules/git/blob.go index 8864f54d1bf2..bcecb42e16eb 100644 --- a/modules/git/blob.go +++ b/modules/git/blob.go @@ -20,17 +20,18 @@ func (b *Blob) Name() string { return b.name } -// GetBlobContent Gets the content of the blob as raw text -func (b *Blob) GetBlobContent() (string, error) { +// GetBlobContent Gets the limited content of the blob as raw text +func (b *Blob) GetBlobContent(limit int64) (string, error) { + if limit <= 0 { + return "", nil + } dataRc, err := b.DataAsync() if err != nil { return "", err } defer dataRc.Close() - buf := make([]byte, 1024) - n, _ := util.ReadAtMost(dataRc, buf) - buf = buf[:n] - return string(buf), nil + buf, err := util.ReadWithLimit(dataRc, int(limit)) + return string(buf), err } // GetBlobLineCount gets line count of the blob diff --git a/modules/util/io.go b/modules/util/io.go index 69b1d63145a4..1559b019a063 100644 --- a/modules/util/io.go +++ b/modules/util/io.go @@ -4,13 +4,14 @@ package util import ( + "bytes" "errors" "io" ) // ReadAtMost reads at most len(buf) bytes from r into buf. // It returns the number of bytes copied. n is only less than len(buf) if r provides fewer bytes. -// If EOF occurs while reading, err will be nil. +// If EOF or ErrUnexpectedEOF occurs while reading, err will be nil. func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { n, err = io.ReadFull(r, buf) if err == io.EOF || err == io.ErrUnexpectedEOF { @@ -19,6 +20,42 @@ func ReadAtMost(r io.Reader, buf []byte) (n int, err error) { return n, err } +// ReadWithLimit reads at most "limit" bytes from r into buf. +// If EOF or ErrUnexpectedEOF occurs while reading, err will be nil. +func ReadWithLimit(r io.Reader, n int) (buf []byte, err error) { + return readWithLimit(r, 1024, n) +} + +func readWithLimit(r io.Reader, batch, limit int) ([]byte, error) { + if limit <= batch { + buf := make([]byte, limit) + n, err := ReadAtMost(r, buf) + if err != nil { + return nil, err + } + return buf[:n], nil + } + res := bytes.NewBuffer(make([]byte, 0, batch)) + bufFix := make([]byte, batch) + eof := false + for res.Len() < limit && !eof { + bufTmp := bufFix + if res.Len()+batch > limit { + bufTmp = bufFix[:limit-res.Len()] + } + n, err := io.ReadFull(r, bufTmp) + if err == io.EOF || err == io.ErrUnexpectedEOF { + eof = true + } else if err != nil { + return nil, err + } + if _, err = res.Write(bufTmp[:n]); err != nil { + return nil, err + } + } + return res.Bytes(), nil +} + // ErrNotEmpty is an error reported when there is a non-empty reader var ErrNotEmpty = errors.New("not-empty") diff --git a/modules/util/io_test.go b/modules/util/io_test.go new file mode 100644 index 000000000000..275575463a63 --- /dev/null +++ b/modules/util/io_test.go @@ -0,0 +1,66 @@ +// Copyright 2023 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package util + +import ( + "bytes" + "errors" + "testing" + + "github.com/stretchr/testify/assert" +) + +type readerWithError struct { + buf *bytes.Buffer +} + +func (r *readerWithError) Read(p []byte) (n int, err error) { + if r.buf.Len() < 2 { + return 0, errors.New("test error") + } + return r.buf.Read(p) +} + +func TestReadWithLimit(t *testing.T) { + bs := []byte("0123456789abcdef") + + // normal test + buf, err := readWithLimit(bytes.NewBuffer(bs), 5, 2) + assert.NoError(t, err) + assert.Equal(t, []byte("01"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 5) + assert.NoError(t, err) + assert.Equal(t, []byte("01234"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 6) + assert.NoError(t, err) + assert.Equal(t, []byte("012345"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, len(bs)) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789abcdef"), buf) + + buf, err = readWithLimit(bytes.NewBuffer(bs), 5, 100) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789abcdef"), buf) + + // test with error + buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 10) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789"), buf) + + buf, err = readWithLimit(&readerWithError{bytes.NewBuffer(bs)}, 5, 100) + assert.ErrorContains(t, err, "test error") + assert.Empty(t, buf) + + // test public function + buf, err = ReadWithLimit(bytes.NewBuffer(bs), 2) + assert.NoError(t, err) + assert.Equal(t, []byte("01"), buf) + + buf, err = ReadWithLimit(bytes.NewBuffer(bs), 9999999) + assert.NoError(t, err) + assert.Equal(t, []byte("0123456789abcdef"), buf) +} diff --git a/routers/web/repo/view.go b/routers/web/repo/view.go index 1d54f25884e3..cf719c49f0b2 100644 --- a/routers/web/repo/view.go +++ b/routers/web/repo/view.go @@ -363,7 +363,7 @@ func renderFile(ctx *context.Context, entry *git.TreeEntry, treeLink, rawLink st ctx.Data["FileError"] = ctx.Locale.Tr("actions.runs.invalid_workflow_helper", workFlowErr.Error()) } } else if util.SliceContains([]string{"CODEOWNERS", "docs/CODEOWNERS", ".gitea/CODEOWNERS"}, ctx.Repo.TreePath) { - if data, err := blob.GetBlobContent(); err == nil { + if data, err := blob.GetBlobContent(setting.UI.MaxDisplayFileSize); err == nil { _, warnings := issue_model.GetCodeOwnersFromContent(ctx, data) if len(warnings) > 0 { ctx.Data["FileWarning"] = strings.Join(warnings, "\n") diff --git a/routers/web/user/profile.go b/routers/web/user/profile.go index 42ae37e3ba30..6f9f84d60dbd 100644 --- a/routers/web/user/profile.go +++ b/routers/web/user/profile.go @@ -107,7 +107,7 @@ func Profile(ctx *context.Context) { } blob, err := commit.GetBlobByPath("README.md") if err == nil { - bytes, err := blob.GetBlobContent() + bytes, err := blob.GetBlobContent(setting.UI.MaxDisplayFileSize) if err != nil { ctx.ServerError("GetBlobContent", err) return diff --git a/services/repository/files/content.go b/services/repository/files/content.go index 6f6dc91d859a..c701431d6785 100644 --- a/services/repository/files/content.go +++ b/services/repository/files/content.go @@ -203,7 +203,7 @@ func GetContents(ctx context.Context, repo *repo_model.Repository, treePath, ref } else if entry.IsLink() { contentsResponse.Type = string(ContentTypeLink) // The target of a symlink file is the content of the file - targetFromContent, err := entry.Blob().GetBlobContent() + targetFromContent, err := entry.Blob().GetBlobContent(1024) if err != nil { return nil, err } diff --git a/tests/integration/api_packages_cargo_test.go b/tests/integration/api_packages_cargo_test.go index 608f19296819..03d8e0c5207e 100644 --- a/tests/integration/api_packages_cargo_test.go +++ b/tests/integration/api_packages_cargo_test.go @@ -88,7 +88,7 @@ func testPackageCargo(t *testing.T, _ *neturl.URL) { blob, err := commit.GetBlobByPath(path) assert.NoError(t, err) - content, err := blob.GetBlobContent() + content, err := blob.GetBlobContent(1024) assert.NoError(t, err) return content