forked from gitea/gitea
1634 lines
49 KiB
Go
1634 lines
49 KiB
Go
// Copyright 2014 The Gogs Authors. All rights reserved.
|
|
// Copyright 2019 The Gitea Authors. All rights reserved.
|
|
// Use of this source code is governed by a MIT-style
|
|
// license that can be found in the LICENSE file.
|
|
|
|
package gitdiff
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"html"
|
|
"html/template"
|
|
"io"
|
|
"net/url"
|
|
"os"
|
|
"regexp"
|
|
"sort"
|
|
"strings"
|
|
"time"
|
|
|
|
"code.gitea.io/gitea/models"
|
|
"code.gitea.io/gitea/models/db"
|
|
pull_model "code.gitea.io/gitea/models/pull"
|
|
user_model "code.gitea.io/gitea/models/user"
|
|
"code.gitea.io/gitea/modules/analyze"
|
|
"code.gitea.io/gitea/modules/base"
|
|
"code.gitea.io/gitea/modules/charset"
|
|
"code.gitea.io/gitea/modules/git"
|
|
"code.gitea.io/gitea/modules/highlight"
|
|
"code.gitea.io/gitea/modules/lfs"
|
|
"code.gitea.io/gitea/modules/log"
|
|
"code.gitea.io/gitea/modules/setting"
|
|
|
|
"github.com/sergi/go-diff/diffmatchpatch"
|
|
stdcharset "golang.org/x/net/html/charset"
|
|
"golang.org/x/text/encoding"
|
|
"golang.org/x/text/transform"
|
|
)
|
|
|
|
// DiffLineType represents the type of a DiffLine.
|
|
type DiffLineType uint8
|
|
|
|
// DiffLineType possible values.
|
|
const (
|
|
DiffLinePlain DiffLineType = iota + 1
|
|
DiffLineAdd
|
|
DiffLineDel
|
|
DiffLineSection
|
|
)
|
|
|
|
// DiffFileType represents the type of a DiffFile.
|
|
type DiffFileType uint8
|
|
|
|
// DiffFileType possible values.
|
|
const (
|
|
DiffFileAdd DiffFileType = iota + 1
|
|
DiffFileChange
|
|
DiffFileDel
|
|
DiffFileRename
|
|
DiffFileCopy
|
|
)
|
|
|
|
// DiffLineExpandDirection represents the DiffLineSection expand direction
|
|
type DiffLineExpandDirection uint8
|
|
|
|
// DiffLineExpandDirection possible values.
|
|
const (
|
|
DiffLineExpandNone DiffLineExpandDirection = iota + 1
|
|
DiffLineExpandSingle
|
|
DiffLineExpandUpDown
|
|
DiffLineExpandUp
|
|
DiffLineExpandDown
|
|
)
|
|
|
|
// DiffLine represents a line difference in a DiffSection.
|
|
type DiffLine struct {
|
|
LeftIdx int
|
|
RightIdx int
|
|
Match int
|
|
Type DiffLineType
|
|
Content string
|
|
Comments []*models.Comment
|
|
SectionInfo *DiffLineSectionInfo
|
|
}
|
|
|
|
// DiffLineSectionInfo represents diff line section meta data
|
|
type DiffLineSectionInfo struct {
|
|
Path string
|
|
LastLeftIdx int
|
|
LastRightIdx int
|
|
LeftIdx int
|
|
RightIdx int
|
|
LeftHunkSize int
|
|
RightHunkSize int
|
|
}
|
|
|
|
// BlobExcerptChunkSize represent max lines of excerpt
|
|
const BlobExcerptChunkSize = 20
|
|
|
|
// GetType returns the type of a DiffLine.
|
|
func (d *DiffLine) GetType() int {
|
|
return int(d.Type)
|
|
}
|
|
|
|
// CanComment returns whether or not a line can get commented
|
|
func (d *DiffLine) CanComment() bool {
|
|
return len(d.Comments) == 0 && d.Type != DiffLineSection
|
|
}
|
|
|
|
// GetCommentSide returns the comment side of the first comment, if not set returns empty string
|
|
func (d *DiffLine) GetCommentSide() string {
|
|
if len(d.Comments) == 0 {
|
|
return ""
|
|
}
|
|
return d.Comments[0].DiffSide()
|
|
}
|
|
|
|
// GetLineTypeMarker returns the line type marker
|
|
func (d *DiffLine) GetLineTypeMarker() string {
|
|
if strings.IndexByte(" +-", d.Content[0]) > -1 {
|
|
return d.Content[0:1]
|
|
}
|
|
return ""
|
|
}
|
|
|
|
// GetBlobExcerptQuery builds query string to get blob excerpt
|
|
func (d *DiffLine) GetBlobExcerptQuery() string {
|
|
query := fmt.Sprintf(
|
|
"last_left=%d&last_right=%d&"+
|
|
"left=%d&right=%d&"+
|
|
"left_hunk_size=%d&right_hunk_size=%d&"+
|
|
"path=%s",
|
|
d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx,
|
|
d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx,
|
|
d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize,
|
|
url.QueryEscape(d.SectionInfo.Path))
|
|
return query
|
|
}
|
|
|
|
// GetExpandDirection gets DiffLineExpandDirection
|
|
func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection {
|
|
if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 {
|
|
return DiffLineExpandNone
|
|
}
|
|
if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 {
|
|
return DiffLineExpandUp
|
|
} else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 {
|
|
return DiffLineExpandUpDown
|
|
} else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 {
|
|
return DiffLineExpandDown
|
|
}
|
|
return DiffLineExpandSingle
|
|
}
|
|
|
|
func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo {
|
|
leftLine, leftHunk, rightLine, righHunk := git.ParseDiffHunkString(line)
|
|
|
|
return &DiffLineSectionInfo{
|
|
Path: treePath,
|
|
LastLeftIdx: lastLeftIdx,
|
|
LastRightIdx: lastRightIdx,
|
|
LeftIdx: leftLine,
|
|
RightIdx: rightLine,
|
|
LeftHunkSize: leftHunk,
|
|
RightHunkSize: righHunk,
|
|
}
|
|
}
|
|
|
|
// escape a line's content or return <br> needed for copy/paste purposes
|
|
func getLineContent(content string) DiffInline {
|
|
if len(content) > 0 {
|
|
return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)))
|
|
}
|
|
return DiffInline{Content: "<br>"}
|
|
}
|
|
|
|
// DiffSection represents a section of a DiffFile.
|
|
type DiffSection struct {
|
|
file *DiffFile
|
|
FileName string
|
|
Name string
|
|
Lines []*DiffLine
|
|
}
|
|
|
|
var (
|
|
addedCodePrefix = []byte(`<span class="added-code">`)
|
|
removedCodePrefix = []byte(`<span class="removed-code">`)
|
|
codeTagSuffix = []byte(`</span>`)
|
|
)
|
|
|
|
var (
|
|
unfinishedtagRegex = regexp.MustCompile(`<[^>]*$`)
|
|
trailingSpanRegex = regexp.MustCompile(`<span\s*[[:alpha:]="]*?[>]?$`)
|
|
entityRegex = regexp.MustCompile(`&[#]*?[0-9[:alpha:]]*$`)
|
|
)
|
|
|
|
// shouldWriteInline represents combinations where we manually write inline changes
|
|
func shouldWriteInline(diff diffmatchpatch.Diff, lineType DiffLineType) bool {
|
|
if true &&
|
|
diff.Type == diffmatchpatch.DiffEqual ||
|
|
diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd ||
|
|
diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
func fixupBrokenSpans(diffs []diffmatchpatch.Diff) []diffmatchpatch.Diff {
|
|
// Create a new array to store our fixed up blocks
|
|
fixedup := make([]diffmatchpatch.Diff, 0, len(diffs))
|
|
|
|
// semantically label some numbers
|
|
const insert, delete, equal = 0, 1, 2
|
|
|
|
// record the positions of the last type of each block in the fixedup blocks
|
|
last := []int{-1, -1, -1}
|
|
operation := []diffmatchpatch.Operation{diffmatchpatch.DiffInsert, diffmatchpatch.DiffDelete, diffmatchpatch.DiffEqual}
|
|
|
|
// create a writer for insert and deletes
|
|
toWrite := []strings.Builder{
|
|
{},
|
|
{},
|
|
}
|
|
|
|
// make some flags for insert and delete
|
|
unfinishedTag := []bool{false, false}
|
|
unfinishedEnt := []bool{false, false}
|
|
|
|
// store stores the provided text in the writer for the typ
|
|
store := func(text string, typ int) {
|
|
(&(toWrite[typ])).WriteString(text)
|
|
}
|
|
|
|
// hasStored returns true if there is stored content
|
|
hasStored := func(typ int) bool {
|
|
return (&toWrite[typ]).Len() > 0
|
|
}
|
|
|
|
// stored will return that content
|
|
stored := func(typ int) string {
|
|
return (&toWrite[typ]).String()
|
|
}
|
|
|
|
// empty will empty the stored content
|
|
empty := func(typ int) {
|
|
(&toWrite[typ]).Reset()
|
|
}
|
|
|
|
// pop will remove the stored content appending to a diff block for that typ
|
|
pop := func(typ int, fixedup []diffmatchpatch.Diff) []diffmatchpatch.Diff {
|
|
if hasStored(typ) {
|
|
if last[typ] > last[equal] {
|
|
fixedup[last[typ]].Text += stored(typ)
|
|
} else {
|
|
fixedup = append(fixedup, diffmatchpatch.Diff{
|
|
Type: operation[typ],
|
|
Text: stored(typ),
|
|
})
|
|
}
|
|
empty(typ)
|
|
}
|
|
return fixedup
|
|
}
|
|
|
|
// Now we walk the provided diffs and check the type of each block in turn
|
|
for _, diff := range diffs {
|
|
|
|
typ := delete // flag for handling insert or delete typs
|
|
switch diff.Type {
|
|
case diffmatchpatch.DiffEqual:
|
|
// First check if there is anything stored
|
|
if hasStored(insert) || hasStored(delete) {
|
|
// There are two reasons for storing content:
|
|
// 1. Unfinished Entity <- Could be more efficient here by not doing this if we're looking for a tag
|
|
if unfinishedEnt[insert] || unfinishedEnt[delete] {
|
|
// we look for a ';' to finish an entity
|
|
idx := strings.IndexRune(diff.Text, ';')
|
|
if idx >= 0 {
|
|
// if we find a ';' store the preceding content to both insert and delete
|
|
store(diff.Text[:idx+1], insert)
|
|
store(diff.Text[:idx+1], delete)
|
|
|
|
// and remove it from this block
|
|
diff.Text = diff.Text[idx+1:]
|
|
|
|
// reset the ent flags
|
|
unfinishedEnt[insert] = false
|
|
unfinishedEnt[delete] = false
|
|
} else {
|
|
// otherwise store it all on insert and delete
|
|
store(diff.Text, insert)
|
|
store(diff.Text, delete)
|
|
// and empty this block
|
|
diff.Text = ""
|
|
}
|
|
}
|
|
// 2. Unfinished Tag
|
|
if unfinishedTag[insert] || unfinishedTag[delete] {
|
|
// we look for a '>' to finish a tag
|
|
idx := strings.IndexRune(diff.Text, '>')
|
|
if idx >= 0 {
|
|
store(diff.Text[:idx+1], insert)
|
|
store(diff.Text[:idx+1], delete)
|
|
diff.Text = diff.Text[idx+1:]
|
|
unfinishedTag[insert] = false
|
|
unfinishedTag[delete] = false
|
|
} else {
|
|
store(diff.Text, insert)
|
|
store(diff.Text, delete)
|
|
diff.Text = ""
|
|
}
|
|
}
|
|
|
|
// If we've completed the required tag/entities
|
|
if !(unfinishedTag[insert] || unfinishedTag[delete] || unfinishedEnt[insert] || unfinishedEnt[delete]) {
|
|
// pop off the stack
|
|
fixedup = pop(insert, fixedup)
|
|
fixedup = pop(delete, fixedup)
|
|
}
|
|
|
|
// If that has left this diff block empty then shortcut
|
|
if len(diff.Text) == 0 {
|
|
continue
|
|
}
|
|
}
|
|
|
|
// check if this block ends in an unfinished tag?
|
|
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
|
|
if idx != nil {
|
|
unfinishedTag[insert] = true
|
|
unfinishedTag[delete] = true
|
|
} else {
|
|
// otherwise does it end in an unfinished entity?
|
|
idx = entityRegex.FindStringIndex(diff.Text)
|
|
if idx != nil {
|
|
unfinishedEnt[insert] = true
|
|
unfinishedEnt[delete] = true
|
|
}
|
|
}
|
|
|
|
// If there is an unfinished component
|
|
if idx != nil {
|
|
// Store the fragment
|
|
store(diff.Text[idx[0]:], insert)
|
|
store(diff.Text[idx[0]:], delete)
|
|
// and remove it from this block
|
|
diff.Text = diff.Text[:idx[0]]
|
|
}
|
|
|
|
// If that hasn't left the block empty
|
|
if len(diff.Text) > 0 {
|
|
// store the position of the last equal block and store it in our diffs
|
|
last[equal] = len(fixedup)
|
|
fixedup = append(fixedup, diff)
|
|
}
|
|
continue
|
|
case diffmatchpatch.DiffInsert:
|
|
typ = insert
|
|
fallthrough
|
|
case diffmatchpatch.DiffDelete:
|
|
// First check if there is anything stored for this type
|
|
if hasStored(typ) {
|
|
// if there is prepend it to this block, empty the storage and reset our flags
|
|
diff.Text = stored(typ) + diff.Text
|
|
empty(typ)
|
|
unfinishedEnt[typ] = false
|
|
unfinishedTag[typ] = false
|
|
}
|
|
|
|
// check if this block ends in an unfinished tag
|
|
idx := unfinishedtagRegex.FindStringIndex(diff.Text)
|
|
if idx != nil {
|
|
unfinishedTag[typ] = true
|
|
} else {
|
|
// otherwise does it end in an unfinished entity
|
|
idx = entityRegex.FindStringIndex(diff.Text)
|
|
if idx != nil {
|
|
unfinishedEnt[typ] = true
|
|
}
|
|
}
|
|
|
|
// If there is an unfinished component
|
|
if idx != nil {
|
|
// Store the fragment
|
|
store(diff.Text[idx[0]:], typ)
|
|
// and remove it from this block
|
|
diff.Text = diff.Text[:idx[0]]
|
|
}
|
|
|
|
// If that hasn't left the block empty
|
|
if len(diff.Text) > 0 {
|
|
// if the last block of this type was after the last equal block
|
|
if last[typ] > last[equal] {
|
|
// store this blocks content on that block
|
|
fixedup[last[typ]].Text += diff.Text
|
|
} else {
|
|
// otherwise store the position of the last block of this type and store the block
|
|
last[typ] = len(fixedup)
|
|
fixedup = append(fixedup, diff)
|
|
}
|
|
}
|
|
continue
|
|
}
|
|
}
|
|
|
|
// pop off any remaining stored content
|
|
fixedup = pop(insert, fixedup)
|
|
fixedup = pop(delete, fixedup)
|
|
|
|
return fixedup
|
|
}
|
|
|
|
func diffToHTML(fileName string, diffs []diffmatchpatch.Diff, lineType DiffLineType) DiffInline {
|
|
buf := bytes.NewBuffer(nil)
|
|
match := ""
|
|
|
|
diffs = fixupBrokenSpans(diffs)
|
|
|
|
for _, diff := range diffs {
|
|
if shouldWriteInline(diff, lineType) {
|
|
if len(match) > 0 {
|
|
diff.Text = match + diff.Text
|
|
match = ""
|
|
}
|
|
// Chroma HTML syntax highlighting is done before diffing individual lines in order to maintain consistency.
|
|
// Since inline changes might split in the middle of a chroma span tag or HTML entity, make we manually put it back together
|
|
// before writing so we don't try insert added/removed code spans in the middle of one of those
|
|
// and create broken HTML. This is done by moving incomplete HTML forward until it no longer matches our pattern of
|
|
// a line ending with an incomplete HTML entity or partial/opening <span>.
|
|
|
|
// EX:
|
|
// diffs[{Type: dmp.DiffDelete, Text: "language</span><span "},
|
|
// {Type: dmp.DiffEqual, Text: "c"},
|
|
// {Type: dmp.DiffDelete, Text: "lass="p">}]
|
|
|
|
// After first iteration
|
|
// diffs[{Type: dmp.DiffDelete, Text: "language</span>"}, //write out
|
|
// {Type: dmp.DiffEqual, Text: "<span c"},
|
|
// {Type: dmp.DiffDelete, Text: "lass="p">,</span>}]
|
|
|
|
// After second iteration
|
|
// {Type: dmp.DiffEqual, Text: ""}, // write out
|
|
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
|
|
|
|
// Final
|
|
// {Type: dmp.DiffDelete, Text: "<span class="p">,</span>}]
|
|
// end up writing <span class="removed-code"><span class="p">,</span></span>
|
|
// Instead of <span class="removed-code">lass="p",</span></span>
|
|
|
|
m := trailingSpanRegex.FindStringSubmatchIndex(diff.Text)
|
|
if m != nil {
|
|
match = diff.Text[m[0]:m[1]]
|
|
diff.Text = strings.TrimSuffix(diff.Text, match)
|
|
}
|
|
m = entityRegex.FindStringSubmatchIndex(diff.Text)
|
|
if m != nil {
|
|
match = diff.Text[m[0]:m[1]]
|
|
diff.Text = strings.TrimSuffix(diff.Text, match)
|
|
}
|
|
// Print an existing closing span first before opening added/remove-code span so it doesn't unintentionally close it
|
|
if strings.HasPrefix(diff.Text, "</span>") {
|
|
buf.WriteString("</span>")
|
|
diff.Text = strings.TrimPrefix(diff.Text, "</span>")
|
|
}
|
|
// If we weren't able to fix it then this should avoid broken HTML by not inserting more spans below
|
|
// The previous/next diff section will contain the rest of the tag that is missing here
|
|
if strings.Count(diff.Text, "<") != strings.Count(diff.Text, ">") {
|
|
buf.WriteString(diff.Text)
|
|
continue
|
|
}
|
|
}
|
|
switch {
|
|
case diff.Type == diffmatchpatch.DiffEqual:
|
|
buf.WriteString(diff.Text)
|
|
case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
|
|
buf.Write(addedCodePrefix)
|
|
buf.WriteString(diff.Text)
|
|
buf.Write(codeTagSuffix)
|
|
case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
|
|
buf.Write(removedCodePrefix)
|
|
buf.WriteString(diff.Text)
|
|
buf.Write(codeTagSuffix)
|
|
}
|
|
}
|
|
return DiffInlineWithUnicodeEscape(template.HTML(buf.String()))
|
|
}
|
|
|
|
// GetLine gets a specific line by type (add or del) and file line number
|
|
func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
|
|
var (
|
|
difference = 0
|
|
addCount = 0
|
|
delCount = 0
|
|
matchDiffLine *DiffLine
|
|
)
|
|
|
|
LOOP:
|
|
for _, diffLine := range diffSection.Lines {
|
|
switch diffLine.Type {
|
|
case DiffLineAdd:
|
|
addCount++
|
|
case DiffLineDel:
|
|
delCount++
|
|
default:
|
|
if matchDiffLine != nil {
|
|
break LOOP
|
|
}
|
|
difference = diffLine.RightIdx - diffLine.LeftIdx
|
|
addCount = 0
|
|
delCount = 0
|
|
}
|
|
|
|
switch lineType {
|
|
case DiffLineDel:
|
|
if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
|
|
matchDiffLine = diffLine
|
|
}
|
|
case DiffLineAdd:
|
|
if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
|
|
matchDiffLine = diffLine
|
|
}
|
|
}
|
|
}
|
|
|
|
if addCount == delCount {
|
|
return matchDiffLine
|
|
}
|
|
return nil
|
|
}
|
|
|
|
var diffMatchPatch = diffmatchpatch.New()
|
|
|
|
func init() {
|
|
diffMatchPatch.DiffEditCost = 100
|
|
}
|
|
|
|
// DiffInline is a struct that has a content and escape status
|
|
type DiffInline struct {
|
|
EscapeStatus charset.EscapeStatus
|
|
Content template.HTML
|
|
}
|
|
|
|
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
|
|
func DiffInlineWithUnicodeEscape(s template.HTML) DiffInline {
|
|
status, content := charset.EscapeControlString(string(s))
|
|
return DiffInline{EscapeStatus: status, Content: template.HTML(content)}
|
|
}
|
|
|
|
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
|
|
func DiffInlineWithHighlightCode(fileName, language, code string) DiffInline {
|
|
status, content := charset.EscapeControlString(highlight.Code(fileName, language, code))
|
|
return DiffInline{EscapeStatus: status, Content: template.HTML(content)}
|
|
}
|
|
|
|
// GetComputedInlineDiffFor computes inline diff for the given line.
|
|
func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) DiffInline {
|
|
if setting.Git.DisableDiffHighlight {
|
|
return getLineContent(diffLine.Content[1:])
|
|
}
|
|
|
|
var (
|
|
compareDiffLine *DiffLine
|
|
diff1 string
|
|
diff2 string
|
|
)
|
|
|
|
language := ""
|
|
if diffSection.file != nil {
|
|
language = diffSection.file.Language
|
|
}
|
|
|
|
// try to find equivalent diff line. ignore, otherwise
|
|
switch diffLine.Type {
|
|
case DiffLineSection:
|
|
return getLineContent(diffLine.Content[1:])
|
|
case DiffLineAdd:
|
|
compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
|
|
if compareDiffLine == nil {
|
|
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:])
|
|
}
|
|
diff1 = compareDiffLine.Content
|
|
diff2 = diffLine.Content
|
|
case DiffLineDel:
|
|
compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
|
|
if compareDiffLine == nil {
|
|
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:])
|
|
}
|
|
diff1 = diffLine.Content
|
|
diff2 = compareDiffLine.Content
|
|
default:
|
|
if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
|
|
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:])
|
|
}
|
|
return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content)
|
|
}
|
|
|
|
diffRecord := diffMatchPatch.DiffMain(highlight.Code(diffSection.FileName, language, diff1[1:]), highlight.Code(diffSection.FileName, language, diff2[1:]), true)
|
|
diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
|
|
|
|
return diffToHTML(diffSection.FileName, diffRecord, diffLine.Type)
|
|
}
|
|
|
|
// DiffFile represents a file diff.
|
|
type DiffFile struct {
|
|
Name string
|
|
NameHash string
|
|
OldName string
|
|
Index int
|
|
Addition, Deletion int
|
|
Type DiffFileType
|
|
IsCreated bool
|
|
IsDeleted bool
|
|
IsBin bool
|
|
IsLFSFile bool
|
|
IsRenamed bool
|
|
IsAmbiguous bool
|
|
IsSubmodule bool
|
|
Sections []*DiffSection
|
|
IsIncomplete bool
|
|
IsIncompleteLineTooLong bool
|
|
IsProtected bool
|
|
IsGenerated bool
|
|
IsVendored bool
|
|
IsViewed bool // User specific
|
|
HasChangedSinceLastReview bool // User specific
|
|
Language string
|
|
}
|
|
|
|
// GetType returns type of diff file.
|
|
func (diffFile *DiffFile) GetType() int {
|
|
return int(diffFile.Type)
|
|
}
|
|
|
|
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
|
|
func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
|
|
if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
|
|
return nil
|
|
}
|
|
leftCommit, err := gitRepo.GetCommit(leftCommitID)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
rightCommit, err := gitRepo.GetCommit(rightCommitID)
|
|
if err != nil {
|
|
return nil
|
|
}
|
|
lastSection := diffFile.Sections[len(diffFile.Sections)-1]
|
|
lastLine := lastSection.Lines[len(lastSection.Lines)-1]
|
|
leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
|
|
rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name)
|
|
if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx {
|
|
return nil
|
|
}
|
|
tailDiffLine := &DiffLine{
|
|
Type: DiffLineSection,
|
|
Content: " ",
|
|
SectionInfo: &DiffLineSectionInfo{
|
|
Path: diffFile.Name,
|
|
LastLeftIdx: lastLine.LeftIdx,
|
|
LastRightIdx: lastLine.RightIdx,
|
|
LeftIdx: leftLineCount,
|
|
RightIdx: rightLineCount,
|
|
},
|
|
}
|
|
tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}}
|
|
return tailSection
|
|
}
|
|
|
|
// GetDiffFileName returns the name of the diff file, or its old name in case it was deleted
|
|
func (diffFile *DiffFile) GetDiffFileName() string {
|
|
if diffFile.Name == "" {
|
|
return diffFile.OldName
|
|
}
|
|
return diffFile.Name
|
|
}
|
|
|
|
func (diffFile *DiffFile) ShouldBeHidden() bool {
|
|
return diffFile.IsGenerated || diffFile.IsViewed
|
|
}
|
|
|
|
func getCommitFileLineCount(commit *git.Commit, filePath string) int {
|
|
blob, err := commit.GetBlobByPath(filePath)
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
lineCount, err := blob.GetBlobLineCount()
|
|
if err != nil {
|
|
return 0
|
|
}
|
|
return lineCount
|
|
}
|
|
|
|
// Diff represents a difference between two git trees.
|
|
type Diff struct {
|
|
Start, End string
|
|
NumFiles int
|
|
TotalAddition, TotalDeletion int
|
|
Files []*DiffFile
|
|
IsIncomplete bool
|
|
NumViewedFiles int // user-specific
|
|
}
|
|
|
|
// LoadComments loads comments into each line
|
|
func (diff *Diff) LoadComments(ctx context.Context, issue *models.Issue, currentUser *user_model.User) error {
|
|
allComments, err := models.FetchCodeComments(ctx, issue, currentUser)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
for _, file := range diff.Files {
|
|
if lineCommits, ok := allComments[file.Name]; ok {
|
|
for _, section := range file.Sections {
|
|
for _, line := range section.Lines {
|
|
if comments, ok := lineCommits[int64(line.LeftIdx*-1)]; ok {
|
|
line.Comments = append(line.Comments, comments...)
|
|
}
|
|
if comments, ok := lineCommits[int64(line.RightIdx)]; ok {
|
|
line.Comments = append(line.Comments, comments...)
|
|
}
|
|
sort.SliceStable(line.Comments, func(i, j int) bool {
|
|
return line.Comments[i].CreatedUnix < line.Comments[j].CreatedUnix
|
|
})
|
|
}
|
|
}
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
const cmdDiffHead = "diff --git "
|
|
|
|
// ParsePatch builds a Diff object from a io.Reader and some parameters.
|
|
func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) {
|
|
log.Debug("ParsePatch(%d, %d, %d, ..., %s)", maxLines, maxLineCharacters, maxFiles, skipToFile)
|
|
var curFile *DiffFile
|
|
|
|
skipping := skipToFile != ""
|
|
|
|
diff := &Diff{Files: make([]*DiffFile, 0)}
|
|
|
|
sb := strings.Builder{}
|
|
|
|
// OK let's set a reasonable buffer size.
|
|
// This should be let's say at least the size of maxLineCharacters or 4096 whichever is larger.
|
|
readerSize := maxLineCharacters
|
|
if readerSize < 4096 {
|
|
readerSize = 4096
|
|
}
|
|
|
|
input := bufio.NewReaderSize(reader, readerSize)
|
|
line, err := input.ReadString('\n')
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
return diff, nil
|
|
}
|
|
return diff, err
|
|
}
|
|
parsingLoop:
|
|
for {
|
|
// 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
|
|
// if it does not we have bad input!
|
|
if !strings.HasPrefix(line, cmdDiffHead) {
|
|
return diff, fmt.Errorf("invalid first file line: %s", line)
|
|
}
|
|
|
|
if maxFiles > -1 && len(diff.Files) >= maxFiles {
|
|
lastFile := createDiffFile(diff, line)
|
|
diff.End = lastFile.Name
|
|
diff.IsIncomplete = true
|
|
_, err := io.Copy(io.Discard, reader)
|
|
if err != nil {
|
|
// By the definition of io.Copy this never returns io.EOF
|
|
return diff, fmt.Errorf("error during io.Copy: %w", err)
|
|
}
|
|
break parsingLoop
|
|
}
|
|
|
|
curFile = createDiffFile(diff, line)
|
|
if skipping {
|
|
if curFile.Name != skipToFile {
|
|
line, err = skipToNextDiffHead(input)
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
return diff, nil
|
|
}
|
|
return diff, err
|
|
}
|
|
continue
|
|
}
|
|
skipping = false
|
|
}
|
|
|
|
diff.Files = append(diff.Files, curFile)
|
|
|
|
// 2. It is followed by one or more extended header lines:
|
|
//
|
|
// old mode <mode>
|
|
// new mode <mode>
|
|
// deleted file mode <mode>
|
|
// new file mode <mode>
|
|
// copy from <path>
|
|
// copy to <path>
|
|
// rename from <path>
|
|
// rename to <path>
|
|
// similarity index <number>
|
|
// dissimilarity index <number>
|
|
// index <hash>..<hash> <mode>
|
|
//
|
|
// * <mode> 6-digit octal numbers including the file type and file permission bits.
|
|
// * <path> does not include the a/ and b/ prefixes
|
|
// * <number> percentage of unchanged lines for similarity, percentage of changed
|
|
// lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
|
|
// * The index line includes the blob object names before and after the change.
|
|
// The <mode> is included if the file mode does not change; otherwise, separate
|
|
// lines indicate the old and the new mode.
|
|
// 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
|
|
//
|
|
// --- a/<path>
|
|
// +++ b/<path>
|
|
//
|
|
// With multiple hunks
|
|
//
|
|
// @@ <hunk descriptor> @@
|
|
// +added line
|
|
// -removed line
|
|
// unchanged line
|
|
//
|
|
// 4. Binary files get:
|
|
//
|
|
// Binary files a/<path> and b/<path> differ
|
|
//
|
|
// but one of a/<path> and b/<path> could be /dev/null.
|
|
curFileLoop:
|
|
for {
|
|
line, err = input.ReadString('\n')
|
|
if err != nil {
|
|
if err != io.EOF {
|
|
return diff, err
|
|
}
|
|
break parsingLoop
|
|
}
|
|
switch {
|
|
case strings.HasPrefix(line, cmdDiffHead):
|
|
break curFileLoop
|
|
case strings.HasPrefix(line, "old mode ") ||
|
|
strings.HasPrefix(line, "new mode "):
|
|
if strings.HasSuffix(line, " 160000\n") {
|
|
curFile.IsSubmodule = true
|
|
}
|
|
case strings.HasPrefix(line, "rename from "):
|
|
curFile.IsRenamed = true
|
|
curFile.Type = DiffFileRename
|
|
if curFile.IsAmbiguous {
|
|
curFile.OldName = line[len("rename from ") : len(line)-1]
|
|
}
|
|
case strings.HasPrefix(line, "rename to "):
|
|
curFile.IsRenamed = true
|
|
curFile.Type = DiffFileRename
|
|
if curFile.IsAmbiguous {
|
|
curFile.Name = line[len("rename to ") : len(line)-1]
|
|
curFile.IsAmbiguous = false
|
|
}
|
|
case strings.HasPrefix(line, "copy from "):
|
|
curFile.IsRenamed = true
|
|
curFile.Type = DiffFileCopy
|
|
if curFile.IsAmbiguous {
|
|
curFile.OldName = line[len("copy from ") : len(line)-1]
|
|
}
|
|
case strings.HasPrefix(line, "copy to "):
|
|
curFile.IsRenamed = true
|
|
curFile.Type = DiffFileCopy
|
|
if curFile.IsAmbiguous {
|
|
curFile.Name = line[len("copy to ") : len(line)-1]
|
|
curFile.IsAmbiguous = false
|
|
}
|
|
case strings.HasPrefix(line, "new file"):
|
|
curFile.Type = DiffFileAdd
|
|
curFile.IsCreated = true
|
|
if strings.HasSuffix(line, " 160000\n") {
|
|
curFile.IsSubmodule = true
|
|
}
|
|
case strings.HasPrefix(line, "deleted"):
|
|
curFile.Type = DiffFileDel
|
|
curFile.IsDeleted = true
|
|
if strings.HasSuffix(line, " 160000\n") {
|
|
curFile.IsSubmodule = true
|
|
}
|
|
case strings.HasPrefix(line, "index"):
|
|
if strings.HasSuffix(line, " 160000\n") {
|
|
curFile.IsSubmodule = true
|
|
}
|
|
case strings.HasPrefix(line, "similarity index 100%"):
|
|
curFile.Type = DiffFileRename
|
|
case strings.HasPrefix(line, "Binary"):
|
|
curFile.IsBin = true
|
|
case strings.HasPrefix(line, "--- "):
|
|
// Handle ambiguous filenames
|
|
if curFile.IsAmbiguous {
|
|
// The shortest string that can end up here is:
|
|
// "--- a\t\n" without the quotes.
|
|
// This line has a len() of 7 but doesn't contain a oldName.
|
|
// So the amount that the line need is at least 8 or more.
|
|
// The code will otherwise panic for a out-of-bounds.
|
|
if len(line) > 7 && line[4] == 'a' {
|
|
curFile.OldName = line[6 : len(line)-1]
|
|
if line[len(line)-2] == '\t' {
|
|
curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
|
|
}
|
|
} else {
|
|
curFile.OldName = ""
|
|
}
|
|
}
|
|
// Otherwise do nothing with this line
|
|
case strings.HasPrefix(line, "+++ "):
|
|
// Handle ambiguous filenames
|
|
if curFile.IsAmbiguous {
|
|
if len(line) > 6 && line[4] == 'b' {
|
|
curFile.Name = line[6 : len(line)-1]
|
|
if line[len(line)-2] == '\t' {
|
|
curFile.Name = curFile.Name[:len(curFile.Name)-1]
|
|
}
|
|
if curFile.OldName == "" {
|
|
curFile.OldName = curFile.Name
|
|
}
|
|
} else {
|
|
curFile.Name = curFile.OldName
|
|
}
|
|
curFile.IsAmbiguous = false
|
|
}
|
|
// Otherwise do nothing with this line, but now switch to parsing hunks
|
|
lineBytes, isFragment, err := parseHunks(curFile, maxLines, maxLineCharacters, input)
|
|
diff.TotalAddition += curFile.Addition
|
|
diff.TotalDeletion += curFile.Deletion
|
|
if err != nil {
|
|
if err != io.EOF {
|
|
return diff, err
|
|
}
|
|
break parsingLoop
|
|
}
|
|
sb.Reset()
|
|
_, _ = sb.Write(lineBytes)
|
|
for isFragment {
|
|
lineBytes, isFragment, err = input.ReadLine()
|
|
if err != nil {
|
|
// Now by the definition of ReadLine this cannot be io.EOF
|
|
return diff, fmt.Errorf("unable to ReadLine: %w", err)
|
|
}
|
|
_, _ = sb.Write(lineBytes)
|
|
}
|
|
line = sb.String()
|
|
sb.Reset()
|
|
|
|
break curFileLoop
|
|
}
|
|
}
|
|
}
|
|
|
|
// TODO: There are numerous issues with this:
|
|
// - we might want to consider detecting encoding while parsing but...
|
|
// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
|
|
diffLineTypeBuffers := make(map[DiffLineType]*bytes.Buffer, 3)
|
|
diffLineTypeDecoders := make(map[DiffLineType]*encoding.Decoder, 3)
|
|
diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer)
|
|
diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer)
|
|
diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer)
|
|
for _, f := range diff.Files {
|
|
f.NameHash = base.EncodeSha1(f.Name)
|
|
|
|
for _, buffer := range diffLineTypeBuffers {
|
|
buffer.Reset()
|
|
}
|
|
for _, sec := range f.Sections {
|
|
for _, l := range sec.Lines {
|
|
if l.Type == DiffLineSection {
|
|
continue
|
|
}
|
|
diffLineTypeBuffers[l.Type].WriteString(l.Content[1:])
|
|
diffLineTypeBuffers[l.Type].WriteString("\n")
|
|
}
|
|
}
|
|
for lineType, buffer := range diffLineTypeBuffers {
|
|
diffLineTypeDecoders[lineType] = nil
|
|
if buffer.Len() == 0 {
|
|
continue
|
|
}
|
|
charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
|
|
if charsetLabel != "UTF-8" && err == nil {
|
|
encoding, _ := stdcharset.Lookup(charsetLabel)
|
|
if encoding != nil {
|
|
diffLineTypeDecoders[lineType] = encoding.NewDecoder()
|
|
}
|
|
}
|
|
}
|
|
for _, sec := range f.Sections {
|
|
for _, l := range sec.Lines {
|
|
decoder := diffLineTypeDecoders[l.Type]
|
|
if decoder != nil {
|
|
if c, _, err := transform.String(decoder, l.Content[1:]); err == nil {
|
|
l.Content = l.Content[0:1] + c
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
diff.NumFiles = len(diff.Files)
|
|
return diff, nil
|
|
}
|
|
|
|
func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
|
|
// need to skip until the next cmdDiffHead
|
|
isFragment, wasFragment := false, false
|
|
var lineBytes []byte
|
|
for {
|
|
lineBytes, isFragment, err = input.ReadLine()
|
|
if err != nil {
|
|
return
|
|
}
|
|
if wasFragment {
|
|
wasFragment = isFragment
|
|
continue
|
|
}
|
|
if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) {
|
|
break
|
|
}
|
|
wasFragment = isFragment
|
|
}
|
|
line = string(lineBytes)
|
|
if isFragment {
|
|
var tail string
|
|
tail, err = input.ReadString('\n')
|
|
if err != nil {
|
|
return
|
|
}
|
|
line += tail
|
|
}
|
|
return
|
|
}
|
|
|
|
func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) {
|
|
sb := strings.Builder{}
|
|
|
|
var (
|
|
curSection *DiffSection
|
|
curFileLinesCount int
|
|
curFileLFSPrefix bool
|
|
)
|
|
|
|
lastLeftIdx := -1
|
|
leftLine, rightLine := 1, 1
|
|
|
|
for {
|
|
for isFragment {
|
|
curFile.IsIncomplete = true
|
|
curFile.IsIncompleteLineTooLong = true
|
|
_, isFragment, err = input.ReadLine()
|
|
if err != nil {
|
|
// Now by the definition of ReadLine this cannot be io.EOF
|
|
err = fmt.Errorf("unable to ReadLine: %w", err)
|
|
return
|
|
}
|
|
}
|
|
sb.Reset()
|
|
lineBytes, isFragment, err = input.ReadLine()
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
return
|
|
}
|
|
err = fmt.Errorf("unable to ReadLine: %w", err)
|
|
return
|
|
}
|
|
if lineBytes[0] == 'd' {
|
|
// End of hunks
|
|
return
|
|
}
|
|
|
|
switch lineBytes[0] {
|
|
case '@':
|
|
if maxLines > -1 && curFileLinesCount >= maxLines {
|
|
curFile.IsIncomplete = true
|
|
continue
|
|
}
|
|
|
|
_, _ = sb.Write(lineBytes)
|
|
for isFragment {
|
|
// This is very odd indeed - we're in a section header and the line is too long
|
|
// This really shouldn't happen...
|
|
lineBytes, isFragment, err = input.ReadLine()
|
|
if err != nil {
|
|
// Now by the definition of ReadLine this cannot be io.EOF
|
|
err = fmt.Errorf("unable to ReadLine: %w", err)
|
|
return
|
|
}
|
|
_, _ = sb.Write(lineBytes)
|
|
}
|
|
line := sb.String()
|
|
|
|
// Create a new section to represent this hunk
|
|
curSection = &DiffSection{file: curFile}
|
|
lastLeftIdx = -1
|
|
curFile.Sections = append(curFile.Sections, curSection)
|
|
|
|
lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1)
|
|
diffLine := &DiffLine{
|
|
Type: DiffLineSection,
|
|
Content: line,
|
|
SectionInfo: lineSectionInfo,
|
|
}
|
|
curSection.Lines = append(curSection.Lines, diffLine)
|
|
curSection.FileName = curFile.Name
|
|
// update line number.
|
|
leftLine = lineSectionInfo.LeftIdx
|
|
rightLine = lineSectionInfo.RightIdx
|
|
continue
|
|
case '\\':
|
|
if maxLines > -1 && curFileLinesCount >= maxLines {
|
|
curFile.IsIncomplete = true
|
|
continue
|
|
}
|
|
// This is used only to indicate that the current file does not have a terminal newline
|
|
if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) {
|
|
err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
|
|
return
|
|
}
|
|
// Technically this should be the end the file!
|
|
// FIXME: we should be putting a marker at the end of the file if there is no terminal new line
|
|
continue
|
|
case '+':
|
|
curFileLinesCount++
|
|
curFile.Addition++
|
|
if maxLines > -1 && curFileLinesCount >= maxLines {
|
|
curFile.IsIncomplete = true
|
|
continue
|
|
}
|
|
diffLine := &DiffLine{Type: DiffLineAdd, RightIdx: rightLine, Match: -1}
|
|
rightLine++
|
|
if curSection == nil {
|
|
// Create a new section to represent this hunk
|
|
curSection = &DiffSection{file: curFile}
|
|
curFile.Sections = append(curFile.Sections, curSection)
|
|
lastLeftIdx = -1
|
|
}
|
|
if lastLeftIdx > -1 {
|
|
diffLine.Match = lastLeftIdx
|
|
curSection.Lines[lastLeftIdx].Match = len(curSection.Lines)
|
|
lastLeftIdx++
|
|
if lastLeftIdx >= len(curSection.Lines) || curSection.Lines[lastLeftIdx].Type != DiffLineDel {
|
|
lastLeftIdx = -1
|
|
}
|
|
}
|
|
curSection.Lines = append(curSection.Lines, diffLine)
|
|
case '-':
|
|
curFileLinesCount++
|
|
curFile.Deletion++
|
|
if maxLines > -1 && curFileLinesCount >= maxLines {
|
|
curFile.IsIncomplete = true
|
|
continue
|
|
}
|
|
diffLine := &DiffLine{Type: DiffLineDel, LeftIdx: leftLine, Match: -1}
|
|
if leftLine > 0 {
|
|
leftLine++
|
|
}
|
|
if curSection == nil {
|
|
// Create a new section to represent this hunk
|
|
curSection = &DiffSection{file: curFile}
|
|
curFile.Sections = append(curFile.Sections, curSection)
|
|
lastLeftIdx = -1
|
|
}
|
|
if len(curSection.Lines) == 0 || curSection.Lines[len(curSection.Lines)-1].Type != DiffLineDel {
|
|
lastLeftIdx = len(curSection.Lines)
|
|
}
|
|
curSection.Lines = append(curSection.Lines, diffLine)
|
|
case ' ':
|
|
curFileLinesCount++
|
|
if maxLines > -1 && curFileLinesCount >= maxLines {
|
|
curFile.IsIncomplete = true
|
|
continue
|
|
}
|
|
diffLine := &DiffLine{Type: DiffLinePlain, LeftIdx: leftLine, RightIdx: rightLine}
|
|
leftLine++
|
|
rightLine++
|
|
lastLeftIdx = -1
|
|
if curSection == nil {
|
|
// Create a new section to represent this hunk
|
|
curSection = &DiffSection{file: curFile}
|
|
curFile.Sections = append(curFile.Sections, curSection)
|
|
}
|
|
curSection.Lines = append(curSection.Lines, diffLine)
|
|
default:
|
|
// This is unexpected
|
|
err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
|
|
return
|
|
}
|
|
|
|
line := string(lineBytes)
|
|
if isFragment {
|
|
curFile.IsIncomplete = true
|
|
curFile.IsIncompleteLineTooLong = true
|
|
for isFragment {
|
|
lineBytes, isFragment, err = input.ReadLine()
|
|
if err != nil {
|
|
// Now by the definition of ReadLine this cannot be io.EOF
|
|
err = fmt.Errorf("unable to ReadLine: %w", err)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
if len(line) > maxLineCharacters {
|
|
curFile.IsIncomplete = true
|
|
curFile.IsIncompleteLineTooLong = true
|
|
line = line[:maxLineCharacters]
|
|
}
|
|
curSection.Lines[len(curSection.Lines)-1].Content = line
|
|
|
|
// handle LFS
|
|
if line[1:] == lfs.MetaFileIdentifier {
|
|
curFileLFSPrefix = true
|
|
} else if curFileLFSPrefix && strings.HasPrefix(line[1:], lfs.MetaFileOidPrefix) {
|
|
oid := strings.TrimPrefix(line[1:], lfs.MetaFileOidPrefix)
|
|
if len(oid) == 64 {
|
|
m := &models.LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}
|
|
count, err := db.Count(m)
|
|
|
|
if err == nil && count > 0 {
|
|
curFile.IsBin = true
|
|
curFile.IsLFSFile = true
|
|
curSection.Lines = nil
|
|
lastLeftIdx = -1
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
func createDiffFile(diff *Diff, line string) *DiffFile {
|
|
// The a/ and b/ filenames are the same unless rename/copy is involved.
|
|
// Especially, even for a creation or a deletion, /dev/null is not used
|
|
// in place of the a/ or b/ filenames.
|
|
//
|
|
// When rename/copy is involved, file1 and file2 show the name of the
|
|
// source file of the rename/copy and the name of the file that rename/copy
|
|
// produces, respectively.
|
|
//
|
|
// Path names are quoted if necessary.
|
|
//
|
|
// This means that you should always be able to determine the file name even when there
|
|
// there is potential ambiguity...
|
|
//
|
|
// but we can be simpler with our heuristics by just forcing git to prefix things nicely
|
|
curFile := &DiffFile{
|
|
Index: len(diff.Files) + 1,
|
|
Type: DiffFileChange,
|
|
Sections: make([]*DiffSection, 0, 10),
|
|
}
|
|
|
|
rd := strings.NewReader(line[len(cmdDiffHead):] + " ")
|
|
curFile.Type = DiffFileChange
|
|
oldNameAmbiguity := false
|
|
newNameAmbiguity := false
|
|
|
|
curFile.OldName, oldNameAmbiguity = readFileName(rd)
|
|
curFile.Name, newNameAmbiguity = readFileName(rd)
|
|
if oldNameAmbiguity && newNameAmbiguity {
|
|
curFile.IsAmbiguous = true
|
|
// OK we should bet that the oldName and the newName are the same if they can be made to be same
|
|
// So we need to start again ...
|
|
if (len(line)-len(cmdDiffHead)-1)%2 == 0 {
|
|
// diff --git a/b b/b b/b b/b b/b b/b
|
|
//
|
|
midpoint := (len(line) + len(cmdDiffHead) - 1) / 2
|
|
new, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:]
|
|
if len(new) > 2 && len(old) > 2 && new[2:] == old[2:] {
|
|
curFile.OldName = old[2:]
|
|
curFile.Name = old[2:]
|
|
}
|
|
}
|
|
}
|
|
|
|
curFile.IsRenamed = curFile.Name != curFile.OldName
|
|
return curFile
|
|
}
|
|
|
|
func readFileName(rd *strings.Reader) (string, bool) {
|
|
ambiguity := false
|
|
var name string
|
|
char, _ := rd.ReadByte()
|
|
_ = rd.UnreadByte()
|
|
if char == '"' {
|
|
fmt.Fscanf(rd, "%q ", &name)
|
|
if len(name) == 0 {
|
|
log.Error("Reader has no file name: %v", rd)
|
|
return "", true
|
|
}
|
|
|
|
if name[0] == '\\' {
|
|
name = name[1:]
|
|
}
|
|
} else {
|
|
// This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone
|
|
ambiguity = true
|
|
fmt.Fscanf(rd, "%s ", &name)
|
|
char, _ := rd.ReadByte()
|
|
_ = rd.UnreadByte()
|
|
for !(char == 0 || char == '"' || char == 'b') {
|
|
var suffix string
|
|
fmt.Fscanf(rd, "%s ", &suffix)
|
|
name += " " + suffix
|
|
char, _ = rd.ReadByte()
|
|
_ = rd.UnreadByte()
|
|
}
|
|
}
|
|
if len(name) < 2 {
|
|
log.Error("Unable to determine name from reader: %v", rd)
|
|
return "", true
|
|
}
|
|
return name[2:], ambiguity
|
|
}
|
|
|
|
// DiffOptions represents the options for a DiffRange
|
|
type DiffOptions struct {
|
|
BeforeCommitID string
|
|
AfterCommitID string
|
|
SkipTo string
|
|
MaxLines int
|
|
MaxLineCharacters int
|
|
MaxFiles int
|
|
WhitespaceBehavior string
|
|
DirectComparison bool
|
|
}
|
|
|
|
// GetDiff builds a Diff between two commits of a repository.
|
|
// Passing the empty string as beforeCommitID returns a diff from the parent commit.
|
|
// The whitespaceBehavior is either an empty string or a git flag
|
|
func GetDiff(gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
|
|
repoPath := gitRepo.Path
|
|
|
|
commit, err := gitRepo.GetCommit(opts.AfterCommitID)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
argsLength := 6
|
|
if len(opts.WhitespaceBehavior) > 0 {
|
|
argsLength++
|
|
}
|
|
if len(opts.SkipTo) > 0 {
|
|
argsLength++
|
|
}
|
|
if len(files) > 0 {
|
|
argsLength += len(files) + 1
|
|
}
|
|
|
|
diffArgs := make([]string, 0, argsLength)
|
|
if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA) && commit.ParentCount() == 0 {
|
|
diffArgs = append(diffArgs, "diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M")
|
|
if len(opts.WhitespaceBehavior) != 0 {
|
|
diffArgs = append(diffArgs, opts.WhitespaceBehavior)
|
|
}
|
|
// append empty tree ref
|
|
diffArgs = append(diffArgs, "4b825dc642cb6eb9a060e54bf8d69288fbee4904")
|
|
diffArgs = append(diffArgs, opts.AfterCommitID)
|
|
} else {
|
|
actualBeforeCommitID := opts.BeforeCommitID
|
|
if len(actualBeforeCommitID) == 0 {
|
|
parentCommit, _ := commit.Parent(0)
|
|
actualBeforeCommitID = parentCommit.ID.String()
|
|
}
|
|
diffArgs = append(diffArgs, "diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M")
|
|
if len(opts.WhitespaceBehavior) != 0 {
|
|
diffArgs = append(diffArgs, opts.WhitespaceBehavior)
|
|
}
|
|
diffArgs = append(diffArgs, actualBeforeCommitID)
|
|
diffArgs = append(diffArgs, opts.AfterCommitID)
|
|
opts.BeforeCommitID = actualBeforeCommitID
|
|
}
|
|
|
|
// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
|
|
// so if we are using at least this version of git we don't have to tell ParsePatch to do
|
|
// the skipping for us
|
|
parsePatchSkipToFile := opts.SkipTo
|
|
if opts.SkipTo != "" && git.CheckGitVersionAtLeast("2.31") == nil {
|
|
diffArgs = append(diffArgs, "--skip-to="+opts.SkipTo)
|
|
parsePatchSkipToFile = ""
|
|
}
|
|
|
|
if len(files) > 0 {
|
|
diffArgs = append(diffArgs, "--")
|
|
diffArgs = append(diffArgs, files...)
|
|
}
|
|
|
|
reader, writer := io.Pipe()
|
|
defer func() {
|
|
_ = reader.Close()
|
|
_ = writer.Close()
|
|
}()
|
|
|
|
go func(ctx context.Context, diffArgs []string, repoPath string, writer *io.PipeWriter) {
|
|
cmd := git.NewCommand(ctx, diffArgs...)
|
|
cmd.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath))
|
|
if err := cmd.Run(&git.RunOpts{
|
|
Timeout: time.Duration(setting.Git.Timeout.Default) * time.Second,
|
|
Dir: repoPath,
|
|
Stderr: os.Stderr,
|
|
Stdout: writer,
|
|
}); err != nil {
|
|
log.Error("error during RunWithContext: %w", err)
|
|
}
|
|
|
|
_ = writer.Close()
|
|
}(gitRepo.Ctx, diffArgs, repoPath, writer)
|
|
|
|
diff, err := ParsePatch(opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("unable to ParsePatch: %w", err)
|
|
}
|
|
diff.Start = opts.SkipTo
|
|
|
|
var checker *git.CheckAttributeReader
|
|
|
|
if git.CheckGitVersionAtLeast("1.7.8") == nil {
|
|
indexFilename, worktree, deleteTemporaryFile, err := gitRepo.ReadTreeToTemporaryIndex(opts.AfterCommitID)
|
|
if err == nil {
|
|
defer deleteTemporaryFile()
|
|
|
|
checker = &git.CheckAttributeReader{
|
|
Attributes: []string{"linguist-vendored", "linguist-generated", "linguist-language", "gitlab-language"},
|
|
Repo: gitRepo,
|
|
IndexFile: indexFilename,
|
|
WorkTree: worktree,
|
|
}
|
|
ctx, cancel := context.WithCancel(gitRepo.Ctx)
|
|
if err := checker.Init(ctx); err != nil {
|
|
log.Error("Unable to open checker for %s. Error: %v", opts.AfterCommitID, err)
|
|
} else {
|
|
go func() {
|
|
err := checker.Run()
|
|
if err != nil && err != ctx.Err() {
|
|
log.Error("Unable to open checker for %s. Error: %v", opts.AfterCommitID, err)
|
|
}
|
|
cancel()
|
|
}()
|
|
}
|
|
defer func() {
|
|
_ = checker.Close()
|
|
cancel()
|
|
}()
|
|
}
|
|
}
|
|
|
|
for _, diffFile := range diff.Files {
|
|
|
|
gotVendor := false
|
|
gotGenerated := false
|
|
if checker != nil {
|
|
attrs, err := checker.CheckPath(diffFile.Name)
|
|
if err == nil {
|
|
if vendored, has := attrs["linguist-vendored"]; has {
|
|
if vendored == "set" || vendored == "true" {
|
|
diffFile.IsVendored = true
|
|
gotVendor = true
|
|
} else {
|
|
gotVendor = vendored == "false"
|
|
}
|
|
}
|
|
if generated, has := attrs["linguist-generated"]; has {
|
|
if generated == "set" || generated == "true" {
|
|
diffFile.IsGenerated = true
|
|
gotGenerated = true
|
|
} else {
|
|
gotGenerated = generated == "false"
|
|
}
|
|
}
|
|
if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
|
|
diffFile.Language = language
|
|
} else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
|
|
diffFile.Language = language
|
|
}
|
|
} else {
|
|
log.Error("Unexpected error: %v", err)
|
|
}
|
|
}
|
|
|
|
if !gotVendor {
|
|
diffFile.IsVendored = analyze.IsVendor(diffFile.Name)
|
|
}
|
|
if !gotGenerated {
|
|
diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name)
|
|
}
|
|
|
|
tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
|
|
if tailSection != nil {
|
|
diffFile.Sections = append(diffFile.Sections, tailSection)
|
|
}
|
|
}
|
|
|
|
separator := "..."
|
|
if opts.DirectComparison {
|
|
separator = ".."
|
|
}
|
|
|
|
shortstatArgs := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
|
|
if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA {
|
|
shortstatArgs = []string{git.EmptyTreeSHA, opts.AfterCommitID}
|
|
}
|
|
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, shortstatArgs...)
|
|
if err != nil && strings.Contains(err.Error(), "no merge base") {
|
|
// git >= 2.28 now returns an error if base and head have become unrelated.
|
|
// previously it would return the results of git diff --shortstat base head so let's try that...
|
|
shortstatArgs = []string{opts.BeforeCommitID, opts.AfterCommitID}
|
|
diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, shortstatArgs...)
|
|
}
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return diff, nil
|
|
}
|
|
|
|
// SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set
|
|
// Additionally, the database asynchronously is updated if files have changed since the last review
|
|
func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *models.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
|
|
diff, err := GetDiff(gitRepo, opts, files...)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
review, err := pull_model.GetNewestReviewState(ctx, userID, pull.ID)
|
|
if err != nil || review == nil || review.UpdatedFiles == nil {
|
|
return diff, err
|
|
}
|
|
|
|
latestCommit := opts.AfterCommitID
|
|
if latestCommit == "" {
|
|
latestCommit = pull.HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't
|
|
}
|
|
|
|
changedFiles, err := gitRepo.GetFilesChangedBetween(review.CommitSHA, latestCommit)
|
|
if err != nil {
|
|
return diff, err
|
|
}
|
|
|
|
filesChangedSinceLastDiff := make(map[string]pull_model.ViewedState)
|
|
outer:
|
|
for _, diffFile := range diff.Files {
|
|
fileViewedState := review.UpdatedFiles[diffFile.GetDiffFileName()]
|
|
|
|
// Check whether it was previously detected that the file has changed since the last review
|
|
if fileViewedState == pull_model.HasChanged {
|
|
diffFile.HasChangedSinceLastReview = true
|
|
continue
|
|
}
|
|
|
|
filename := diffFile.GetDiffFileName()
|
|
|
|
// Check explicitly whether the file has changed since the last review
|
|
for _, changedFile := range changedFiles {
|
|
diffFile.HasChangedSinceLastReview = filename == changedFile
|
|
if diffFile.HasChangedSinceLastReview {
|
|
filesChangedSinceLastDiff[filename] = pull_model.HasChanged
|
|
continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted
|
|
}
|
|
}
|
|
// Check whether the file has already been viewed
|
|
if fileViewedState == pull_model.Viewed {
|
|
diffFile.IsViewed = true
|
|
diff.NumViewedFiles++
|
|
}
|
|
}
|
|
|
|
// Explicitly store files that have changed in the database, if any is present at all.
|
|
// This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed.
|
|
// On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed.
|
|
if len(filesChangedSinceLastDiff) > 0 {
|
|
err := pull_model.UpdateReviewState(ctx, review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff)
|
|
if err != nil {
|
|
log.Warn("Could not update review for user %d, pull %d, commit %s and the changed files %v: %v", review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff, err)
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return diff, err
|
|
}
|
|
|
|
// CommentAsDiff returns c.Patch as *Diff
|
|
func CommentAsDiff(c *models.Comment) (*Diff, error) {
|
|
diff, err := ParsePatch(setting.Git.MaxGitDiffLines,
|
|
setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "")
|
|
if err != nil {
|
|
log.Error("Unable to parse patch: %v", err)
|
|
return nil, err
|
|
}
|
|
if len(diff.Files) == 0 {
|
|
return nil, fmt.Errorf("no file found for comment ID: %d", c.ID)
|
|
}
|
|
secs := diff.Files[0].Sections
|
|
if len(secs) == 0 {
|
|
return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID)
|
|
}
|
|
return diff, nil
|
|
}
|
|
|
|
// CommentMustAsDiff executes AsDiff and logs the error instead of returning
|
|
func CommentMustAsDiff(c *models.Comment) *Diff {
|
|
if c == nil {
|
|
return nil
|
|
}
|
|
defer func() {
|
|
if err := recover(); err != nil {
|
|
log.Error("PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s", c.ID, err, log.Stack(2))
|
|
}
|
|
}()
|
|
diff, err := CommentAsDiff(c)
|
|
if err != nil {
|
|
log.Warn("CommentMustAsDiff: %v", err)
|
|
}
|
|
return diff
|
|
}
|
|
|
|
// GetWhitespaceFlag returns git diff flag for treating whitespaces
|
|
func GetWhitespaceFlag(whitespaceBehavior string) string {
|
|
whitespaceFlags := map[string]string{
|
|
"ignore-all": "-w",
|
|
"ignore-change": "-b",
|
|
"ignore-eol": "--ignore-space-at-eol",
|
|
"show-all": "",
|
|
}
|
|
|
|
if flag, ok := whitespaceFlags[whitespaceBehavior]; ok {
|
|
return flag
|
|
}
|
|
log.Warn("unknown whitespace behavior: %q, default to 'show-all'", whitespaceBehavior)
|
|
return ""
|
|
}
|