2014-04-13 09:35:36 +08:00
// Copyright 2014 The Gogs Authors. All rights reserved.
2019-09-06 10:20:09 +08:00
// Copyright 2019 The Gitea Authors. All rights reserved.
2014-04-13 09:35:36 +08:00
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
2019-09-06 10:20:09 +08:00
package gitdiff
2014-04-13 09:35:36 +08:00
import (
"bufio"
2014-12-09 15:18:25 +08:00
"bytes"
2019-11-30 22:40:22 +08:00
"context"
2014-06-19 13:08:03 +08:00
"fmt"
2020-12-27 05:58:21 +08:00
"html"
2016-01-09 14:51:17 +08:00
"html/template"
2014-04-13 09:35:36 +08:00
"io"
2019-11-15 10:52:59 +08:00
"net/url"
2014-04-13 09:35:36 +08:00
"os"
2018-08-06 12:43:22 +08:00
"sort"
2014-04-13 09:35:36 +08:00
"strings"
2021-08-31 12:16:23 +08:00
"time"
2014-04-13 09:35:36 +08:00
2021-09-19 19:49:59 +08:00
"code.gitea.io/gitea/models/db"
2022-06-12 23:51:54 +08:00
git_model "code.gitea.io/gitea/models/git"
2022-06-13 17:37:59 +08:00
issues_model "code.gitea.io/gitea/models/issues"
2022-05-08 02:28:10 +08:00
pull_model "code.gitea.io/gitea/models/pull"
2021-11-24 17:49:20 +08:00
user_model "code.gitea.io/gitea/models/user"
2021-09-10 04:13:36 +08:00
"code.gitea.io/gitea/modules/analyze"
2022-05-09 06:29:50 +08:00
"code.gitea.io/gitea/modules/base"
2019-08-15 20:07:28 +08:00
"code.gitea.io/gitea/modules/charset"
2019-03-27 17:33:00 +08:00
"code.gitea.io/gitea/modules/git"
2016-12-07 01:58:31 +08:00
"code.gitea.io/gitea/modules/highlight"
2021-04-09 06:25:57 +08:00
"code.gitea.io/gitea/modules/lfs"
2016-11-11 00:24:48 +08:00
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/setting"
2022-08-14 02:32:34 +08:00
"code.gitea.io/gitea/modules/translation"
2019-08-24 00:40:30 +08:00
2016-11-06 00:56:35 +08:00
"github.com/sergi/go-diff/diffmatchpatch"
2019-08-15 20:07:28 +08:00
stdcharset "golang.org/x/net/html/charset"
2021-07-13 09:13:52 +08:00
"golang.org/x/text/encoding"
2016-11-06 00:56:35 +08:00
"golang.org/x/text/transform"
2014-04-13 09:35:36 +08:00
)
2022-07-23 19:28:02 +08:00
// DiffLineType represents the type of DiffLine.
type DiffLineType uint8

// DiffLineType possible values.
// Numbering starts at 1, so the zero value is not a valid line type.
const (
	// DiffLinePlain is a line that is neither added, deleted nor a section header.
	DiffLinePlain DiffLineType = iota + 1
	// DiffLineAdd is an added line.
	DiffLineAdd
	// DiffLineDel is a deleted line.
	DiffLineDel
	// DiffLineSection is a hunk header line.
	DiffLineSection
)
2022-07-23 19:28:02 +08:00
// DiffFileType represents the type of DiffFile.
type DiffFileType uint8

// DiffFileType possible values.
// Numbering starts at 1, so the zero value is not a valid file type.
const (
	// DiffFileAdd is a newly created file.
	DiffFileAdd DiffFileType = iota + 1
	// DiffFileChange is a modified file.
	DiffFileChange
	// DiffFileDel is a deleted file.
	DiffFileDel
	// DiffFileRename is a renamed file.
	DiffFileRename
	// DiffFileCopy is a copied file.
	DiffFileCopy
)
2019-11-15 10:52:59 +08:00
// DiffLineExpandDirection represents the DiffLineSection expand direction
type DiffLineExpandDirection uint8

// DiffLineExpandDirection possible values.
// Numbering starts at 1, so the zero value is not a valid direction.
const (
	// DiffLineExpandNone means the line cannot be expanded.
	DiffLineExpandNone DiffLineExpandDirection = iota + 1
	// DiffLineExpandSingle is the fallback direction when none of the other cases apply.
	DiffLineExpandSingle
	// DiffLineExpandUpDown means the hidden gap is larger than BlobExcerptChunkSize.
	DiffLineExpandUpDown
	// DiffLineExpandUp is used when there is no preceding line on either side.
	DiffLineExpandUp
	// DiffLineExpandDown is used when the section carries no hunk sizes.
	DiffLineExpandDown
)
2016-11-24 16:30:08 +08:00
// DiffLine represents a line difference in a DiffSection.
2014-04-13 09:35:36 +08:00
type DiffLine struct {
2019-11-15 10:52:59 +08:00
LeftIdx int
RightIdx int
2021-08-29 22:28:04 +08:00
Match int
2019-11-15 10:52:59 +08:00
Type DiffLineType
Content string
2022-06-13 17:37:59 +08:00
Comments [ ] * issues_model . Comment
2019-11-15 10:52:59 +08:00
SectionInfo * DiffLineSectionInfo
}
// DiffLineSectionInfo represents diff line section meta data
type DiffLineSectionInfo struct {
	// Path is the path of the file the section belongs to.
	Path string
	// LastLeftIdx and LastRightIdx are the last line numbers seen before this section.
	LastLeftIdx  int
	LastRightIdx int
	// LeftIdx and RightIdx are the line numbers at which this section starts.
	LeftIdx  int
	RightIdx int
	// LeftHunkSize and RightHunkSize are the hunk sizes parsed from the hunk header.
	LeftHunkSize  int
	RightHunkSize int
}
2020-08-20 22:53:06 +08:00
// BlobExcerptChunkSize represents the maximum number of lines of a blob excerpt.
// GetExpandDirection compares the gap between two sections against this value.
const BlobExcerptChunkSize = 20
2019-11-15 10:52:59 +08:00
2022-07-23 19:28:02 +08:00
// GetType returns the type of DiffLine.
2016-01-07 04:00:40 +08:00
func ( d * DiffLine ) GetType ( ) int {
return int ( d . Type )
2014-04-13 09:35:36 +08:00
}
2022-07-23 19:28:02 +08:00
// CanComment returns whether a line can get commented
2018-08-06 12:43:22 +08:00
func ( d * DiffLine ) CanComment ( ) bool {
return len ( d . Comments ) == 0 && d . Type != DiffLineSection
}
// GetCommentSide returns the comment side of the first comment, if not set returns empty string
func (d *DiffLine) GetCommentSide() string {
	if len(d.Comments) > 0 {
		first := d.Comments[0]
		return first.DiffSide()
	}
	return ""
}
2019-06-25 04:23:52 +08:00
// GetLineTypeMarker returns the line type marker, i.e. the first byte of the
// content when it is one of ' ', '+' or '-'. It returns an empty string for
// any other first byte and for empty content.
func (d *DiffLine) GetLineTypeMarker() string {
	// Guard against empty content: indexing Content[0] would panic.
	if len(d.Content) == 0 {
		return ""
	}
	if strings.IndexByte(" +-", d.Content[0]) > -1 {
		return d.Content[0:1]
	}
	return ""
}
2019-11-15 10:52:59 +08:00
// GetBlobExcerptQuery builds query string to get blob excerpt.
// It reads all values from d.SectionInfo, which must not be nil; the path is
// query-escaped.
func (d *DiffLine) GetBlobExcerptQuery() string {
	const format = "last_left=%d&last_right=%d&" +
		"left=%d&right=%d&" +
		"left_hunk_size=%d&right_hunk_size=%d&" +
		"path=%s"

	info := d.SectionInfo
	escapedPath := url.QueryEscape(info.Path)
	return fmt.Sprintf(format,
		info.LastLeftIdx, info.LastRightIdx,
		info.LeftIdx, info.RightIdx,
		info.LeftHunkSize, info.RightHunkSize,
		escapedPath)
}
// GetExpandDirection gets DiffLineExpandDirection
func ( d * DiffLine ) GetExpandDirection ( ) DiffLineExpandDirection {
if d . Type != DiffLineSection || d . SectionInfo == nil || d . SectionInfo . RightIdx - d . SectionInfo . LastRightIdx <= 1 {
return DiffLineExpandNone
}
if d . SectionInfo . LastLeftIdx <= 0 && d . SectionInfo . LastRightIdx <= 0 {
return DiffLineExpandUp
2020-08-20 22:53:06 +08:00
} else if d . SectionInfo . RightIdx - d . SectionInfo . LastRightIdx > BlobExcerptChunkSize && d . SectionInfo . RightHunkSize > 0 {
2019-11-15 10:52:59 +08:00
return DiffLineExpandUpDown
} else if d . SectionInfo . LeftHunkSize <= 0 && d . SectionInfo . RightHunkSize <= 0 {
return DiffLineExpandDown
}
return DiffLineExpandSingle
}
2020-01-24 01:28:15 +08:00
func getDiffLineSectionInfo ( treePath , line string , lastLeftIdx , lastRightIdx int ) * DiffLineSectionInfo {
2020-01-28 16:02:03 +08:00
leftLine , leftHunk , rightLine , righHunk := git . ParseDiffHunkString ( line )
2020-01-24 01:28:15 +08:00
2019-11-15 10:52:59 +08:00
return & DiffLineSectionInfo {
2020-01-24 01:28:15 +08:00
Path : treePath ,
2019-11-15 10:52:59 +08:00
LastLeftIdx : lastLeftIdx ,
LastRightIdx : lastRightIdx ,
LeftIdx : leftLine ,
RightIdx : rightLine ,
LeftHunkSize : leftHunk ,
RightHunkSize : righHunk ,
}
}
2019-06-27 05:35:07 +08:00
// escape a line's content or return <br> needed for copy/paste purposes
2022-08-14 02:32:34 +08:00
func getLineContent ( content string , locale translation . Locale ) DiffInline {
2019-06-27 05:35:07 +08:00
if len ( content ) > 0 {
2022-08-14 02:32:34 +08:00
return DiffInlineWithUnicodeEscape ( template . HTML ( html . EscapeString ( content ) ) , locale )
2019-06-27 05:35:07 +08:00
}
2022-08-14 02:32:34 +08:00
return DiffInline { EscapeStatus : & charset . EscapeStatus { } , Content : "<br>" }
2019-06-27 05:35:07 +08:00
}
2016-11-24 16:30:08 +08:00
// DiffSection represents a section of a DiffFile.
2014-04-13 09:35:36 +08:00
type DiffSection struct {
2021-11-18 04:37:00 +08:00
file * DiffFile
2020-07-01 05:34:03 +08:00
FileName string
Name string
Lines [ ] * DiffLine
2014-04-13 09:35:36 +08:00
}
2016-01-09 14:51:17 +08:00
// HTML fragments wrapped around the changed parts of a line by diffToHTML.
// They must not contain any surrounding whitespace: every byte is written
// verbatim into the rendered line and would show up as extra spaces.
var (
	addedCodePrefix   = []byte(`<span class="added-code">`)
	removedCodePrefix = []byte(`<span class="removed-code">`)
	codeTagSuffix     = []byte(`</span>`)
)
2021-02-14 22:51:00 +08:00
2022-07-23 19:28:02 +08:00
func diffToHTML ( lineWrapperTags [ ] string , diffs [ ] diffmatchpatch . Diff , lineType DiffLineType ) string {
2016-08-08 00:49:47 +08:00
buf := bytes . NewBuffer ( nil )
2022-07-23 19:28:02 +08:00
// restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
for _ , tag := range lineWrapperTags {
buf . WriteString ( tag )
}
2020-10-22 06:14:44 +08:00
for _ , diff := range diffs {
switch {
case diff . Type == diffmatchpatch . DiffEqual :
buf . WriteString ( diff . Text )
case diff . Type == diffmatchpatch . DiffInsert && lineType == DiffLineAdd :
2016-01-09 14:51:17 +08:00
buf . Write ( addedCodePrefix )
2020-10-22 06:14:44 +08:00
buf . WriteString ( diff . Text )
2016-01-09 14:51:17 +08:00
buf . Write ( codeTagSuffix )
2020-10-22 06:14:44 +08:00
case diff . Type == diffmatchpatch . DiffDelete && lineType == DiffLineDel :
2016-01-09 14:51:17 +08:00
buf . Write ( removedCodePrefix )
2020-10-22 06:14:44 +08:00
buf . WriteString ( diff . Text )
2016-01-09 14:51:17 +08:00
buf . Write ( codeTagSuffix )
2016-01-04 05:26:46 +08:00
}
}
2022-07-23 19:28:02 +08:00
for range lineWrapperTags {
buf . WriteString ( "</span>" )
}
return buf . String ( )
2016-01-04 05:26:46 +08:00
}
2016-11-22 19:08:23 +08:00
// GetLine gets a specific line by type (add or del) and file line number
2016-01-08 20:50:25 +08:00
func ( diffSection * DiffSection ) GetLine ( lineType DiffLineType , idx int ) * DiffLine {
2016-08-08 00:49:47 +08:00
var (
difference = 0
addCount = 0
delCount = 0
matchDiffLine * DiffLine
)
LOOP :
2016-01-08 20:50:25 +08:00
for _ , diffLine := range diffSection . Lines {
2016-08-08 00:49:47 +08:00
switch diffLine . Type {
2016-11-08 00:24:59 +08:00
case DiffLineAdd :
2016-08-08 00:49:47 +08:00
addCount ++
2016-11-08 00:24:59 +08:00
case DiffLineDel :
2016-08-08 00:49:47 +08:00
delCount ++
default :
if matchDiffLine != nil {
break LOOP
}
2016-01-08 20:50:25 +08:00
difference = diffLine . RightIdx - diffLine . LeftIdx
2016-08-08 00:49:47 +08:00
addCount = 0
delCount = 0
2016-01-04 05:26:46 +08:00
}
2016-08-08 00:49:47 +08:00
switch lineType {
2016-11-08 00:24:59 +08:00
case DiffLineDel :
2016-01-09 14:51:17 +08:00
if diffLine . RightIdx == 0 && diffLine . LeftIdx == idx - difference {
2016-08-08 00:49:47 +08:00
matchDiffLine = diffLine
2016-01-08 20:50:25 +08:00
}
2016-11-08 00:24:59 +08:00
case DiffLineAdd :
2016-01-09 14:51:17 +08:00
if diffLine . LeftIdx == 0 && diffLine . RightIdx == idx + difference {
2016-08-08 00:49:47 +08:00
matchDiffLine = diffLine
2016-01-04 05:26:46 +08:00
}
}
}
2016-08-08 00:49:47 +08:00
if addCount == delCount {
return matchDiffLine
}
2016-01-04 05:26:46 +08:00
return nil
}
2016-08-08 00:49:47 +08:00
// diffMatchPatch is the shared diff engine, configured with an edit cost of 100.
var diffMatchPatch = func() *diffmatchpatch.DiffMatchPatch {
	dmp := diffmatchpatch.New()
	dmp.DiffEditCost = 100
	return dmp
}()
2022-01-07 09:18:52 +08:00
// DiffInline is a struct that has a content and escape status
type DiffInline struct {
2022-08-14 02:32:34 +08:00
EscapeStatus * charset . EscapeStatus
2022-01-07 09:18:52 +08:00
Content template . HTML
}
// DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
2022-08-14 02:32:34 +08:00
func DiffInlineWithUnicodeEscape ( s template . HTML , locale translation . Locale ) DiffInline {
status , content := charset . EscapeControlHTML ( string ( s ) , locale )
2022-01-07 09:18:52 +08:00
return DiffInline { EscapeStatus : status , Content : template . HTML ( content ) }
}
// DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
2022-08-14 02:32:34 +08:00
func DiffInlineWithHighlightCode ( fileName , language , code string , locale translation . Locale ) DiffInline {
status , content := charset . EscapeControlHTML ( highlight . Code ( fileName , language , code ) , locale )
2022-01-07 09:18:52 +08:00
return DiffInline { EscapeStatus : status , Content : template . HTML ( content ) }
}
2016-11-22 19:08:23 +08:00
// GetComputedInlineDiffFor computes inline diff for the given line.
2022-08-14 02:32:34 +08:00
func ( diffSection * DiffSection ) GetComputedInlineDiffFor ( diffLine * DiffLine , locale translation . Locale ) DiffInline {
2016-08-08 00:49:47 +08:00
if setting . Git . DisableDiffHighlight {
2022-08-14 02:32:34 +08:00
return getLineContent ( diffLine . Content [ 1 : ] , locale )
2016-01-28 04:54:08 +08:00
}
2020-07-01 05:34:03 +08:00
2016-08-08 00:49:47 +08:00
var (
compareDiffLine * DiffLine
diff1 string
diff2 string
)
2016-01-04 05:26:46 +08:00
2021-11-18 04:37:00 +08:00
language := ""
if diffSection . file != nil {
language = diffSection . file . Language
}
2016-01-28 04:54:08 +08:00
// try to find equivalent diff line. ignore, otherwise
2016-08-08 00:49:47 +08:00
switch diffLine . Type {
2020-07-01 05:34:03 +08:00
case DiffLineSection :
2022-08-14 02:32:34 +08:00
return getLineContent ( diffLine . Content [ 1 : ] , locale )
2016-11-08 00:24:59 +08:00
case DiffLineAdd :
compareDiffLine = diffSection . GetLine ( DiffLineDel , diffLine . RightIdx )
2016-01-28 04:54:08 +08:00
if compareDiffLine == nil {
2022-08-14 02:32:34 +08:00
return DiffInlineWithHighlightCode ( diffSection . FileName , language , diffLine . Content [ 1 : ] , locale )
2016-01-28 04:54:08 +08:00
}
diff1 = compareDiffLine . Content
diff2 = diffLine . Content
2016-11-08 00:24:59 +08:00
case DiffLineDel :
compareDiffLine = diffSection . GetLine ( DiffLineAdd , diffLine . LeftIdx )
2016-01-28 04:54:08 +08:00
if compareDiffLine == nil {
2022-08-14 02:32:34 +08:00
return DiffInlineWithHighlightCode ( diffSection . FileName , language , diffLine . Content [ 1 : ] , locale )
2016-01-04 05:26:46 +08:00
}
2016-01-28 04:54:08 +08:00
diff1 = diffLine . Content
diff2 = compareDiffLine . Content
2016-08-08 00:49:47 +08:00
default :
2019-06-25 04:23:52 +08:00
if strings . IndexByte ( " +-" , diffLine . Content [ 0 ] ) > - 1 {
2022-08-14 02:32:34 +08:00
return DiffInlineWithHighlightCode ( diffSection . FileName , language , diffLine . Content [ 1 : ] , locale )
2019-06-25 04:23:52 +08:00
}
2022-08-14 02:32:34 +08:00
return DiffInlineWithHighlightCode ( diffSection . FileName , language , diffLine . Content , locale )
2016-01-28 04:54:08 +08:00
}
2016-01-04 05:26:46 +08:00
2022-07-23 19:28:02 +08:00
hcd := newHighlightCodeDiff ( )
diffRecord := hcd . diffWithHighlight ( diffSection . FileName , language , diff1 [ 1 : ] , diff2 [ 1 : ] )
// it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
// if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
diffHTML := diffToHTML ( nil , diffRecord , diffLine . Type )
2022-08-14 02:32:34 +08:00
return DiffInlineWithUnicodeEscape ( template . HTML ( diffHTML ) , locale )
2016-01-04 05:26:46 +08:00
}
2016-11-24 16:30:08 +08:00
// DiffFile represents a file diff.
2014-04-13 09:35:36 +08:00
type DiffFile struct {
2022-05-08 02:28:10 +08:00
Name string
2022-05-09 06:29:50 +08:00
NameHash string
2022-05-08 02:28:10 +08:00
OldName string
Index int
Addition , Deletion int
Type DiffFileType
IsCreated bool
IsDeleted bool
IsBin bool
IsLFSFile bool
IsRenamed bool
IsAmbiguous bool
IsSubmodule bool
Sections [ ] * DiffSection
IsIncomplete bool
IsIncompleteLineTooLong bool
IsProtected bool
IsGenerated bool
IsVendored bool
IsViewed bool // User specific
HasChangedSinceLastReview bool // User specific
Language string
2014-04-13 09:35:36 +08:00
}
2016-11-22 19:08:23 +08:00
// GetType returns type of diff file.
2016-01-07 04:00:40 +08:00
func ( diffFile * DiffFile ) GetType ( ) int {
return int ( diffFile . Type )
}
2019-11-15 10:52:59 +08:00
// GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
func ( diffFile * DiffFile ) GetTailSection ( gitRepo * git . Repository , leftCommitID , rightCommitID string ) * DiffSection {
2020-01-22 15:20:58 +08:00
if len ( diffFile . Sections ) == 0 || diffFile . Type != DiffFileChange || diffFile . IsBin || diffFile . IsLFSFile {
2019-11-15 10:52:59 +08:00
return nil
}
leftCommit , err := gitRepo . GetCommit ( leftCommitID )
if err != nil {
return nil
}
rightCommit , err := gitRepo . GetCommit ( rightCommitID )
if err != nil {
return nil
}
lastSection := diffFile . Sections [ len ( diffFile . Sections ) - 1 ]
lastLine := lastSection . Lines [ len ( lastSection . Lines ) - 1 ]
leftLineCount := getCommitFileLineCount ( leftCommit , diffFile . Name )
rightLineCount := getCommitFileLineCount ( rightCommit , diffFile . Name )
if leftLineCount <= lastLine . LeftIdx || rightLineCount <= lastLine . RightIdx {
return nil
}
tailDiffLine := & DiffLine {
Type : DiffLineSection ,
Content : " " ,
SectionInfo : & DiffLineSectionInfo {
Path : diffFile . Name ,
LastLeftIdx : lastLine . LeftIdx ,
LastRightIdx : lastLine . RightIdx ,
LeftIdx : leftLineCount ,
RightIdx : rightLineCount ,
2022-01-21 01:46:10 +08:00
} ,
}
2020-07-01 05:34:03 +08:00
tailSection := & DiffSection { FileName : diffFile . Name , Lines : [ ] * DiffLine { tailDiffLine } }
2019-11-15 10:52:59 +08:00
return tailSection
}
2022-05-08 02:28:10 +08:00
// GetDiffFileName returns the name of the diff file, or its old name in case it was deleted
func (diffFile *DiffFile) GetDiffFileName() string {
	if name := diffFile.Name; name != "" {
		return name
	}
	return diffFile.OldName
}
// ShouldBeHidden returns whether the file is generated or was already viewed.
func (diffFile *DiffFile) ShouldBeHidden() bool {
	if diffFile.IsGenerated {
		return true
	}
	return diffFile.IsViewed
}
2019-11-15 10:52:59 +08:00
// getCommitFileLineCount returns the number of lines of the blob at filePath
// in the given commit. It returns 0 when the blob cannot be found or its
// lines cannot be counted.
func getCommitFileLineCount(commit *git.Commit, filePath string) int {
	blob, err := commit.GetBlobByPath(filePath)
	if err != nil {
		return 0
	}
	if count, err := blob.GetBlobLineCount(); err == nil {
		return count
	}
	return 0
}
2016-11-24 16:30:08 +08:00
// Diff represents a difference between two git trees.
2014-04-13 09:35:36 +08:00
type Diff struct {
2022-05-08 02:28:10 +08:00
Start , End string
NumFiles int
TotalAddition , TotalDeletion int
Files [ ] * DiffFile
IsIncomplete bool
NumViewedFiles int // user-specific
2014-04-13 09:35:36 +08:00
}
2018-08-06 12:43:22 +08:00
// LoadComments loads comments into each line
2022-06-13 17:37:59 +08:00
func ( diff * Diff ) LoadComments ( ctx context . Context , issue * issues_model . Issue , currentUser * user_model . User ) error {
allComments , err := issues_model . FetchCodeComments ( ctx , issue , currentUser )
2018-08-06 12:43:22 +08:00
if err != nil {
return err
}
for _ , file := range diff . Files {
if lineCommits , ok := allComments [ file . Name ] ; ok {
for _ , section := range file . Sections {
for _ , line := range section . Lines {
if comments , ok := lineCommits [ int64 ( line . LeftIdx * - 1 ) ] ; ok {
line . Comments = append ( line . Comments , comments ... )
}
if comments , ok := lineCommits [ int64 ( line . RightIdx ) ] ; ok {
line . Comments = append ( line . Comments , comments ... )
}
sort . SliceStable ( line . Comments , func ( i , j int ) bool {
return line . Comments [ i ] . CreatedUnix < line . Comments [ j ] . CreatedUnix
} )
}
}
}
}
return nil
}
2016-11-22 19:08:23 +08:00
// cmdDiffHead is the prefix of the line that starts the diff of a file in a patch.
const cmdDiffHead = "diff --git "
2014-04-13 09:35:36 +08:00
2020-10-17 01:13:18 +08:00
// ParsePatch builds a Diff object from a io.Reader and some parameters.
2021-11-20 21:50:00 +08:00
func ParsePatch ( maxLines , maxLineCharacters , maxFiles int , reader io . Reader , skipToFile string ) ( * Diff , error ) {
2021-11-22 00:51:08 +08:00
log . Debug ( "ParsePatch(%d, %d, %d, ..., %s)" , maxLines , maxLineCharacters , maxFiles , skipToFile )
2020-10-17 01:13:18 +08:00
var curFile * DiffFile
2021-11-20 21:50:00 +08:00
skipping := skipToFile != ""
2020-10-17 01:13:18 +08:00
diff := & Diff { Files : make ( [ ] * DiffFile , 0 ) }
sb := strings . Builder { }
// OK let's set a reasonable buffer size.
// This should be let's say at least the size of maxLineCharacters or 4096 whichever is larger.
readerSize := maxLineCharacters
if readerSize < 4096 {
readerSize = 4096
}
input := bufio . NewReaderSize ( reader , readerSize )
line , err := input . ReadString ( '\n' )
if err != nil {
if err == io . EOF {
return diff , nil
}
return diff , err
}
parsingLoop :
for {
// 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
// if it does not we have bad input!
if ! strings . HasPrefix ( line , cmdDiffHead ) {
2021-11-20 21:50:00 +08:00
return diff , fmt . Errorf ( "invalid first file line: %s" , line )
2014-04-13 09:35:36 +08:00
}
2021-11-22 00:51:08 +08:00
if maxFiles > - 1 && len ( diff . Files ) >= maxFiles {
2021-10-16 00:05:33 +08:00
lastFile := createDiffFile ( diff , line )
diff . End = lastFile . Name
2020-10-17 01:13:18 +08:00
diff . IsIncomplete = true
2021-09-22 13:38:34 +08:00
_ , err := io . Copy ( io . Discard , reader )
2020-10-17 01:13:18 +08:00
if err != nil {
// By the definition of io.Copy this never returns io.EOF
2021-11-20 21:50:00 +08:00
return diff , fmt . Errorf ( "error during io.Copy: %w" , err )
2020-10-17 01:13:18 +08:00
}
break parsingLoop
}
2014-04-13 09:35:36 +08:00
2020-10-17 01:13:18 +08:00
curFile = createDiffFile ( diff , line )
2021-11-20 21:50:00 +08:00
if skipping {
if curFile . Name != skipToFile {
line , err = skipToNextDiffHead ( input )
if err != nil {
if err == io . EOF {
return diff , nil
}
return diff , err
}
continue
}
skipping = false
}
2020-10-17 01:13:18 +08:00
diff . Files = append ( diff . Files , curFile )
// 2. It is followed by one or more extended header lines:
//
// old mode <mode>
// new mode <mode>
// deleted file mode <mode>
// new file mode <mode>
// copy from <path>
// copy to <path>
// rename from <path>
// rename to <path>
// similarity index <number>
// dissimilarity index <number>
// index <hash>..<hash> <mode>
//
// * <mode> 6-digit octal numbers including the file type and file permission bits.
// * <path> does not include the a/ and b/ prefixes
// * <number> percentage of unchanged lines for similarity, percentage of changed
// lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
// * The index line includes the blob object names before and after the change.
// The <mode> is included if the file mode does not change; otherwise, separate
// lines indicate the old and the new mode.
// 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
//
// --- a/<path>
// +++ b/<path>
//
// With multiple hunks
//
// @@ <hunk descriptor> @@
// +added line
// -removed line
// unchanged line
//
// 4. Binary files get:
//
// Binary files a/<path> and b/<path> differ
//
// but one of a/<path> and b/<path> could be /dev/null.
curFileLoop :
2017-11-29 07:22:24 +08:00
for {
2020-10-17 01:13:18 +08:00
line , err = input . ReadString ( '\n' )
2017-11-29 07:22:24 +08:00
if err != nil {
2020-10-17 01:13:18 +08:00
if err != io . EOF {
return diff , err
2017-11-29 07:22:24 +08:00
}
2020-10-17 01:13:18 +08:00
break parsingLoop
2017-11-29 07:22:24 +08:00
}
2020-10-17 01:13:18 +08:00
switch {
2020-10-22 07:08:44 +08:00
case strings . HasPrefix ( line , cmdDiffHead ) :
break curFileLoop
2020-10-17 01:13:18 +08:00
case strings . HasPrefix ( line , "old mode " ) ||
strings . HasPrefix ( line , "new mode " ) :
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
2021-02-28 02:46:14 +08:00
case strings . HasPrefix ( line , "rename from " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileRename
if curFile . IsAmbiguous {
curFile . OldName = line [ len ( "rename from " ) : len ( line ) - 1 ]
}
case strings . HasPrefix ( line , "rename to " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileRename
if curFile . IsAmbiguous {
curFile . Name = line [ len ( "rename to " ) : len ( line ) - 1 ]
curFile . IsAmbiguous = false
}
2020-10-17 01:13:18 +08:00
case strings . HasPrefix ( line , "copy from " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
2021-02-28 02:46:14 +08:00
if curFile . IsAmbiguous {
curFile . OldName = line [ len ( "copy from " ) : len ( line ) - 1 ]
}
2020-10-17 01:13:18 +08:00
case strings . HasPrefix ( line , "copy to " ) :
curFile . IsRenamed = true
curFile . Type = DiffFileCopy
2021-02-28 02:46:14 +08:00
if curFile . IsAmbiguous {
curFile . Name = line [ len ( "copy to " ) : len ( line ) - 1 ]
curFile . IsAmbiguous = false
}
2020-10-17 01:13:18 +08:00
case strings . HasPrefix ( line , "new file" ) :
curFile . Type = DiffFileAdd
curFile . IsCreated = true
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "deleted" ) :
curFile . Type = DiffFileDel
curFile . IsDeleted = true
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "index" ) :
if strings . HasSuffix ( line , " 160000\n" ) {
curFile . IsSubmodule = true
}
case strings . HasPrefix ( line , "similarity index 100%" ) :
curFile . Type = DiffFileRename
case strings . HasPrefix ( line , "Binary" ) :
curFile . IsBin = true
case strings . HasPrefix ( line , "--- " ) :
2021-02-28 02:46:14 +08:00
// Handle ambiguous filenames
if curFile . IsAmbiguous {
2021-11-08 01:52:50 +08:00
// The shortest string that can end up here is:
2022-01-10 17:32:37 +08:00
// "--- a\t\n" without the quotes.
2021-11-08 01:52:50 +08:00
// This line has a len() of 7 but doesn't contain a oldName.
// So the amount that the line need is at least 8 or more.
// The code will otherwise panic for a out-of-bounds.
if len ( line ) > 7 && line [ 4 ] == 'a' {
2021-02-28 02:46:14 +08:00
curFile . OldName = line [ 6 : len ( line ) - 1 ]
if line [ len ( line ) - 2 ] == '\t' {
curFile . OldName = curFile . OldName [ : len ( curFile . OldName ) - 1 ]
}
} else {
curFile . OldName = ""
}
}
// Otherwise do nothing with this line
2020-10-17 01:13:18 +08:00
case strings . HasPrefix ( line , "+++ " ) :
2021-02-28 02:46:14 +08:00
// Handle ambiguous filenames
if curFile . IsAmbiguous {
if len ( line ) > 6 && line [ 4 ] == 'b' {
curFile . Name = line [ 6 : len ( line ) - 1 ]
if line [ len ( line ) - 2 ] == '\t' {
curFile . Name = curFile . Name [ : len ( curFile . Name ) - 1 ]
}
if curFile . OldName == "" {
curFile . OldName = curFile . Name
}
} else {
curFile . Name = curFile . OldName
}
curFile . IsAmbiguous = false
}
// Otherwise do nothing with this line, but now switch to parsing hunks
2020-10-17 01:13:18 +08:00
lineBytes , isFragment , err := parseHunks ( curFile , maxLines , maxLineCharacters , input )
diff . TotalAddition += curFile . Addition
diff . TotalDeletion += curFile . Deletion
if err != nil {
if err != io . EOF {
return diff , err
}
break parsingLoop
}
sb . Reset ( )
_ , _ = sb . Write ( lineBytes )
for isFragment {
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
2021-11-20 21:50:00 +08:00
return diff , fmt . Errorf ( "unable to ReadLine: %w" , err )
2020-10-17 01:13:18 +08:00
}
_ , _ = sb . Write ( lineBytes )
}
line = sb . String ( )
sb . Reset ( )
break curFileLoop
2015-12-02 14:10:13 +08:00
}
}
2020-10-17 01:13:18 +08:00
}
2021-07-13 09:13:52 +08:00
// TODO: There are numerous issues with this:
2020-10-17 01:13:18 +08:00
// - we might want to consider detecting encoding while parsing but...
// - we're likely to fail to get the correct encoding here anyway as we won't have enough information
2022-01-21 01:46:10 +08:00
diffLineTypeBuffers := make ( map [ DiffLineType ] * bytes . Buffer , 3 )
diffLineTypeDecoders := make ( map [ DiffLineType ] * encoding . Decoder , 3 )
2021-07-13 09:13:52 +08:00
diffLineTypeBuffers [ DiffLinePlain ] = new ( bytes . Buffer )
diffLineTypeBuffers [ DiffLineAdd ] = new ( bytes . Buffer )
diffLineTypeBuffers [ DiffLineDel ] = new ( bytes . Buffer )
2020-10-17 01:13:18 +08:00
for _ , f := range diff . Files {
2022-05-09 06:29:50 +08:00
f . NameHash = base . EncodeSha1 ( f . Name )
2021-07-13 09:13:52 +08:00
for _ , buffer := range diffLineTypeBuffers {
buffer . Reset ( )
}
2020-10-17 01:13:18 +08:00
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
if l . Type == DiffLineSection {
continue
}
2021-07-13 09:13:52 +08:00
diffLineTypeBuffers [ l . Type ] . WriteString ( l . Content [ 1 : ] )
diffLineTypeBuffers [ l . Type ] . WriteString ( "\n" )
2020-10-17 01:13:18 +08:00
}
}
2021-07-13 09:13:52 +08:00
for lineType , buffer := range diffLineTypeBuffers {
diffLineTypeDecoders [ lineType ] = nil
if buffer . Len ( ) == 0 {
continue
}
charsetLabel , err := charset . DetectEncoding ( buffer . Bytes ( ) )
if charsetLabel != "UTF-8" && err == nil {
encoding , _ := stdcharset . Lookup ( charsetLabel )
if encoding != nil {
diffLineTypeDecoders [ lineType ] = encoding . NewDecoder ( )
}
}
}
for _ , sec := range f . Sections {
for _ , l := range sec . Lines {
decoder := diffLineTypeDecoders [ l . Type ]
if decoder != nil {
if c , _ , err := transform . String ( decoder , l . Content [ 1 : ] ) ; err == nil {
l . Content = l . Content [ 0 : 1 ] + c
2020-10-17 01:13:18 +08:00
}
}
}
2014-09-17 12:03:03 +08:00
}
2020-10-17 01:13:18 +08:00
}
2014-09-17 12:03:03 +08:00
2020-10-17 01:13:18 +08:00
diff . NumFiles = len ( diff . Files )
return diff , nil
}
2016-12-26 09:16:37 +08:00
2021-11-20 21:50:00 +08:00
func skipToNextDiffHead ( input * bufio . Reader ) ( line string , err error ) {
// need to skip until the next cmdDiffHead
2022-06-20 18:02:49 +08:00
var isFragment , wasFragment bool
2021-11-20 21:50:00 +08:00
var lineBytes [ ] byte
for {
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
return
}
if wasFragment {
wasFragment = isFragment
continue
}
if bytes . HasPrefix ( lineBytes , [ ] byte ( cmdDiffHead ) ) {
break
}
wasFragment = isFragment
}
line = string ( lineBytes )
if isFragment {
var tail string
tail , err = input . ReadString ( '\n' )
if err != nil {
return
}
line += tail
}
2022-06-20 18:02:49 +08:00
return line , err
2021-11-20 21:50:00 +08:00
}
2020-10-17 01:13:18 +08:00
func parseHunks ( curFile * DiffFile , maxLines , maxLineCharacters int , input * bufio . Reader ) ( lineBytes [ ] byte , isFragment bool , err error ) {
sb := strings . Builder { }
2016-12-26 09:16:37 +08:00
2020-10-17 01:13:18 +08:00
var (
curSection * DiffSection
curFileLinesCount int
curFileLFSPrefix bool
)
2016-12-26 09:16:37 +08:00
2021-08-29 22:28:04 +08:00
lastLeftIdx := - 1
2020-10-17 01:13:18 +08:00
leftLine , rightLine := 1 , 1
2016-12-26 09:16:37 +08:00
2020-10-17 01:13:18 +08:00
for {
2020-11-22 06:41:24 +08:00
for isFragment {
curFile . IsIncomplete = true
2021-05-04 20:58:49 +08:00
curFile . IsIncompleteLineTooLong = true
2020-11-22 06:41:24 +08:00
_ , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
2021-11-20 21:50:00 +08:00
err = fmt . Errorf ( "unable to ReadLine: %w" , err )
2020-11-22 06:41:24 +08:00
return
}
}
2020-10-17 01:13:18 +08:00
sb . Reset ( )
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
if err == io . EOF {
return
2016-12-26 09:16:37 +08:00
}
2021-11-20 21:50:00 +08:00
err = fmt . Errorf ( "unable to ReadLine: %w" , err )
2020-10-17 01:13:18 +08:00
return
}
if lineBytes [ 0 ] == 'd' {
// End of hunks
return
2016-12-26 09:16:37 +08:00
}
2020-10-17 01:13:18 +08:00
switch lineBytes [ 0 ] {
case '@' :
2021-11-22 00:51:08 +08:00
if maxLines > - 1 && curFileLinesCount >= maxLines {
2020-10-17 01:13:18 +08:00
curFile . IsIncomplete = true
continue
}
2014-04-13 09:35:36 +08:00
2020-10-17 01:13:18 +08:00
_ , _ = sb . Write ( lineBytes )
for isFragment {
// This is very odd indeed - we're in a section header and the line is too long
// This really shouldn't happen...
lineBytes , isFragment , err = input . ReadLine ( )
if err != nil {
// Now by the definition of ReadLine this cannot be io.EOF
2021-11-20 21:50:00 +08:00
err = fmt . Errorf ( "unable to ReadLine: %w" , err )
2020-10-17 01:13:18 +08:00
return
}
_ , _ = sb . Write ( lineBytes )
}
line := sb . String ( )
// Create a new section to represent this hunk
2021-11-18 04:37:00 +08:00
curSection = & DiffSection { file : curFile }
2021-09-05 18:16:49 +08:00
lastLeftIdx = - 1
2014-04-13 09:35:36 +08:00
curFile . Sections = append ( curFile . Sections , curSection )
2020-10-17 01:13:18 +08:00
2020-01-24 01:28:15 +08:00
lineSectionInfo := getDiffLineSectionInfo ( curFile . Name , line , leftLine - 1 , rightLine - 1 )
2019-11-15 10:52:59 +08:00
diffLine := & DiffLine {
Type : DiffLineSection ,
Content : line ,
SectionInfo : lineSectionInfo ,
2015-07-29 22:55:01 +08:00
}
2019-11-15 10:52:59 +08:00
curSection . Lines = append ( curSection . Lines , diffLine )
2020-07-01 05:34:03 +08:00
curSection . FileName = curFile . Name
2019-11-15 10:52:59 +08:00
// update line number.
leftLine = lineSectionInfo . LeftIdx
rightLine = lineSectionInfo . RightIdx
2014-04-13 09:35:36 +08:00
continue
2020-10-17 01:13:18 +08:00
case '\\' :
2021-11-22 00:51:08 +08:00
if maxLines > - 1 && curFileLinesCount >= maxLines {
2020-10-17 01:13:18 +08:00
curFile . IsIncomplete = true
continue
}
// This is used only to indicate that the current file does not have a terminal newline
if ! bytes . Equal ( lineBytes , [ ] byte ( "\\ No newline at end of file" ) ) {
2021-11-20 21:50:00 +08:00
err = fmt . Errorf ( "unexpected line in hunk: %s" , string ( lineBytes ) )
2020-10-17 01:13:18 +08:00
return
}
// Technically this should be the end the file!
// FIXME: we should be putting a marker at the end of the file if there is no terminal new line
continue
case '+' :
curFileLinesCount ++
2014-04-13 09:35:36 +08:00
curFile . Addition ++
2021-11-22 00:51:08 +08:00
if maxLines > - 1 && curFileLinesCount >= maxLines {
2020-10-17 01:13:18 +08:00
curFile . IsIncomplete = true
continue
}
2021-08-29 22:28:04 +08:00
diffLine := & DiffLine { Type : DiffLineAdd , RightIdx : rightLine , Match : - 1 }
2014-04-13 09:35:36 +08:00
rightLine ++
2021-04-02 02:29:14 +08:00
if curSection == nil {
// Create a new section to represent this hunk
2021-11-18 04:37:00 +08:00
curSection = & DiffSection { file : curFile }
2021-04-02 02:29:14 +08:00
curFile . Sections = append ( curFile . Sections , curSection )
2021-09-05 18:16:49 +08:00
lastLeftIdx = - 1
2021-04-02 02:29:14 +08:00
}
2021-08-29 22:28:04 +08:00
if lastLeftIdx > - 1 {
diffLine . Match = lastLeftIdx
curSection . Lines [ lastLeftIdx ] . Match = len ( curSection . Lines )
lastLeftIdx ++
if lastLeftIdx >= len ( curSection . Lines ) || curSection . Lines [ lastLeftIdx ] . Type != DiffLineDel {
lastLeftIdx = - 1
}
}
2014-04-13 09:35:36 +08:00
curSection . Lines = append ( curSection . Lines , diffLine )
2020-10-17 01:13:18 +08:00
case '-' :
curFileLinesCount ++
2014-04-13 09:35:36 +08:00
curFile . Deletion ++
2021-11-22 00:51:08 +08:00
if maxLines > - 1 && curFileLinesCount >= maxLines {
2020-10-17 01:13:18 +08:00
curFile . IsIncomplete = true
continue
}
2021-08-29 22:28:04 +08:00
diffLine := & DiffLine { Type : DiffLineDel , LeftIdx : leftLine , Match : - 1 }
2014-04-13 09:35:36 +08:00
if leftLine > 0 {
leftLine ++
}
2021-04-02 02:29:14 +08:00
if curSection == nil {
// Create a new section to represent this hunk
2021-11-18 04:37:00 +08:00
curSection = & DiffSection { file : curFile }
2021-04-02 02:29:14 +08:00
curFile . Sections = append ( curFile . Sections , curSection )
2021-09-05 18:16:49 +08:00
lastLeftIdx = - 1
2021-04-02 02:29:14 +08:00
}
2021-08-29 22:28:04 +08:00
if len ( curSection . Lines ) == 0 || curSection . Lines [ len ( curSection . Lines ) - 1 ] . Type != DiffLineDel {
lastLeftIdx = len ( curSection . Lines )
}
2014-04-13 09:35:36 +08:00
curSection . Lines = append ( curSection . Lines , diffLine )
2020-10-17 01:13:18 +08:00
case ' ' :
curFileLinesCount ++
2021-11-22 00:51:08 +08:00
if maxLines > - 1 && curFileLinesCount >= maxLines {
2020-10-17 01:13:18 +08:00
curFile . IsIncomplete = true
continue
}
diffLine := & DiffLine { Type : DiffLinePlain , LeftIdx : leftLine , RightIdx : rightLine }
leftLine ++
rightLine ++
2021-08-29 22:28:04 +08:00
lastLeftIdx = - 1
2021-04-02 02:29:14 +08:00
if curSection == nil {
// Create a new section to represent this hunk
2021-11-18 04:37:00 +08:00
curSection = & DiffSection { file : curFile }
2021-04-02 02:29:14 +08:00
curFile . Sections = append ( curFile . Sections , curSection )
}
2020-10-17 01:13:18 +08:00
curSection . Lines = append ( curSection . Lines , diffLine )
default :
// This is unexpected
2021-11-20 21:50:00 +08:00
err = fmt . Errorf ( "unexpected line in hunk: %s" , string ( lineBytes ) )
2020-10-17 01:13:18 +08:00
return
2014-04-13 09:35:36 +08:00
}
2020-10-17 01:13:18 +08:00
line := string ( lineBytes )
if isFragment {
curFile . IsIncomplete = true
2021-05-04 20:58:49 +08:00
curFile . IsIncompleteLineTooLong = true
2020-10-17 01:13:18 +08:00
for isFragment {
lineBytes , isFragment , err = input . ReadLine ( )
2019-11-28 17:05:57 +08:00
if err != nil {
2020-10-17 01:13:18 +08:00
// Now by the definition of ReadLine this cannot be io.EOF
2021-11-20 21:50:00 +08:00
err = fmt . Errorf ( "unable to ReadLine: %w" , err )
2020-10-17 01:13:18 +08:00
return
2019-11-28 17:05:57 +08:00
}
}
2020-10-17 01:13:18 +08:00
}
2020-11-22 06:41:24 +08:00
if len ( line ) > maxLineCharacters {
curFile . IsIncomplete = true
2021-05-04 20:58:49 +08:00
curFile . IsIncompleteLineTooLong = true
2020-11-22 06:41:24 +08:00
line = line [ : maxLineCharacters ]
}
2020-10-17 01:13:18 +08:00
curSection . Lines [ len ( curSection . Lines ) - 1 ] . Content = line
2019-11-28 17:05:57 +08:00
2020-10-17 01:13:18 +08:00
// handle LFS
2021-04-09 06:25:57 +08:00
if line [ 1 : ] == lfs . MetaFileIdentifier {
2020-10-17 01:13:18 +08:00
curFileLFSPrefix = true
2021-04-09 06:25:57 +08:00
} else if curFileLFSPrefix && strings . HasPrefix ( line [ 1 : ] , lfs . MetaFileOidPrefix ) {
oid := strings . TrimPrefix ( line [ 1 : ] , lfs . MetaFileOidPrefix )
2020-10-17 01:13:18 +08:00
if len ( oid ) == 64 {
2022-06-12 23:51:54 +08:00
m := & git_model . LFSMetaObject { Pointer : lfs . Pointer { Oid : oid } }
2022-06-06 16:01:49 +08:00
count , err := db . CountByBean ( db . DefaultContext , m )
2015-12-02 14:10:13 +08:00
2020-10-17 01:13:18 +08:00
if err == nil && count > 0 {
curFile . IsBin = true
curFile . IsLFSFile = true
curSection . Lines = nil
2021-09-05 18:16:49 +08:00
lastLeftIdx = - 1
2014-04-13 09:35:36 +08:00
}
}
}
}
2020-10-17 01:13:18 +08:00
}
2014-04-13 09:35:36 +08:00
2020-10-17 01:13:18 +08:00
func createDiffFile ( diff * Diff , line string ) * DiffFile {
// The a/ and b/ filenames are the same unless rename/copy is involved.
// Especially, even for a creation or a deletion, /dev/null is not used
// in place of the a/ or b/ filenames.
//
// When rename/copy is involved, file1 and file2 show the name of the
// source file of the rename/copy and the name of the file that rename/copy
// produces, respectively.
//
// Path names are quoted if necessary.
//
// This means that you should always be able to determine the file name even when there
// there is potential ambiguity...
//
// but we can be simpler with our heuristics by just forcing git to prefix things nicely
curFile := & DiffFile {
Index : len ( diff . Files ) + 1 ,
Type : DiffFileChange ,
Sections : make ( [ ] * DiffSection , 0 , 10 ) ,
}
rd := strings . NewReader ( line [ len ( cmdDiffHead ) : ] + " " )
curFile . Type = DiffFileChange
2022-06-20 18:02:49 +08:00
var oldNameAmbiguity , newNameAmbiguity bool
2021-02-28 02:46:14 +08:00
curFile . OldName , oldNameAmbiguity = readFileName ( rd )
curFile . Name , newNameAmbiguity = readFileName ( rd )
if oldNameAmbiguity && newNameAmbiguity {
curFile . IsAmbiguous = true
// OK we should bet that the oldName and the newName are the same if they can be made to be same
// So we need to start again ...
if ( len ( line ) - len ( cmdDiffHead ) - 1 ) % 2 == 0 {
// diff --git a/b b/b b/b b/b b/b b/b
//
midpoint := ( len ( line ) + len ( cmdDiffHead ) - 1 ) / 2
new , old := line [ len ( cmdDiffHead ) : midpoint ] , line [ midpoint + 1 : ]
if len ( new ) > 2 && len ( old ) > 2 && new [ 2 : ] == old [ 2 : ] {
curFile . OldName = old [ 2 : ]
curFile . Name = old [ 2 : ]
}
}
}
2020-10-17 01:13:18 +08:00
curFile . IsRenamed = curFile . Name != curFile . OldName
return curFile
}
2021-02-28 02:46:14 +08:00
func readFileName ( rd * strings . Reader ) ( string , bool ) {
ambiguity := false
2020-10-17 01:13:18 +08:00
var name string
char , _ := rd . ReadByte ( )
_ = rd . UnreadByte ( )
if char == '"' {
fmt . Fscanf ( rd , "%q " , & name )
2021-11-08 01:52:50 +08:00
if len ( name ) == 0 {
2022-07-23 19:28:02 +08:00
log . Error ( "Reader has no file name: reader=%+v" , rd )
2021-11-08 01:52:50 +08:00
return "" , true
}
2020-10-17 01:13:18 +08:00
if name [ 0 ] == '\\' {
name = name [ 1 : ]
2014-12-09 15:18:25 +08:00
}
2020-10-17 01:13:18 +08:00
} else {
2021-02-28 02:46:14 +08:00
// This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone
ambiguity = true
2020-10-17 01:13:18 +08:00
fmt . Fscanf ( rd , "%s " , & name )
2021-02-28 02:46:14 +08:00
char , _ := rd . ReadByte ( )
_ = rd . UnreadByte ( )
for ! ( char == 0 || char == '"' || char == 'b' ) {
var suffix string
fmt . Fscanf ( rd , "%s " , & suffix )
name += " " + suffix
char , _ = rd . ReadByte ( )
_ = rd . UnreadByte ( )
}
}
if len ( name ) < 2 {
2022-07-23 19:28:02 +08:00
log . Error ( "Unable to determine name from reader: reader=%+v" , rd )
2021-02-28 02:46:14 +08:00
return "" , true
2014-12-09 15:18:25 +08:00
}
2021-02-28 02:46:14 +08:00
return name [ 2 : ] , ambiguity
2014-04-13 09:35:36 +08:00
}
2021-11-22 00:51:08 +08:00
// DiffOptions represents the options for GetDiff.
type DiffOptions struct {
// BeforeCommitID is the commit to diff from. When empty, GetDiff diffs against
// the first parent of AfterCommitID (or the empty tree if it has no parent)
// and may overwrite this field with the resolved parent ID.
BeforeCommitID string
// AfterCommitID is the commit to diff to.
AfterCommitID string
// SkipTo is passed to git as --skip-to= when the installed git is at least
// 2.31, otherwise it is handed to ParsePatch; it is also stored as Diff.Start.
SkipTo string
// MaxLines, MaxLineCharacters and MaxFiles are passed through to ParsePatch.
MaxLines int
MaxLineCharacters int
MaxFiles int
// WhitespaceBehavior is either empty or a git diff flag that is appended
// verbatim to the git diff arguments.
WhitespaceBehavior string
// DirectComparison selects the ".." separator instead of "..." when building
// the commit range used for the shortstat.
DirectComparison bool
}
// GetDiff builds a Diff between two commits of a repository.
2018-08-15 01:49:33 +08:00
// Passing the empty string as beforeCommitID returns a diff from the parent commit.
// The whitespaceBehavior is either an empty string or a git flag
2021-11-22 00:51:08 +08:00
func GetDiff ( gitRepo * git . Repository , opts * DiffOptions , files ... string ) ( * Diff , error ) {
2021-08-31 12:16:23 +08:00
repoPath := gitRepo . Path
2014-04-13 09:35:36 +08:00
2021-11-22 00:51:08 +08:00
commit , err := gitRepo . GetCommit ( opts . AfterCommitID )
2014-04-13 09:35:36 +08:00
if err != nil {
return nil , err
}
2021-10-16 00:05:33 +08:00
argsLength := 6
2021-11-22 00:51:08 +08:00
if len ( opts . WhitespaceBehavior ) > 0 {
2021-10-16 00:05:33 +08:00
argsLength ++
}
2021-11-22 00:51:08 +08:00
if len ( opts . SkipTo ) > 0 {
2021-10-16 00:05:33 +08:00
argsLength ++
}
2021-11-22 00:51:08 +08:00
if len ( files ) > 0 {
argsLength += len ( files ) + 1
}
2021-10-16 00:05:33 +08:00
diffArgs := make ( [ ] string , 0 , argsLength )
2021-11-22 00:51:08 +08:00
if ( len ( opts . BeforeCommitID ) == 0 || opts . BeforeCommitID == git . EmptySHA ) && commit . ParentCount ( ) == 0 {
2021-10-16 00:05:33 +08:00
diffArgs = append ( diffArgs , "diff" , "--src-prefix=\\a/" , "--dst-prefix=\\b/" , "-M" )
2021-11-22 00:51:08 +08:00
if len ( opts . WhitespaceBehavior ) != 0 {
diffArgs = append ( diffArgs , opts . WhitespaceBehavior )
2020-10-22 07:08:44 +08:00
}
// append empty tree ref
diffArgs = append ( diffArgs , "4b825dc642cb6eb9a060e54bf8d69288fbee4904" )
2021-11-22 00:51:08 +08:00
diffArgs = append ( diffArgs , opts . AfterCommitID )
2014-05-29 10:15:15 +08:00
} else {
2021-11-22 00:51:08 +08:00
actualBeforeCommitID := opts . BeforeCommitID
2018-08-15 01:49:33 +08:00
if len ( actualBeforeCommitID ) == 0 {
parentCommit , _ := commit . Parent ( 0 )
actualBeforeCommitID = parentCommit . ID . String ( )
}
2021-10-16 00:05:33 +08:00
diffArgs = append ( diffArgs , "diff" , "--src-prefix=\\a/" , "--dst-prefix=\\b/" , "-M" )
2021-11-22 00:51:08 +08:00
if len ( opts . WhitespaceBehavior ) != 0 {
diffArgs = append ( diffArgs , opts . WhitespaceBehavior )
2018-08-15 01:49:33 +08:00
}
diffArgs = append ( diffArgs , actualBeforeCommitID )
2021-11-22 00:51:08 +08:00
diffArgs = append ( diffArgs , opts . AfterCommitID )
opts . BeforeCommitID = actualBeforeCommitID
2014-04-13 09:35:36 +08:00
}
2021-11-20 21:50:00 +08:00
// In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
// so if we are using at least this version of git we don't have to tell ParsePatch to do
// the skipping for us
2021-11-22 00:51:08 +08:00
parsePatchSkipToFile := opts . SkipTo
if opts . SkipTo != "" && git . CheckGitVersionAtLeast ( "2.31" ) == nil {
diffArgs = append ( diffArgs , "--skip-to=" + opts . SkipTo )
2021-11-20 21:50:00 +08:00
parsePatchSkipToFile = ""
2021-10-16 00:05:33 +08:00
}
2021-11-22 00:51:08 +08:00
if len ( files ) > 0 {
diffArgs = append ( diffArgs , "--" )
diffArgs = append ( diffArgs , files ... )
}
2022-01-23 13:57:52 +08:00
reader , writer := io . Pipe ( )
defer func ( ) {
_ = reader . Close ( )
_ = writer . Close ( )
} ( )
go func ( ctx context . Context , diffArgs [ ] string , repoPath string , writer * io . PipeWriter ) {
2022-02-07 03:01:47 +08:00
cmd := git . NewCommand ( ctx , diffArgs ... )
2022-01-23 13:57:52 +08:00
cmd . SetDescription ( fmt . Sprintf ( "GetDiffRange [repo_path: %s]" , repoPath ) )
2022-04-01 10:55:30 +08:00
if err := cmd . Run ( & git . RunOpts {
2022-01-23 13:57:52 +08:00
Timeout : time . Duration ( setting . Git . Timeout . Default ) * time . Second ,
Dir : repoPath ,
Stderr : os . Stderr ,
Stdout : writer ,
} ) ; err != nil {
log . Error ( "error during RunWithContext: %w" , err )
}
2015-12-02 14:10:13 +08:00
2022-01-23 13:57:52 +08:00
_ = writer . Close ( )
} ( gitRepo . Ctx , diffArgs , repoPath , writer )
2014-07-07 05:32:36 +08:00
2022-01-23 13:57:52 +08:00
diff , err := ParsePatch ( opts . MaxLines , opts . MaxLineCharacters , opts . MaxFiles , reader , parsePatchSkipToFile )
2015-12-02 14:10:13 +08:00
if err != nil {
2021-11-20 21:50:00 +08:00
return nil , fmt . Errorf ( "unable to ParsePatch: %w" , err )
2015-12-02 14:10:13 +08:00
}
2021-11-22 00:51:08 +08:00
diff . Start = opts . SkipTo
2021-09-10 04:13:36 +08:00
2022-06-16 23:47:44 +08:00
checker , deferable := gitRepo . CheckAttributeReader ( opts . AfterCommitID )
defer deferable ( )
2021-09-10 04:13:36 +08:00
2019-11-15 10:52:59 +08:00
for _ , diffFile := range diff . Files {
2021-09-10 04:13:36 +08:00
gotVendor := false
gotGenerated := false
if checker != nil {
attrs , err := checker . CheckPath ( diffFile . Name )
if err == nil {
if vendored , has := attrs [ "linguist-vendored" ] ; has {
if vendored == "set" || vendored == "true" {
diffFile . IsVendored = true
gotVendor = true
} else {
gotVendor = vendored == "false"
}
}
if generated , has := attrs [ "linguist-generated" ] ; has {
if generated == "set" || generated == "true" {
diffFile . IsGenerated = true
gotGenerated = true
} else {
gotGenerated = generated == "false"
}
}
2021-11-18 04:37:00 +08:00
if language , has := attrs [ "linguist-language" ] ; has && language != "unspecified" && language != "" {
diffFile . Language = language
} else if language , has := attrs [ "gitlab-language" ] ; has && language != "unspecified" && language != "" {
diffFile . Language = language
}
2021-09-10 04:13:36 +08:00
}
}
if ! gotVendor {
diffFile . IsVendored = analyze . IsVendor ( diffFile . Name )
}
if ! gotGenerated {
diffFile . IsGenerated = analyze . IsGenerated ( diffFile . Name )
}
2021-11-22 00:51:08 +08:00
tailSection := diffFile . GetTailSection ( gitRepo , opts . BeforeCommitID , opts . AfterCommitID )
2019-11-15 10:52:59 +08:00
if tailSection != nil {
diffFile . Sections = append ( diffFile . Sections , tailSection )
}
}
2015-12-02 14:10:13 +08:00
2021-09-27 20:19:34 +08:00
separator := "..."
2021-11-22 00:51:08 +08:00
if opts . DirectComparison {
2021-09-27 20:19:34 +08:00
separator = ".."
}
2021-11-22 00:51:08 +08:00
shortstatArgs := [ ] string { opts . BeforeCommitID + separator + opts . AfterCommitID }
if len ( opts . BeforeCommitID ) == 0 || opts . BeforeCommitID == git . EmptySHA {
shortstatArgs = [ ] string { git . EmptyTreeSHA , opts . AfterCommitID }
2020-05-30 05:14:00 +08:00
}
2022-01-23 13:57:52 +08:00
diff . NumFiles , diff . TotalAddition , diff . TotalDeletion , err = git . GetDiffShortStat ( gitRepo . Ctx , repoPath , shortstatArgs ... )
2020-07-30 01:53:04 +08:00
if err != nil && strings . Contains ( err . Error ( ) , "no merge base" ) {
// git >= 2.28 now returns an error if base and head have become unrelated.
// previously it would return the results of git diff --shortstat base head so let's try that...
2021-11-22 00:51:08 +08:00
shortstatArgs = [ ] string { opts . BeforeCommitID , opts . AfterCommitID }
2022-01-23 13:57:52 +08:00
diff . NumFiles , diff . TotalAddition , diff . TotalDeletion , err = git . GetDiffShortStat ( gitRepo . Ctx , repoPath , shortstatArgs ... )
2020-07-30 01:53:04 +08:00
}
2020-05-26 13:58:07 +08:00
if err != nil {
return nil , err
}
2015-12-02 14:10:13 +08:00
return diff , nil
2014-04-13 09:35:36 +08:00
}
2016-07-30 23:02:22 +08:00
2022-05-08 02:28:10 +08:00
// SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set
// Additionally, the database asynchronously is updated if files have changed since the last review
2022-06-13 17:37:59 +08:00
func SyncAndGetUserSpecificDiff ( ctx context . Context , userID int64 , pull * issues_model . PullRequest , gitRepo * git . Repository , opts * DiffOptions , files ... string ) ( * Diff , error ) {
2022-05-08 02:28:10 +08:00
diff , err := GetDiff ( gitRepo , opts , files ... )
if err != nil {
return nil , err
}
review , err := pull_model . GetNewestReviewState ( ctx , userID , pull . ID )
if err != nil || review == nil || review . UpdatedFiles == nil {
return diff , err
}
latestCommit := opts . AfterCommitID
if latestCommit == "" {
latestCommit = pull . HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't
}
changedFiles , err := gitRepo . GetFilesChangedBetween ( review . CommitSHA , latestCommit )
if err != nil {
return diff , err
}
filesChangedSinceLastDiff := make ( map [ string ] pull_model . ViewedState )
outer :
for _ , diffFile := range diff . Files {
fileViewedState := review . UpdatedFiles [ diffFile . GetDiffFileName ( ) ]
// Check whether it was previously detected that the file has changed since the last review
if fileViewedState == pull_model . HasChanged {
diffFile . HasChangedSinceLastReview = true
continue
}
filename := diffFile . GetDiffFileName ( )
// Check explicitly whether the file has changed since the last review
for _ , changedFile := range changedFiles {
diffFile . HasChangedSinceLastReview = filename == changedFile
if diffFile . HasChangedSinceLastReview {
filesChangedSinceLastDiff [ filename ] = pull_model . HasChanged
continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted
}
}
// Check whether the file has already been viewed
if fileViewedState == pull_model . Viewed {
diffFile . IsViewed = true
diff . NumViewedFiles ++
}
}
// Explicitly store files that have changed in the database, if any is present at all.
// This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed.
// On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed.
if len ( filesChangedSinceLastDiff ) > 0 {
err := pull_model . UpdateReviewState ( ctx , review . UserID , review . PullID , review . CommitSHA , filesChangedSinceLastDiff )
if err != nil {
log . Warn ( "Could not update review for user %d, pull %d, commit %s and the changed files %v: %v" , review . UserID , review . PullID , review . CommitSHA , filesChangedSinceLastDiff , err )
return nil , err
}
}
return diff , err
}
2019-09-06 10:20:09 +08:00
// CommentAsDiff returns c.Patch as *Diff
2022-06-13 17:37:59 +08:00
func CommentAsDiff ( c * issues_model . Comment ) ( * Diff , error ) {
2019-09-06 10:20:09 +08:00
diff , err := ParsePatch ( setting . Git . MaxGitDiffLines ,
2021-11-20 21:50:00 +08:00
setting . Git . MaxGitDiffLineCharacters , setting . Git . MaxGitDiffFiles , strings . NewReader ( c . Patch ) , "" )
2019-09-06 10:20:09 +08:00
if err != nil {
2021-02-28 02:46:14 +08:00
log . Error ( "Unable to parse patch: %v" , err )
2019-09-06 10:20:09 +08:00
return nil , err
}
if len ( diff . Files ) == 0 {
return nil , fmt . Errorf ( "no file found for comment ID: %d" , c . ID )
}
secs := diff . Files [ 0 ] . Sections
if len ( secs ) == 0 {
return nil , fmt . Errorf ( "no sections found for comment ID: %d" , c . ID )
}
return diff , nil
}
// CommentMustAsDiff executes AsDiff and logs the error instead of returning
2022-06-13 17:37:59 +08:00
func CommentMustAsDiff ( c * issues_model . Comment ) * Diff {
2021-03-22 00:59:58 +08:00
if c == nil {
return nil
}
defer func ( ) {
if err := recover ( ) ; err != nil {
log . Error ( "PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s" , c . ID , err , log . Stack ( 2 ) )
}
} ( )
2019-09-06 10:20:09 +08:00
diff , err := CommentAsDiff ( c )
if err != nil {
log . Warn ( "CommentMustAsDiff: %v" , err )
}
return diff
}
2021-02-13 12:35:43 +08:00
// GetWhitespaceFlag returns git diff flag for treating whitespaces
2022-02-08 14:15:04 +08:00
func GetWhitespaceFlag ( whitespaceBehavior string ) string {
2021-02-13 12:35:43 +08:00
whitespaceFlags := map [ string ] string {
"ignore-all" : "-w" ,
"ignore-change" : "-b" ,
"ignore-eol" : "--ignore-space-at-eol" ,
2022-02-08 14:15:04 +08:00
"show-all" : "" ,
2022-01-21 01:46:10 +08:00
}
2021-02-13 12:35:43 +08:00
2022-02-08 14:15:04 +08:00
if flag , ok := whitespaceFlags [ whitespaceBehavior ] ; ok {
return flag
}
log . Warn ( "unknown whitespace behavior: %q, default to 'show-all'" , whitespaceBehavior )
return ""
2021-02-13 12:35:43 +08:00
}