2021-03-30 04:44:28 +08:00
// Copyright 2021 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package gitdiff
import (
"encoding/csv"
"errors"
"io"
"code.gitea.io/gitea/modules/util"
)
const unmappedColumn = - 1
const maxRowsToInspect int = 10
const minRatioToMatch float32 = 0.8
// TableDiffCellType represents the type of a TableDiffCell.
type TableDiffCellType uint8
// TableDiffCellType possible values.
const (
2021-10-21 04:55:34 +08:00
TableDiffCellUnchanged TableDiffCellType = iota + 1
2021-03-30 04:44:28 +08:00
TableDiffCellChanged
TableDiffCellAdd
TableDiffCellDel
2021-10-21 04:55:34 +08:00
TableDiffCellMovedUnchanged
TableDiffCellMovedChanged
2021-03-30 04:44:28 +08:00
)
// TableDiffCell represents a cell of a TableDiffRow
type TableDiffCell struct {
LeftCell string
RightCell string
Type TableDiffCellType
}
// TableDiffRow represents a row of a TableDiffSection.
type TableDiffRow struct {
RowIdx int
Cells [ ] * TableDiffCell
}
// TableDiffSection represents a section of a DiffFile.
type TableDiffSection struct {
Rows [ ] * TableDiffRow
}
// csvReader wraps a csv.Reader which buffers the first rows.
type csvReader struct {
reader * csv . Reader
buffer [ ] [ ] string
line int
eof bool
}
2021-10-21 04:55:34 +08:00
// ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV
var ErrorUndefinedCell = errors . New ( "undefined cell" )
2021-03-30 04:44:28 +08:00
// createCsvReader creates a csvReader and fills the buffer
func createCsvReader ( reader * csv . Reader , bufferRowCount int ) ( * csvReader , error ) {
csv := & csvReader { reader : reader }
csv . buffer = make ( [ ] [ ] string , bufferRowCount )
for i := 0 ; i < bufferRowCount && ! csv . eof ; i ++ {
row , err := csv . readNextRow ( )
if err != nil {
return nil , err
}
csv . buffer [ i ] = row
}
csv . line = bufferRowCount
return csv , nil
}
// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
func ( csv * csvReader ) GetRow ( row int ) ( [ ] string , error ) {
2021-10-21 04:55:34 +08:00
if row < len ( csv . buffer ) && row >= 0 {
2021-03-30 04:44:28 +08:00
return csv . buffer [ row ] , nil
}
if csv . eof {
return nil , nil
}
for {
fields , err := csv . readNextRow ( )
if err != nil {
return nil , err
}
if csv . eof {
return nil , nil
}
csv . line ++
if csv . line - 1 == row {
return fields , nil
}
}
}
func ( csv * csvReader ) readNextRow ( ) ( [ ] string , error ) {
if csv . eof {
return nil , nil
}
row , err := csv . reader . Read ( )
if err != nil {
if err != io . EOF {
return nil , err
}
csv . eof = true
}
return row , nil
}
// CreateCsvDiff creates a tabular diff based on two CSV readers.
func CreateCsvDiff ( diffFile * DiffFile , baseReader * csv . Reader , headReader * csv . Reader ) ( [ ] * TableDiffSection , error ) {
if baseReader != nil && headReader != nil {
return createCsvDiff ( diffFile , baseReader , headReader )
}
if baseReader != nil {
return createCsvDiffSingle ( baseReader , TableDiffCellDel )
}
return createCsvDiffSingle ( headReader , TableDiffCellAdd )
}
// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted.
func createCsvDiffSingle ( reader * csv . Reader , celltype TableDiffCellType ) ( [ ] * TableDiffSection , error ) {
var rows [ ] * TableDiffRow
i := 1
for {
row , err := reader . Read ( )
if err != nil {
if err == io . EOF {
break
}
return nil , err
}
cells := make ( [ ] * TableDiffCell , len ( row ) )
for j := 0 ; j < len ( row ) ; j ++ {
2021-10-21 04:55:34 +08:00
if celltype == TableDiffCellDel {
cells [ j ] = & TableDiffCell { LeftCell : row [ j ] , Type : celltype }
} else {
cells [ j ] = & TableDiffCell { RightCell : row [ j ] , Type : celltype }
}
2021-03-30 04:44:28 +08:00
}
rows = append ( rows , & TableDiffRow { RowIdx : i , Cells : cells } )
i ++
}
return [ ] * TableDiffSection { { Rows : rows } } , nil
}
func createCsvDiff ( diffFile * DiffFile , baseReader * csv . Reader , headReader * csv . Reader ) ( [ ] * TableDiffSection , error ) {
2021-10-21 04:55:34 +08:00
// Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and b respectively
baseCSVReader , err := createCsvReader ( baseReader , maxRowsToInspect )
2021-03-30 04:44:28 +08:00
if err != nil {
return nil , err
}
2021-10-21 04:55:34 +08:00
headCSVReader , err := createCsvReader ( headReader , maxRowsToInspect )
2021-03-30 04:44:28 +08:00
if err != nil {
return nil , err
}
2021-10-21 04:55:34 +08:00
// Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
a2bColMap , b2aColMap := getColumnMapping ( baseCSVReader , headCSVReader )
2021-03-30 04:44:28 +08:00
2021-10-21 04:55:34 +08:00
// Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
numDiffTableCols := len ( a2bColMap ) + countUnmappedColumns ( b2aColMap )
if len ( a2bColMap ) < len ( b2aColMap ) {
numDiffTableCols = len ( b2aColMap ) + countUnmappedColumns ( a2bColMap )
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
// createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
// in the base or head respectively.
// Returns a TableDiffRow which has the row index
createDiffTableRow := func ( aLineNum int , bLineNum int ) ( * TableDiffRow , error ) {
// diffTableCells is a row of the diff table. It will have a cells for added, deleted, changed, and unchanged content, thus either
// the same size as the head table or bigger
diffTableCells := make ( [ ] * TableDiffCell , numDiffTableCols )
var bRow * [ ] string
if bLineNum > 0 {
row , err := headCSVReader . GetRow ( bLineNum - 1 )
2021-03-30 04:44:28 +08:00
if err != nil {
return nil , err
}
2021-10-21 04:55:34 +08:00
bRow = & row
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
var aRow * [ ] string
if aLineNum > 0 {
row , err := baseCSVReader . GetRow ( aLineNum - 1 )
if err != nil {
return nil , err
}
aRow = & row
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
if aRow == nil && bRow == nil {
// No content
2021-03-30 04:44:28 +08:00
return nil , nil
}
2021-10-21 04:55:34 +08:00
aIndex := 0 // tracks where we are in the a2bColMap
bIndex := 0 // tracks where we are in the b2aColMap
colsAdded := 0 // incremented whenever we found a column was added
colsDeleted := 0 // incrememted whenever a column was deleted
// We loop until both the aIndex and bIndex are greater than their col map, which then we are done
for aIndex < len ( a2bColMap ) || bIndex < len ( b2aColMap ) {
// Starting from where aIndex is currently pointing, we see if the map is -1 (dleeted) and if is, create column to note that, increment, and look at the next aIndex
for aIndex < len ( a2bColMap ) && a2bColMap [ aIndex ] == - 1 && ( bIndex >= len ( b2aColMap ) || aIndex <= bIndex ) {
var aCell string
if aRow != nil {
if cell , err := getCell ( * aRow , aIndex ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
aCell = cell
}
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
diffTableCells [ bIndex + colsDeleted ] = & TableDiffCell { LeftCell : aCell , Type : TableDiffCellDel }
aIndex ++
colsDeleted ++
}
2021-03-30 04:44:28 +08:00
2021-10-21 04:55:34 +08:00
// aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
// we can just increment aIndex until it points to a -1 column or one greater than the current bIndex
for aIndex < len ( a2bColMap ) && a2bColMap [ aIndex ] != - 1 {
aIndex ++
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
// Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next aIndex
for bIndex < len ( b2aColMap ) && b2aColMap [ bIndex ] == - 1 && ( aIndex >= len ( a2bColMap ) || bIndex < aIndex ) {
var bCell string
cellType := TableDiffCellAdd
if bRow != nil {
if cell , err := getCell ( * bRow , bIndex ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
bCell = cell
}
} else {
cellType = TableDiffCellDel
}
diffTableCells [ bIndex + colsDeleted ] = & TableDiffCell { RightCell : bCell , Type : cellType }
bIndex ++
colsAdded ++
}
// aIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
// we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
for bIndex < len ( b2aColMap ) && b2aColMap [ bIndex ] != - 1 && ( aIndex >= len ( a2bColMap ) || bIndex < aIndex ) {
var diffTableCell TableDiffCell
var aCell * string
// get the aCell value if the aRow exists
if aRow != nil {
if cell , err := getCell ( * aRow , b2aColMap [ bIndex ] ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
aCell = & cell
diffTableCell . LeftCell = cell
}
} else {
diffTableCell . Type = TableDiffCellAdd
}
var bCell * string
// get the bCell value if the bRow exists
if bRow != nil {
if cell , err := getCell ( * bRow , bIndex ) ; err != nil {
if err != ErrorUndefinedCell {
return nil , err
}
} else {
bCell = & cell
diffTableCell . RightCell = cell
}
} else {
diffTableCell . Type = TableDiffCellDel
}
// if both a and b have a row that exists, compare the value and determine if the row has moved
if aCell != nil && bCell != nil {
moved := ( ( bIndex + colsDeleted ) != ( b2aColMap [ bIndex ] + colsAdded ) )
if * aCell != * bCell {
if moved {
diffTableCell . Type = TableDiffCellMovedChanged
} else {
diffTableCell . Type = TableDiffCellChanged
}
} else {
if moved {
diffTableCell . Type = TableDiffCellMovedUnchanged
} else {
diffTableCell . Type = TableDiffCellUnchanged
}
diffTableCell . LeftCell = ""
}
}
// Add the diff column to the diff row
diffTableCells [ bIndex + colsDeleted ] = & diffTableCell
bIndex ++
2021-03-30 04:44:28 +08:00
}
}
2021-10-21 04:55:34 +08:00
return & TableDiffRow { RowIdx : bLineNum , Cells : diffTableCells } , nil
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
// diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
var diffTableSections [ ] * TableDiffSection
2021-03-30 04:44:28 +08:00
for i , section := range diffFile . Sections {
2021-10-21 04:55:34 +08:00
// Each section has multiple diffTableRows
var diffTableRows [ ] * TableDiffRow
2021-03-30 04:44:28 +08:00
lines := tryMergeLines ( section . Lines )
2021-10-21 04:55:34 +08:00
// Loop through the merged lines to get each row of the CSV diff table for this section
2021-03-30 04:44:28 +08:00
for j , line := range lines {
if i == 0 && j == 0 && ( line [ 0 ] != 1 || line [ 1 ] != 1 ) {
2021-10-21 04:55:34 +08:00
diffTableRow , err := createDiffTableRow ( 1 , 1 )
2021-03-30 04:44:28 +08:00
if err != nil {
return nil , err
}
2021-10-21 04:55:34 +08:00
if diffTableRow != nil {
diffTableRows = append ( diffTableRows , diffTableRow )
2021-03-30 04:44:28 +08:00
}
}
2021-10-21 04:55:34 +08:00
diffTableRow , err := createDiffTableRow ( line [ 0 ] , line [ 1 ] )
2021-03-30 04:44:28 +08:00
if err != nil {
return nil , err
}
2021-10-21 04:55:34 +08:00
if diffTableRow != nil {
diffTableRows = append ( diffTableRows , diffTableRow )
2021-03-30 04:44:28 +08:00
}
}
2021-10-21 04:55:34 +08:00
if len ( diffTableRows ) > 0 {
diffTableSections = append ( diffTableSections , & TableDiffSection { Rows : diffTableRows } )
2021-03-30 04:44:28 +08:00
}
}
2021-10-21 04:55:34 +08:00
return diffTableSections , nil
2021-03-30 04:44:28 +08:00
}
// getColumnMapping creates a mapping of columns between a and b
2021-10-21 04:55:34 +08:00
func getColumnMapping ( baseCSVReader * csvReader , headCSVReader * csvReader ) ( [ ] int , [ ] int ) {
baseRow , _ := baseCSVReader . GetRow ( 0 )
headRow , _ := headCSVReader . GetRow ( 0 )
2021-03-30 04:44:28 +08:00
2021-10-21 04:55:34 +08:00
base2HeadColMap := [ ] int { }
head2BaseColMap := [ ] int { }
2021-03-30 04:44:28 +08:00
2021-10-21 04:55:34 +08:00
if baseRow != nil {
base2HeadColMap = make ( [ ] int , len ( baseRow ) )
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
if headRow != nil {
head2BaseColMap = make ( [ ] int , len ( headRow ) )
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
// Initializes all head2base mappings to be unmappedColumn (-1)
for i := 0 ; i < len ( head2BaseColMap ) ; i ++ {
head2BaseColMap [ i ] = unmappedColumn
2021-03-30 04:44:28 +08:00
}
2021-10-21 04:55:34 +08:00
// Loops through the baseRow and see if there is a match in the head row
for i := 0 ; i < len ( baseRow ) ; i ++ {
base2HeadColMap [ i ] = unmappedColumn
baseCell , err := getCell ( baseRow , i )
if err == nil {
for j := 0 ; j < len ( headRow ) ; j ++ {
if head2BaseColMap [ j ] == - 1 {
headCell , err := getCell ( headRow , j )
if err == nil && baseCell == headCell {
base2HeadColMap [ i ] = j
head2BaseColMap [ j ] = i
break
}
2021-03-30 04:44:28 +08:00
}
}
}
}
2021-10-21 04:55:34 +08:00
tryMapColumnsByContent ( baseCSVReader , base2HeadColMap , headCSVReader , head2BaseColMap )
tryMapColumnsByContent ( headCSVReader , head2BaseColMap , baseCSVReader , base2HeadColMap )
2021-03-30 04:44:28 +08:00
2021-10-21 04:55:34 +08:00
return base2HeadColMap , head2BaseColMap
2021-03-30 04:44:28 +08:00
}
// tryMapColumnsByContent tries to map missing columns by the content of the first lines.
2021-10-21 04:55:34 +08:00
func tryMapColumnsByContent ( baseCSVReader * csvReader , base2HeadColMap [ ] int , headCSVReader * csvReader , head2BaseColMap [ ] int ) {
for i := 0 ; i < len ( base2HeadColMap ) ; i ++ {
headStart := 0
for base2HeadColMap [ i ] == unmappedColumn && headStart < len ( head2BaseColMap ) {
if head2BaseColMap [ headStart ] == unmappedColumn {
rows := util . Min ( maxRowsToInspect , util . Max ( 0 , util . Min ( len ( baseCSVReader . buffer ) , len ( headCSVReader . buffer ) ) - 1 ) )
2021-03-30 04:44:28 +08:00
same := 0
for j := 1 ; j <= rows ; j ++ {
2021-10-21 04:55:34 +08:00
baseCell , baseErr := getCell ( baseCSVReader . buffer [ j ] , i )
headCell , headErr := getCell ( headCSVReader . buffer [ j ] , headStart )
if baseErr == nil && headErr == nil && baseCell == headCell {
2021-03-30 04:44:28 +08:00
same ++
}
}
if ( float32 ( same ) / float32 ( rows ) ) > minRatioToMatch {
2021-10-21 04:55:34 +08:00
base2HeadColMap [ i ] = headStart
head2BaseColMap [ headStart ] = i
2021-03-30 04:44:28 +08:00
}
}
2021-10-21 04:55:34 +08:00
headStart ++
2021-03-30 04:44:28 +08:00
}
}
}
// getCell returns the specific cell or nil if not present.
func getCell ( row [ ] string , column int ) ( string , error ) {
if column < len ( row ) {
return row [ column ] , nil
}
2021-10-21 04:55:34 +08:00
return "" , ErrorUndefinedCell
2021-03-30 04:44:28 +08:00
}
// countUnmappedColumns returns the count of unmapped columns.
func countUnmappedColumns ( mapping [ ] int ) int {
count := 0
for i := 0 ; i < len ( mapping ) ; i ++ {
if mapping [ i ] == unmappedColumn {
count ++
}
}
return count
}
// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
func tryMergeLines ( lines [ ] * DiffLine ) [ ] [ 2 ] int {
ids := make ( [ ] [ 2 ] int , len ( lines ) )
i := 0
for _ , line := range lines {
if line . Type != DiffLineSection {
ids [ i ] [ 0 ] = line . LeftIdx
ids [ i ] [ 1 ] = line . RightIdx
i ++
}
}
ids = ids [ : i ]
result := make ( [ ] [ 2 ] int , len ( ids ) )
j := 0
for i = 0 ; i < len ( ids ) ; i ++ {
if ids [ i ] [ 0 ] == 0 {
if j > 0 && result [ j - 1 ] [ 1 ] == 0 {
temp := j
for temp > 0 && result [ temp - 1 ] [ 1 ] == 0 {
temp --
}
result [ temp ] [ 1 ] = ids [ i ] [ 1 ]
continue
}
}
result [ j ] = ids [ i ]
j ++
}
return result [ : j ]
}