/
csvdiff.go
110 lines (94 loc) · 2.8 KB
/
csvdiff.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
package main
import (
"encoding/csv"
"fmt"
"io"
"log"
"os"
)
const (
	// Usage is the usage message shown when the command line arguments are
	// not understood or when -h or --help is passed.
	Usage = "usage: csvdiff new_revision.csv old_revision.csv"

	// identifierColumn is the index of the column used as the identifier
	// (matching) column when pairing rows between revisions. This should be
	// made into an option eventually.
	identifierColumn = 0
)
// revisionFilenames extracts the two revision filenames from the command line
// arguments (os.Args). It returns an error carrying the Usage text when the
// first argument is -h or --help, or when the number of arguments after the
// program name is not exactly two; in that case both filenames are empty.
// Should be renamed to argumentParser and take other options as well.
func revisionFilenames() (filenameX string, filenameY string, err error) {
	args := os.Args
	helpRequested := len(args) > 1 && (args[1] == "-h" || args[1] == "--help")
	if helpRequested || len(args) != 3 {
		return "", "", fmt.Errorf(Usage)
	}
	return args[1], args[2], nil
}
// csvDiff reads fileX and fileY completely as CSV and diffs them. Every row of
// fileX is compared against the rows of fileY to report changes and additions,
// then every row of fileY is compared against the rows of fileX to report
// removals.
//
// It returns an error, without performing any comparison, if either input
// cannot be parsed as CSV. The error says which of the two inputs failed and
// wraps the underlying reader error.
func csvDiff(fileX, fileY io.Reader) (err error) {
	revisionXRows, err := csv.NewReader(fileX).ReadAll()
	if err != nil {
		return fmt.Errorf("reading first revision: %w", err)
	}
	revisionYRows, err := csv.NewReader(fileY).ReadAll()
	if err != nil {
		return fmt.Errorf("reading second revision: %w", err)
	}

	// First pass: rows of X that differ from, or are missing in, Y.
	for _, revisionXRow := range revisionXRows {
		compareRowAgainstRows(revisionXRow, revisionYRows, true)
	}
	// Second pass: rows of Y that no longer exist in X.
	for _, revisionYRow := range revisionYRows {
		compareRowAgainstRows(revisionYRow, revisionXRows, false)
	}
	return nil
}
// compareRowAgainstRows searches rows for the first row whose identifierColumn
// value equals that of row and prints the outcome to standard output.
//
// If changesAndAdditions is true it only records changes and additions: a
// matched row that differs is printed as "Changed" (the row found in rows
// first, then row) and an unmatched row is printed as "Added". If it is false
// it only records removals: an unmatched row is printed as "Removed" and
// matched rows are never reported.
//
// Two matched rows differ when any column differs or when they have a
// different number of columns. Rows too short to contain identifierColumn can
// never match anything.
func compareRowAgainstRows(row []string, rows [][]string, changesAndAdditions bool) {
	if len(row) > identifierColumn {
		// Ideally this algorithm would be more efficient by removing rows once
		// they have been matched.
		for _, comparisonRow := range rows {
			if len(comparisonRow) <= identifierColumn ||
				row[identifierColumn] != comparisonRow[identifierColumn] {
				continue
			}
			if changesAndAdditions {
				// A differing column count is itself a change; checking it
				// first also keeps the index loop below in range.
				changed := len(row) != len(comparisonRow)
				for i := 0; !changed && i < len(row); i++ {
					changed = row[i] != comparisonRow[i]
				}
				if changed {
					fmt.Println("Changed: ", comparisonRow, row)
				}
			}
			// Only the first row with a matching identifier is considered.
			return
		}
	}

	if changesAndAdditions {
		fmt.Println("Added: ", row)
	} else {
		fmt.Println("Removed: ", row)
	}
}
// main is the application entry point. It reads the two revision filenames
// from the command line, diffs the files, and exits with a non-zero status if
// the arguments are not understood or the diff fails.
func main() {
	filenameX, filenameY, err := revisionFilenames()
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}
	// The files are opened and closed inside diffFiles so that their deferred
	// Close calls have already run by the time log.Fatal exits the process.
	if err := diffFiles(filenameX, filenameY); err != nil {
		log.Fatal(err)
	}
}

// diffFiles opens the two named files and passes them to csvDiff, closing any
// file it opened before returning. An empty filename selects standard input
// for that side. It returns the first error from opening a file or from
// csvDiff.
func diffFiles(filenameX, filenameY string) error {
	fileX := os.Stdin
	if filenameX != "" {
		f, err := os.Open(filenameX)
		if err != nil {
			return err
		}
		defer f.Close()
		fileX = f
	}

	fileY := os.Stdin
	if filenameY != "" {
		f, err := os.Open(filenameY)
		if err != nil {
			return err
		}
		defer f.Close()
		fileY = f
	}

	return csvDiff(fileX, fileY)
}