func diffLineSet(orgLines, newLines []string, format string) { sort.Strings(orgLines) sort.Strings(newLines) _, matA, matB := ed.EditDistanceFFull(len(orgLines), len(newLines), func(iA, iB int) int { return tm.DiffOfStrings(orgLines[iA], newLines[iB], 4000) }, ed.ConstCost(1000), ed.ConstCost(1000)) for i, j := 0, 0; i < len(orgLines) || j < len(newLines); { switch { case j >= len(newLines) || i < len(orgLines) && matA[i] < 0: showDelLine(fmt.Sprintf(format, orgLines[i])) i++ case i >= len(orgLines) || j < len(newLines) && matB[j] < 0: showInsLine(fmt.Sprintf(format, newLines[j])) j++ default: if strings.TrimSpace(orgLines[i]) != strings.TrimSpace(newLines[j]) { showDiffLine(fmt.Sprintf(format, orgLines[i]), fmt.Sprintf(format, newLines[j])) } // if i++ j++ } } // for i, j }
func MatchTokens(delT, insT []string) (matA, matB []int) { delL, delR := nearChecks(delT) _, matA, matB = ed.EditDistanceFFull(len(delT), len(insT), func(iA, iB int) int { if delT[iA] == insT[iB] { c := 0 if delL[iA] { c += diffAt(delT, iA-1, insT, iB-1) } if delR[iA] { c += diffAt(delT, iA+1, insT, iB+1) } return c } // if return len(delT[iA]) + len(insT[iB]) + 5 }, func(iA int) int { if delT[iA] == " " { return 0 } // if return len(delT[iA]) + 2 }, func(iB int) int { if insT[iB] == " " { return 0 } // if return len(insT[iB]) + 2 }) delP, insP := findPairs(delT), findPairs(insT) alignPairs(matA, matB, delP, insP) return matA, matB }
func TestDiffLine(t *testing.T) { delT := tm.LineToTokens("func g(src string, dst string) error {") insT := tm.LineToTokens("func (m *monitor) g(gsp string) error {") _, matA, matB := ed.EditDistanceFFull(len(delT), len(insT), func(iA, iB int) int { if delT[iA] == insT[iB] { return 0 } // if return len(delT[iA]) + len(insT[iB]) + 1 }, func(iA int) int { if delT[iA] == " " { return 0 } // if return len(delT[iA]) }, func(iB int) int { if insT[iB] == " " { return 0 } // if return len(insT[iB]) }) ShowDelTokens(delT, matA, insT) ShowInsTokens(insT, matB, delT) }
func AssertStringsEqual(t *testing.T, name string, act, exp []string) { if villa.StringSlice(exp).Equals(act) { return } t.Errorf("%s unexpected(exp: %d lines, act %d lines)!", name, len(exp), len(act)) t.Logf("exp --- act +++") t.Logf("Difference:") _, matA, matB := ed.EditDistanceFFull(len(exp), len(act), func(iA, iB int) int { sa, sb := exp[iA], act[iB] if sa == sb { return 0 } return ed.String(sa, sb) }, func(iA int) int { return len(exp[iA]) + 1 }, func(iB int) int { return len(act[iB]) + 1 }) for i, j := 0, 0; i < len(exp) || j < len(act); { switch { case j >= len(act) || i < len(exp) && matA[i] < 0: t.Logf("--- %3d: %s", i+1, showText(exp[i])) i++ case i >= len(exp) || j < len(act) && matB[j] < 0: t.Logf("+++ %3d: %s", j+1, showText(act[j])) j++ default: if exp[i] != act[j] { t.Logf("--- %3d: %s", i+1, showText(exp[i])) t.Logf("+++ %3d: %s", j+1, showText(act[j])) } // else i++ j++ } } // for i, j }
func DiffLines(orgLines, newLines []string, format string) { if len(orgLines)+len(newLines) == 0 { return } // if _, matA, matB := ed.EditDistanceFFull(len(orgLines), len(newLines), func(iA, iB int) int { sa, sb := strings.TrimSpace(orgLines[iA]), strings.TrimSpace(newLines[iB]) // When sa and sb has 1/3 in common, convertion const is equal to del+ins const return tm.CalcDiffOfSourceLine(sa, sb, (len(sa)+len(sb))*120) }, func(iA int) int { return max(1, len(strings.TrimSpace(orgLines[iA]))*100) }, func(iB int) int { return max(1, len(strings.TrimSpace(newLines[iB]))*100) }) var lo lineOutput for i, j := 0, 0; i < len(orgLines) || j < len(newLines); { switch { case j >= len(newLines) || i < len(orgLines) && matA[i] < 0: lo.outputDel(fmt.Sprintf(format, orgLines[i])) i++ case i >= len(orgLines) || j < len(newLines) && matB[j] < 0: lo.outputIns(fmt.Sprintf(format, newLines[j])) j++ default: if strings.TrimSpace(orgLines[i]) != strings.TrimSpace(newLines[j]) { lo.outputChange(fmt.Sprintf(format, orgLines[i]), fmt.Sprintf(format, newLines[j])) } else { lo.outputSame(fmt.Sprintf(format, newLines[j])) } // else i++ j++ } } // for i, j lo.end() }
func diffLinesTo(orgLines, newLines []string, format string, lo lineOutputer) int { if len(orgLines)+len(newLines) == 0 { return 0 } start, orgEnd, newEnd := 0, len(orgLines), len(newLines) if len(orgLines)*len(newLines) > 1024*1024 { // Use trivial comparison to offset same head and tail lines. start, orgEnd, newEnd = offsetHeadTails(orgLines, newLines) } fastMode := false if len(orgLines)*len(newLines) > 1024*1024 { fastMode = true } _, matA, matB := ed.EditDistanceFFull(orgEnd-start, newEnd-start, func(iA, iB int) int { sa, sb := orgLines[iA+start], newLines[iB+start] rawEqual := sa == sb sa, sb = strings.TrimSpace(sa), strings.TrimSpace(sb) posCost := 0 if isBlockStart(sa) { posCost += (newEnd - start - 1 - iB) * 10 / (newEnd - start) } if rawEqual { return posCost } if sa == sb { return posCost + 1 } mx := (len(sa) + len(sb)) * 150 var dist int if fastMode && len(sa) > 10*len(sb) { dist = 100 * (len(sa) - len(sb)) } else if fastMode && len(sb) > 10*len(sa) { dist = 100 * (len(sb) - len(sa)) } else { // When sa and sb has 1/3 in common, convertion const is equal to del+ins const dist = tm.CalcDiffOfSourceLine(sa, sb, mx) } // Even a small change, both lines will be shown, so add a 20% penalty on that. return (dist*4+mx)/5 + 1 + posCost }, func(iA int) int { return mathp.MaxI(1, len(strings.TrimSpace(orgLines[iA+start]))*100) }, func(iB int) int { return mathp.MaxI(1, len(strings.TrimSpace(newLines[iB+start]))*100) }) cnt := 0 for i, j := 0, 0; i < len(orgLines) || j < len(newLines); { switch { case i < start || i >= orgEnd && j >= newEnd: // cut by offsetHeadTails lo.outputSame(fmt.Sprintf(format, newLines[j])) i++ j++ case j >= newEnd || i < orgEnd && matA[i-start] < 0: lo.outputDel(fmt.Sprintf(format, orgLines[i])) cnt++ i++ case i >= orgEnd || j < newEnd && matB[j-start] < 0: lo.outputIns(fmt.Sprintf(format, newLines[j])) cnt++ j++ default: if strings.TrimSpace(orgLines[i]) != strings.TrimSpace(newLines[j]) { lo.outputChange(fmt.Sprintf(format, orgLines[i]), fmt.Sprintf(format, newLines[j])) cnt += 2 } else { lo.outputSame(fmt.Sprintf(format, newLines[j])) } // else i++ j++ } } lo.end() return cnt }