func inner(t *testing.T, cases *[]TestCase, wantIdx int, opt ls_core.Options, sortIt bool) { for i, tc := range *cases { m := ls_core.New(WrapAsEqualer(tc.src, sortIt), WrapAsEqualer(tc.dst, sortIt), opt) got, relDist := m.Distance() _ = relDist // fmt.Printf("%v %v\n", got, relDist) ssrc := fmt.Sprintf("%v", tc.src) sdst := fmt.Sprintf("%v", tc.dst) if got != tc.distances[wantIdx] { t.Logf( "%2v: Distance between %20v and %20v should be %v - but got %v (sorted %v)", i, stringspb.Ellipsoider(ssrc, 8), stringspb.Ellipsoider(sdst, 8), tc.distances[wantIdx], got, sortIt) t.Fail() } m.Print() fmt.Printf("\n") es := m.EditScript() got2 := m.ApplyEditScript(es) if !m.CompareToCol(got2) { t.Logf("\nwnt %v \ngot %v ", WrapAsEqualer(tc.dst, sortIt), got2) t.Fail() } fmt.Printf("\n") fmt.Printf("\n") } }
func similarTextifiedTrees2(src *TextifiedTree, mp map[string][]*TextifiedTree, skipPrefix map[string]bool) { // srcE := word.WrapAsEqualer(string(src.Text), true) // ssrc as Equaler srcE := wordb.WrapAsEqualer(src.Text, true) srcLen := float64(len(src.Text)) for fnKey, tts := range mp { if fnKey == src.SourceID { pf(" to %v SKIP self\n", fnKey) continue } pf(" to %v\n", fnKey) cntr, br := 0, true for _, tt := range tts { // outl, text := tt.Outl, tt.Text if tt.Lvl > src.Lvl+levelsTolerance { break // since we are now sorted by lvl, we can this is safe } if tt.Lvl == src.Lvl || (tt.Lvl > src.Lvl && tt.Lvl <= src.Lvl+levelsTolerance) { // proceed } else { continue } if src.NumTokens < 1 { continue } if src.NumTokens < 5 && tt.NumTokens > 7 { continue } if HistoBasedDistance(src, tt) > 0.51 { breakMapsTooDistinct++ continue } relSize := srcLen / float64(util.Max(1, len(tt.Text))) if relSize < 0.33 || relSize > 3 { continue } absDist, relDist := 0, 0.0 if tt.NumTokens == src.NumTokens && len(tt.Text) == len(src.Text) && bytes.Equal(tt.Text, src.Text) { absDist, relDist = 0, 0.0 appliedCompare++ } else { dstE := wordb.WrapAsEqualer(tt.Text, true) // destinations as Equaler m := levenshtein.New(srcE, dstE, opt) absDist, relDist = m.Distance() appliedLevenshtein++ } // if relDist < 0.26 && absDist < 10 { if br { pf("\t") } sd := "" sd = string(tt.Text[:util.Min(2*excerptLen, len(tt.Text)-1)]) sd = stringspb.ToLen(sd, 2*excerptLen+1) pf("%12v %v %4v %5.2v ", tt.Outline, sd, absDist, relDist) cntr++ br = false sim := Similar{} sim.SourceID = fnKey sim.Lvl = tt.Lvl sim.Outline = tt.Outline sim.AbsLevenshtein = absDist sim.RelLevenshtein = relDist sim.Text = tt.Text src.Similars = append(src.Similars, sim) src.SumAbsLevenshtein += absDist src.SumRelLevenshtein += relDist if cntr%2 == 0 || cntr > 20 { pf("\n") br = true } if cntr > 20 { break } } } if !br { pf("\n") } } }