// Implement the same sorting criteria as V2 func calculateScore(caseSensitive bool, normalize bool, text util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) { pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0) pos := posArray(withPos, len(pattern)) prevClass := charNonWord if sidx > 0 { prevClass = charClassOf(text.Get(sidx - 1)) } for idx := sidx; idx < eidx; idx++ { char := text.Get(idx) class := charClassOf(char) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } // pattern is already normalized if normalize { char = normalizeRune(char) } if char == pattern[pidx] { if withPos { *pos = append(*pos, idx) } score += scoreMatch bonus := bonusFor(prevClass, class) if consecutive == 0 { firstBonus = bonus } else { // Break consecutive chunk if bonus == bonusBoundary { firstBonus = bonus } bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive) } if pidx == 0 { score += int(bonus * bonusFirstCharMultiplier) } else { score += int(bonus) } inGap = false consecutive++ pidx++ } else { if inGap { score += scoreGapExtention } else { score += scoreGapStart } inGap = true consecutive = 0 firstBonus = 0 } prevClass = class } return score, pos }
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { // Assume that pattern is given in lowercase if case-insensitive. // First check if there's a match and calculate bonus for each position. // If the input string is too long, consider finding the matching chars in // this phase as well (non-optimal alignment). N := input.Length() M := len(pattern) switch M { case 0: return Result{0, 0, 0}, posArray(withPos, M) case 1: return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab) } // Since O(nm) algorithm can be prohibitively expensive for large input, // we fall back to the greedy algorithm. if slab != nil && N*M > cap(slab.I16) { return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab) } // Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages offset16 := 0 offset32 := 0 // Bonus point for each position offset16, B := alloc16(offset16, slab, N, false) // The first occurrence of each character in the pattern offset32, F := alloc32(offset32, slab, M, false) // Rune array offset32, T := alloc32(offset32, slab, N, false) // Phase 1. Check if there's a match and calculate bonus for each point pidx, lastIdx, prevClass := 0, 0, charNonWord for idx := 0; idx < N; idx++ { char := input.Get(idx) var class charClass if char <= unicode.MaxASCII { class = charClassOfAscii(char) } else { class = charClassOfNonAscii(char) } if !caseSensitive && class == charUpper { if char <= unicode.MaxASCII { char += 32 } else { char = unicode.To(unicode.LowerCase, char) } } if normalize { char = normalizeRune(char) } T[idx] = char B[idx] = bonusFor(prevClass, class) prevClass = class if pidx < M { if char == pattern[pidx] { lastIdx = idx F[pidx] = int32(idx) pidx++ } } else { if char == pattern[M-1] { lastIdx = idx } } } if pidx != M { return Result{-1, -1, 0}, nil } // Phase 2. Fill in score matrix (H) // Unlike the original algorithm, we do not allow omission. width := lastIdx - int(F[0]) + 1 offset16, H := alloc16(offset16, slab, width*M, false) // Possible length of consecutive chunk at each position. offset16, C := alloc16(offset16, slab, width*M, false) maxScore, maxScorePos := int16(0), 0 for i := 0; i < M; i++ { I := i * width inGap := false for j := int(F[i]); j <= lastIdx; j++ { j0 := j - int(F[0]) var s1, s2, consecutive int16 if j > int(F[i]) { if inGap { s2 = H[I+j0-1] + scoreGapExtention } else { s2 = H[I+j0-1] + scoreGapStart } } if pattern[i] == T[j] { var diag int16 if i > 0 && j0 > 0 { diag = H[I-width+j0-1] } s1 = diag + scoreMatch b := B[j] if i > 0 { // j > 0 if i > 0 consecutive = C[I-width+j0-1] + 1 // Break consecutive chunk if b == bonusBoundary { consecutive = 1 } else if consecutive > 1 { b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1])) } } else { consecutive = 1 b *= bonusFirstCharMultiplier } if s1+b < s2 { s1 += B[j] consecutive = 0 } else { s1 += b } } C[I+j0] = consecutive inGap = s1 < s2 score := util.Max16(util.Max16(s1, s2), 0) if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) { maxScore, maxScorePos = score, j } H[I+j0] = score } if DEBUG { if i == 0 { fmt.Print(" ") for j := int(F[i]); j <= lastIdx; j++ { fmt.Printf(" " + string(input.Get(j)) + " ") } fmt.Println() } fmt.Print(string(pattern[i]) + " ") for idx := int(F[0]); idx < int(F[i]); idx++ { fmt.Print(" 0 ") } for idx := int(F[i]); idx <= lastIdx; idx++ { fmt.Printf("%2d ", H[i*width+idx-int(F[0])]) } fmt.Println() fmt.Print(" ") for idx, p := range C[I : I+width] { if idx+int(F[0]) < int(F[i]) { p = 0 } fmt.Printf("%2d ", p) } fmt.Println() } } // Phase 3. (Optional) Backtrace to find character positions pos := posArray(withPos, M) j := int(F[0]) if withPos { i := M - 1 j = maxScorePos preferMatch := true for { I := i * width j0 := j - int(F[0]) s := H[I+j0] var s1, s2 int16 if i > 0 && j >= int(F[i]) { s1 = H[I-width+j0-1] } if j > int(F[i]) { s2 = H[I+j0-1] } if s > s1 && (s > s2 || s == s2 && preferMatch) { *pos = append(*pos, j) if i == 0 { break } i-- } preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0 j-- } } // Start offset we return here is only relevant when begin tiebreak is used. // However finding the accurate offset requires backtracking, and we don't // want to pay extra cost for the option that has lost its importance. return Result{j, maxScorePos + 1, int(maxScore)}, pos }