// EqualMatch performs equal-match func EqualMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { lenPattern := len(pattern) if text.Length() != lenPattern { return Result{-1, -1, 0}, nil } match := true if normalize { runes := text.ToRunes() for idx, pchar := range pattern { char := runes[idx] if !caseSensitive { char = unicode.To(unicode.LowerCase, char) } if normalizeRune(pchar) != normalizeRune(char) { match = false break } } } else { runesStr := text.ToString() if !caseSensitive { runesStr = strings.ToLower(runesStr) } match = runesStr == string(pattern) } if match { return Result{0, lenPattern, (scoreMatch+bonusBoundary)*lenPattern + (bonusFirstCharMultiplier-1)*bonusBoundary}, nil } return Result{-1, -1, 0}, nil }
// SuffixMatch performs suffix-match func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { lenRunes := text.Length() trimmedLen := lenRunes - text.TrailingWhitespaces() if len(pattern) == 0 { return Result{trimmedLen, trimmedLen, 0}, nil } diff := trimmedLen - len(pattern) if diff < 0 { return Result{-1, -1, 0}, nil } for index, r := range pattern { char := text.Get(index + diff) if !caseSensitive { char = unicode.ToLower(char) } if normalize { char = normalizeRune(char) } if char != r { return Result{-1, -1, 0}, nil } } lenPattern := len(pattern) sidx := trimmedLen - lenPattern eidx := trimmedLen score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false) return Result{sidx, eidx, score}, nil }
// PrefixMatch performs prefix-match func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { return Result{0, 0, 0}, nil } if text.Length() < len(pattern) { return Result{-1, -1, 0}, nil } for index, r := range pattern { char := text.Get(index) if !caseSensitive { char = unicode.ToLower(char) } if normalize { char = normalizeRune(char) } if char != r { return Result{-1, -1, 0}, nil } } lenPattern := len(pattern) score, _ := calculateScore(caseSensitive, normalize, text, pattern, 0, lenPattern, false) return Result{0, lenPattern, score}, nil }
func awkTokenizer(input util.Chars) ([]util.Chars, int) { // 9, 32 ret := []util.Chars{} prefixLength := 0 state := awkNil numChars := input.Length() begin := 0 end := 0 for idx := 0; idx < numChars; idx++ { r := input.Get(idx) white := r == 9 || r == 32 switch state { case awkNil: if white { prefixLength++ } else { state, begin, end = awkBlack, idx, idx+1 } case awkBlack: end = idx + 1 if white { state = awkWhite } case awkWhite: if white { end = idx + 1 } else { ret = append(ret, input.Slice(begin, end)) state, begin, end = awkBlack, idx, idx+1 } } } if begin < end { ret = append(ret, input.Slice(begin, end)) } return ret, prefixLength }
// ExactMatchNaive is a basic string searching algorithm that handles case
// sensitivity. Although naive, it still performs better than the combination
// of strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// Since 0.15.0, this function searches for the match with the highest
// bonus point, instead of stopping immediately after finding the first match.
// The solution is much cheaper since there is only one possible alignment of
// the pattern.
func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	if len(pattern) == 0 {
		return Result{0, 0, 0}, nil
	}

	lenRunes := text.Length()
	lenPattern := len(pattern)

	if lenRunes < lenPattern {
		return Result{-1, -1, 0}, nil
	}

	// For simplicity, only look at the bonus at the first character position
	pidx := 0
	bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
	// indexAt maps the loop counter to a reversed index when forward is false,
	// so the same loop serves both scan directions.
	for index := 0; index < lenRunes; index++ {
		index_ := indexAt(index, lenRunes, forward)
		char := text.Get(index_)
		if !caseSensitive {
			// Fast ASCII lowering; fall back to unicode.To for non-ASCII
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		if normalize {
			char = normalizeRune(char)
		}
		pidx_ := indexAt(pidx, lenPattern, forward)
		pchar := pattern[pidx_]
		if pchar == char {
			if pidx_ == 0 {
				// Record the bonus at the start of this candidate alignment
				bonus = bonusAt(text, index_)
			}
			pidx++
			if pidx == lenPattern {
				// Full pattern matched; keep it if the bonus is the best so far
				if bonus > bestBonus {
					bestPos, bestBonus = index, bonus
				}
				if bonus == bonusBoundary {
					// Boundary bonus is the maximum possible; stop early
					break
				}
				// Rewind to just after the start of this match and keep looking
				index -= pidx - 1
				pidx, bonus = 0, 0
			}
		} else {
			// Mismatch: rewind to the character after the failed start
			index -= pidx
			pidx, bonus = 0, 0
		}
	}
	if bestPos >= 0 {
		var sidx, eidx int
		if forward {
			sidx = bestPos - lenPattern + 1
			eidx = bestPos + 1
		} else {
			// Indices were computed on the reversed view; map them back
			sidx = lenRunes - (bestPos + 1)
			eidx = lenRunes - (bestPos - lenPattern + 1)
		}
		score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false)
		return Result{sidx, eidx, score}, nil
	}
	return Result{-1, -1, 0}, nil
}
// FuzzyMatchV1 performs fuzzy-match using a greedy two-pass algorithm: a
// forward scan finds the first span containing all pattern characters in
// order, then a backward scan over that span tightens its start.
func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	if len(pattern) == 0 {
		return Result{0, 0, 0}, nil
	}

	pidx := 0
	sidx := -1
	eidx := -1

	lenRunes := text.Length()
	lenPattern := len(pattern)

	// Pass 1: forward scan (on the reversed view when forward is false)
	for index := 0; index < lenRunes; index++ {
		char := text.Get(indexAt(index, lenRunes, forward))
		// This is considerably faster than blindly applying strings.ToLower to the
		// whole string
		if !caseSensitive {
			// Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
			// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
			// compiler as of now does not inline non-leaf functions.)
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		if normalize {
			char = normalizeRune(char)
		}
		pchar := pattern[indexAt(pidx, lenPattern, forward)]
		if char == pchar {
			if sidx < 0 {
				sidx = index
			}
			if pidx++; pidx == lenPattern {
				eidx = index + 1
				break
			}
		}
	}

	if sidx >= 0 && eidx >= 0 {
		// Pass 2: scan backward from the end of the span, matching the pattern
		// in reverse, to move sidx as close to eidx as possible.
		pidx--
		for index := eidx - 1; index >= sidx; index-- {
			tidx := indexAt(index, lenRunes, forward)
			char := text.Get(tidx)
			if !caseSensitive {
				if char >= 'A' && char <= 'Z' {
					char += 32
				} else if char > unicode.MaxASCII {
					char = unicode.To(unicode.LowerCase, char)
				}
			}
			// NOTE(review): unlike the forward pass, this pass does not apply
			// normalizeRune, so with normalize on it may fail to tighten the
			// span (sidx then stays at the pass-1 value) — confirm intent.
			pidx_ := indexAt(pidx, lenPattern, forward)
			pchar := pattern[pidx_]
			if char == pchar {
				if pidx--; pidx < 0 {
					sidx = index
					break
				}
			}
		}

		if !forward {
			// Map span indices from the reversed view back to the original
			sidx, eidx = lenRunes-eidx, lenRunes-sidx
		}

		score, pos := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, withPos)
		return Result{sidx, eidx, score}, pos
	}
	return Result{-1, -1, 0}, nil
}
// FuzzyMatchV2 performs fuzzy-match by building a score matrix over the
// region of the input that can contain a match and picking the
// highest-scoring alignment (a Smith-Waterman-style dynamic program,
// restricted so that no pattern character may be omitted).
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	// Assume that pattern is given in lowercase if case-insensitive.
	// First check if there's a match and calculate bonus for each position.
	// If the input string is too long, consider finding the matching chars in
	// this phase as well (non-optimal alignment).
	N := input.Length()
	M := len(pattern)
	switch M {
	case 0:
		return Result{0, 0, 0}, posArray(withPos, M)
	case 1:
		// A single-character pattern has only one alignment per position;
		// the naive exact matcher is cheaper and equivalent.
		return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab)
	}

	// Since O(nm) algorithm can be prohibitively expensive for large input,
	// we fall back to the greedy algorithm.
	if slab != nil && N*M > cap(slab.I16) {
		return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
	}

	// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
	offset16 := 0
	offset32 := 0
	// B: bonus point for each position
	offset16, B := alloc16(offset16, slab, N, false)
	// F: the first occurrence of each character in the pattern
	offset32, F := alloc32(offset32, slab, M, false)
	// T: rune array of the (lowercased/normalized) input
	offset32, T := alloc32(offset32, slab, N, false)

	// Phase 1. Check if there's a match and calculate bonus for each point
	pidx, lastIdx, prevClass := 0, 0, charNonWord
	for idx := 0; idx < N; idx++ {
		char := input.Get(idx)
		var class charClass
		if char <= unicode.MaxASCII {
			class = charClassOfAscii(char)
		} else {
			class = charClassOfNonAscii(char)
		}

		if !caseSensitive && class == charUpper {
			// Fast ASCII lowering; unicode.To for the rest
			if char <= unicode.MaxASCII {
				char += 32
			} else {
				char = unicode.To(unicode.LowerCase, char)
			}
		}

		if normalize {
			char = normalizeRune(char)
		}

		T[idx] = char
		// Bonus depends on the transition between character classes
		B[idx] = bonusFor(prevClass, class)
		prevClass = class

		if pidx < M {
			if char == pattern[pidx] {
				lastIdx = idx
				F[pidx] = int32(idx)
				pidx++
			}
		} else {
			// Pattern fully seen; keep extending lastIdx to the final
			// occurrence of the last pattern character
			if char == pattern[M-1] {
				lastIdx = idx
			}
		}
	}
	if pidx != M {
		// Not all pattern characters appear in order: no match
		return Result{-1, -1, 0}, nil
	}

	// Phase 2. Fill in score matrix (H)
	// Unlike the original algorithm, we do not allow omission.
	// The matrix only covers columns F[0]..lastIdx — the span that can
	// possibly contain a match.
	width := lastIdx - int(F[0]) + 1
	offset16, H := alloc16(offset16, slab, width*M, false)

	// C: possible length of consecutive chunk at each position.
	offset16, C := alloc16(offset16, slab, width*M, false)

	maxScore, maxScorePos := int16(0), 0
	for i := 0; i < M; i++ {
		I := i * width
		inGap := false
		// Row i only needs columns from F[i] (the earliest place the i-th
		// pattern char can match) to lastIdx.
		for j := int(F[i]); j <= lastIdx; j++ {
			j0 := j - int(F[0])
			var s1, s2, consecutive int16

			// s2: score when skipping this input character (gap)
			if j > int(F[i]) {
				if inGap {
					s2 = H[I+j0-1] + scoreGapExtention
				} else {
					s2 = H[I+j0-1] + scoreGapStart
				}
			}

			// s1: score when matching pattern[i] at this position
			if pattern[i] == T[j] {
				var diag int16
				if i > 0 && j0 > 0 {
					diag = H[I-width+j0-1]
				}
				s1 = diag + scoreMatch
				b := B[j]
				if i > 0 {
					// j > 0 if i > 0
					consecutive = C[I-width+j0-1] + 1
					// Break consecutive chunk
					if b == bonusBoundary {
						consecutive = 1
					} else if consecutive > 1 {
						// Carry over the bonus from the start of the chunk
						b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
					}
				} else {
					consecutive = 1
					b *= bonusFirstCharMultiplier
				}
				if s1+b < s2 {
					s1 += B[j]
					consecutive = 0
				} else {
					s1 += b
				}
			}
			C[I+j0] = consecutive

			inGap = s1 < s2
			score := util.Max16(util.Max16(s1, s2), 0)
			// On the last row, track where the best total score occurs;
			// backward matching (>=) prefers the rightmost such position
			if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) {
				maxScore, maxScorePos = score, j
			}
			H[I+j0] = score
		}

		if DEBUG {
			// Dump the H and C rows for this pattern character
			if i == 0 {
				fmt.Print(" ")
				for j := int(F[i]); j <= lastIdx; j++ {
					fmt.Printf(" " + string(input.Get(j)) + " ")
				}
				fmt.Println()
			}
			fmt.Print(string(pattern[i]) + " ")
			for idx := int(F[0]); idx < int(F[i]); idx++ {
				fmt.Print(" 0 ")
			}
			for idx := int(F[i]); idx <= lastIdx; idx++ {
				fmt.Printf("%2d ", H[i*width+idx-int(F[0])])
			}
			fmt.Println()

			fmt.Print(" ")
			for idx, p := range C[I : I+width] {
				if idx+int(F[0]) < int(F[i]) {
					p = 0
				}
				fmt.Printf("%2d ", p)
			}
			fmt.Println()
		}
	}

	// Phase 3. (Optional) Backtrace to find character positions
	pos := posArray(withPos, M)
	j := int(F[0])
	if withPos {
		// Walk backward from the best-scoring cell, preferring a match over
		// a gap on ties when doing so keeps/extends a consecutive chunk.
		i := M - 1
		j = maxScorePos
		preferMatch := true
		for {
			I := i * width
			j0 := j - int(F[0])
			s := H[I+j0]

			var s1, s2 int16
			if i > 0 && j >= int(F[i]) {
				s1 = H[I-width+j0-1] // score coming from a diagonal (match) move
			}
			if j > int(F[i]) {
				s2 = H[I+j0-1] // score coming from a horizontal (gap) move
			}

			if s > s1 && (s > s2 || s == s2 && preferMatch) {
				*pos = append(*pos, j)
				if i == 0 {
					break
				}
				i--
			}
			preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0
			j--
		}
	}
	// Start offset we return here is only relevant when begin tiebreak is used.
	// However finding the accurate offset requires backtracking, and we don't
	// want to pay extra cost for the option that has lost its importance.
	return Result{j, maxScorePos + 1, int(maxScore)}, pos
}