// PrefixMatch performs prefix-match func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { return Result{0, 0, 0}, nil } if text.Length() < len(pattern) { return Result{-1, -1, 0}, nil } for index, r := range pattern { char := text.Get(index) if !caseSensitive { char = unicode.ToLower(char) } if normalize { char = normalizeRune(char) } if char != r { return Result{-1, -1, 0}, nil } } lenPattern := len(pattern) score, _ := calculateScore(caseSensitive, normalize, text, pattern, 0, lenPattern, false) return Result{0, lenPattern, score}, nil }
// Tokenize tokenizes the given string with the delimiter func Tokenize(text util.Chars, delimiter Delimiter) []Token { if delimiter.str == nil && delimiter.regex == nil { // AWK-style (\S+\s*) tokens, prefixLength := awkTokenizer(text) return withPrefixLengths(tokens, prefixLength) } if delimiter.str != nil { return withPrefixLengths(text.Split(*delimiter.str), 0) } // FIXME performance var tokens []string if delimiter.regex != nil { str := text.ToString() for len(str) > 0 { loc := delimiter.regex.FindStringIndex(str) if loc == nil { loc = []int{0, len(str)} } last := util.Max(loc[1], 1) tokens = append(tokens, str[:last]) str = str[last:] } } asRunes := make([]util.Chars, len(tokens)) for i, token := range tokens { asRunes[i] = util.RunesToChars([]rune(token)) } return withPrefixLengths(asRunes, 0) }
// Implement the same sorting criteria as V2 func calculateScore(caseSensitive bool, normalize bool, text util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) { pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0) pos := posArray(withPos, len(pattern)) prevClass := charNonWord if sidx > 0 { prevClass = charClassOf(text.Get(sidx - 1)) } for idx := sidx; idx < eidx; idx++ { char := text.Get(idx) class := charClassOf(char) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } // pattern is already normalized if normalize { char = normalizeRune(char) } if char == pattern[pidx] { if withPos { *pos = append(*pos, idx) } score += scoreMatch bonus := bonusFor(prevClass, class) if consecutive == 0 { firstBonus = bonus } else { // Break consecutive chunk if bonus == bonusBoundary { firstBonus = bonus } bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive) } if pidx == 0 { score += int(bonus * bonusFirstCharMultiplier) } else { score += int(bonus) } inGap = false consecutive++ pidx++ } else { if inGap { score += scoreGapExtention } else { score += scoreGapStart } inGap = true consecutive = 0 firstBonus = 0 } prevClass = class } return score, pos }
// EqualMatch performs equal-match func EqualMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { lenPattern := len(pattern) if text.Length() != lenPattern { return Result{-1, -1, 0}, nil } match := true if normalize { runes := text.ToRunes() for idx, pchar := range pattern { char := runes[idx] if !caseSensitive { char = unicode.To(unicode.LowerCase, char) } if normalizeRune(pchar) != normalizeRune(char) { match = false break } } } else { runesStr := text.ToString() if !caseSensitive { runesStr = strings.ToLower(runesStr) } match = runesStr == string(pattern) } if match { return Result{0, lenPattern, (scoreMatch+bonusBoundary)*lenPattern + (bonusFirstCharMultiplier-1)*bonusBoundary}, nil } return Result{-1, -1, 0}, nil }
// SuffixMatch performs suffix-match func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { lenRunes := text.Length() trimmedLen := lenRunes - text.TrailingWhitespaces() if len(pattern) == 0 { return Result{trimmedLen, trimmedLen, 0}, nil } diff := trimmedLen - len(pattern) if diff < 0 { return Result{-1, -1, 0}, nil } for index, r := range pattern { char := text.Get(index + diff) if !caseSensitive { char = unicode.ToLower(char) } if normalize { char = normalizeRune(char) } if char != r { return Result{-1, -1, 0}, nil } } lenPattern := len(pattern) sidx := trimmedLen - lenPattern eidx := trimmedLen score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false) return Result{sidx, eidx, score}, nil }
func awkTokenizer(input util.Chars) ([]util.Chars, int) { // 9, 32 ret := []util.Chars{} prefixLength := 0 state := awkNil numChars := input.Length() begin := 0 end := 0 for idx := 0; idx < numChars; idx++ { r := input.Get(idx) white := r == 9 || r == 32 switch state { case awkNil: if white { prefixLength++ } else { state, begin, end = awkBlack, idx, idx+1 } case awkBlack: end = idx + 1 if white { state = awkWhite } case awkWhite: if white { end = idx + 1 } else { ret = append(ret, input.Slice(begin, end)) state, begin, end = awkBlack, idx, idx+1 } } } if begin < end { ret = append(ret, input.Slice(begin, end)) } return ret, prefixLength }
// Transform is used to transform the input when --with-nth option is given func Transform(tokens []Token, withNth []Range) []Token { transTokens := make([]Token, len(withNth)) numTokens := len(tokens) for idx, r := range withNth { parts := []*util.Chars{} minIdx := 0 if r.begin == r.end { idx := r.begin if idx == rangeEllipsis { chars := util.RunesToChars(joinTokens(tokens)) parts = append(parts, &chars) } else { if idx < 0 { idx += numTokens + 1 } if idx >= 1 && idx <= numTokens { minIdx = idx - 1 parts = append(parts, tokens[idx-1].text) } } } else { var begin, end int if r.begin == rangeEllipsis { // ..N begin, end = 1, r.end if end < 0 { end += numTokens + 1 } } else if r.end == rangeEllipsis { // N.. begin, end = r.begin, numTokens if begin < 0 { begin += numTokens + 1 } } else { begin, end = r.begin, r.end if begin < 0 { begin += numTokens + 1 } if end < 0 { end += numTokens + 1 } } minIdx = util.Max(0, begin-1) for idx := begin; idx <= end; idx++ { if idx >= 1 && idx <= numTokens { parts = append(parts, tokens[idx-1].text) } } } // Merge multiple parts var merged util.Chars switch len(parts) { case 0: merged = util.RunesToChars([]rune{}) case 1: merged = *parts[0] default: runes := []rune{} for _, part := range parts { runes = append(runes, part.ToRunes()...) } merged = util.RunesToChars(runes) } var prefixLength int32 if minIdx < numTokens { prefixLength = tokens[minIdx].prefixLength } else { prefixLength = 0 } transTokens[idx] = Token{&merged, prefixLength, int32(merged.TrimLength())} } return transTokens }
// ExactMatchNaive is a basic string searching algorithm that handles case // sensitivity. Although naive, it still performs better than the combination // of strings.ToLower + strings.Index for typical fzf use cases where input // strings and patterns are not very long. // // Since 0.15.0, this function searches for the match with the highest // bonus point, instead of stopping immediately after finding the first match. // The solution is much cheaper since there is only one possible alignment of // the pattern. func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { return Result{0, 0, 0}, nil } lenRunes := text.Length() lenPattern := len(pattern) if lenRunes < lenPattern { return Result{-1, -1, 0}, nil } // For simplicity, only look at the bonus at the first character position pidx := 0 bestPos, bonus, bestBonus := -1, int16(0), int16(-1) for index := 0; index < lenRunes; index++ { index_ := indexAt(index, lenRunes, forward) char := text.Get(index_) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } if normalize { char = normalizeRune(char) } pidx_ := indexAt(pidx, lenPattern, forward) pchar := pattern[pidx_] if pchar == char { if pidx_ == 0 { bonus = bonusAt(text, index_) } pidx++ if pidx == lenPattern { if bonus > bestBonus { bestPos, bestBonus = index, bonus } if bonus == bonusBoundary { break } index -= pidx - 1 pidx, bonus = 0, 0 } } else { index -= pidx pidx, bonus = 0, 0 } } if bestPos >= 0 { var sidx, eidx int if forward { sidx = bestPos - lenPattern + 1 eidx = bestPos + 1 } else { sidx = lenRunes - (bestPos + 1) eidx = lenRunes - (bestPos - lenPattern + 1) } score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false) return Result{sidx, eidx, score}, nil } return Result{-1, -1, 0}, nil }
// FuzzyMatchV1 performs fuzzy-match func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { return Result{0, 0, 0}, nil } pidx := 0 sidx := -1 eidx := -1 lenRunes := text.Length() lenPattern := len(pattern) for index := 0; index < lenRunes; index++ { char := text.Get(indexAt(index, lenRunes, forward)) // This is considerably faster than blindly applying strings.ToLower to the // whole string if !caseSensitive { // Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable // difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go // compiler as of now does not inline non-leaf functions.) if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } if normalize { char = normalizeRune(char) } pchar := pattern[indexAt(pidx, lenPattern, forward)] if char == pchar { if sidx < 0 { sidx = index } if pidx++; pidx == lenPattern { eidx = index + 1 break } } } if sidx >= 0 && eidx >= 0 { pidx-- for index := eidx - 1; index >= sidx; index-- { tidx := indexAt(index, lenRunes, forward) char := text.Get(tidx) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } pidx_ := indexAt(pidx, lenPattern, forward) pchar := pattern[pidx_] if char == pchar { if pidx--; pidx < 0 { sidx = index break } } } if !forward { sidx, eidx = lenRunes-eidx, lenRunes-sidx } score, pos := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, withPos) return Result{sidx, eidx, score}, pos } return Result{-1, -1, 0}, nil }
// FuzzyMatchV2 performs fuzzy-match by filling a score matrix over the input
// and the pattern and picking the best-scoring cell in the last pattern row.
//
// An empty pattern yields a zero-width, zero-score result, and a
// single-character pattern is delegated to ExactMatchNaive. When a slab is
// given and N*M exceeds the capacity of slab.I16, the greedy FuzzyMatchV1 is
// used instead. If the pattern is not a subsequence of the input, the result
// is {-1, -1, 0} with nil positions.
//
// When withPos is set, the matched positions are collected by backtracing
// from the best cell, so they are appended in descending order. When it is
// not set, the returned start offset is the first occurrence of pattern[0]
// rather than the exact start of the best match.
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	// Assume that pattern is given in lowercase if case-insensitive.
	// First check if there's a match and calculate bonus for each position.
	// If the input string is too long, consider finding the matching chars in
	// this phase as well (non-optimal alignment).
	N := input.Length()
	M := len(pattern)
	switch M {
	case 0:
		return Result{0, 0, 0}, posArray(withPos, M)
	case 1:
		return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab)
	}

	// Since O(nm) algorithm can be prohibitively expensive for large input,
	// we fall back to the greedy algorithm.
	if slab != nil && N*M > cap(slab.I16) {
		return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
	}

	// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
	offset16 := 0
	offset32 := 0
	// Bonus point for each position
	offset16, B := alloc16(offset16, slab, N, false)
	// The first occurrence of each character in the pattern
	offset32, F := alloc32(offset32, slab, M, false)
	// Rune array
	offset32, T := alloc32(offset32, slab, N, false)

	// Phase 1. Check if there's a match and calculate bonus for each point
	pidx, lastIdx, prevClass := 0, 0, charNonWord
	for idx := 0; idx < N; idx++ {
		char := input.Get(idx)
		var class charClass
		if char <= unicode.MaxASCII {
			class = charClassOfAscii(char)
		} else {
			class = charClassOfNonAscii(char)
		}

		// Only characters classified as upper-case need folding.
		if !caseSensitive && class == charUpper {
			if char <= unicode.MaxASCII {
				char += 32
			} else {
				char = unicode.To(unicode.LowerCase, char)
			}
		}

		if normalize {
			char = normalizeRune(char)
		}

		// Remember the folded/normalized character and its bonus for phase 2.
		T[idx] = char
		B[idx] = bonusFor(prevClass, class)
		prevClass = class

		if pidx < M {
			// Still looking for the first occurrence of pattern[pidx].
			if char == pattern[pidx] {
				lastIdx = idx
				F[pidx] = int32(idx)
				pidx++
			}
		} else {
			// The whole pattern has been seen; keep extending lastIdx to
			// the last occurrence of the final pattern character.
			if char == pattern[M-1] {
				lastIdx = idx
			}
		}
	}
	// Not every pattern character was found in order: no match.
	if pidx != M {
		return Result{-1, -1, 0}, nil
	}

	// Phase 2. Fill in score matrix (H)
	// Unlike the original algorithm, we do not allow omission.
	// Only the columns between F[0] and lastIdx are materialized; j0 below
	// is the column index relative to F[0].
	width := lastIdx - int(F[0]) + 1
	offset16, H := alloc16(offset16, slab, width*M, false)

	// Possible length of consecutive chunk at each position.
	offset16, C := alloc16(offset16, slab, width*M, false)

	maxScore, maxScorePos := int16(0), 0
	for i := 0; i < M; i++ {
		// I is the offset of row i in the flattened matrices.
		I := i * width
		inGap := false
		for j := int(F[i]); j <= lastIdx; j++ {
			j0 := j - int(F[0])
			// s1 is the score when pattern[i] is matched at j, s2 the score
			// when position j is skipped (gap).
			var s1, s2, consecutive int16

			if j > int(F[i]) {
				if inGap {
					s2 = H[I+j0-1] + scoreGapExtention
				} else {
					s2 = H[I+j0-1] + scoreGapStart
				}
			}

			if pattern[i] == T[j] {
				var diag int16
				if i > 0 && j0 > 0 {
					diag = H[I-width+j0-1]
				}
				s1 = diag + scoreMatch
				b := B[j]
				if i > 0 {
					// j > 0 if i > 0
					consecutive = C[I-width+j0-1] + 1
					// Break consecutive chunk
					if b == bonusBoundary {
						consecutive = 1
					} else if consecutive > 1 {
						b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
					}
				} else {
					consecutive = 1
					b *= bonusFirstCharMultiplier
				}
				// If the chunk-adjusted bonus still loses to the gap score,
				// fall back to the plain positional bonus and reset the
				// consecutive count.
				if s1+b < s2 {
					s1 += B[j]
					consecutive = 0
				} else {
					s1 += b
				}
			}
			C[I+j0] = consecutive

			inGap = s1 < s2
			score := util.Max16(util.Max16(s1, s2), 0)
			// Track the best score in the last row. A forward search keeps
			// the earliest best cell, a backward search the latest one.
			if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) {
				maxScore, maxScorePos = score, j
			}
			H[I+j0] = score
		}

		// Dump row i of the H and C matrices when debugging is enabled.
		if DEBUG {
			if i == 0 {
				fmt.Print(" ")
				for j := int(F[i]); j <= lastIdx; j++ {
					fmt.Printf(" " + string(input.Get(j)) + " ")
				}
				fmt.Println()
			}
			fmt.Print(string(pattern[i]) + " ")
			for idx := int(F[0]); idx < int(F[i]); idx++ {
				fmt.Print(" 0 ")
			}
			for idx := int(F[i]); idx <= lastIdx; idx++ {
				fmt.Printf("%2d ", H[i*width+idx-int(F[0])])
			}
			fmt.Println()

			fmt.Print(" ")
			for idx, p := range C[I : I+width] {
				if idx+int(F[0]) < int(F[i]) {
					p = 0
				}
				fmt.Printf("%2d ", p)
			}
			fmt.Println()
		}
	}

	// Phase 3. (Optional) Backtrace to find character positions
	pos := posArray(withPos, M)
	j := int(F[0])
	if withPos {
		// Walk back from the best cell of the last row towards row 0,
		// recording a position each time a pattern character is consumed.
		i := M - 1
		j = maxScorePos
		preferMatch := true
		for {
			I := i * width
			j0 := j - int(F[0])
			s := H[I+j0]

			// s1 is the diagonal (match) predecessor, s2 the left (gap)
			// predecessor; both stay 0 when out of range.
			var s1, s2 int16
			if i > 0 && j >= int(F[i]) {
				s1 = H[I-width+j0-1]
			}
			if j > int(F[i]) {
				s2 = H[I+j0-1]
			}

			if s > s1 && (s > s2 || s == s2 && preferMatch) {
				*pos = append(*pos, j)
				if i == 0 {
					break
				}
				i--
			}
			preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0
			j--
		}
	}
	// Start offset we return here is only relevant when begin tiebreak is used.
	// However finding the accurate offset requires backtracking, and we don't
	// want to pay extra cost for the option that has lost its importance.
	return Result{j, maxScorePos + 1, int(maxScore)}, pos
}
func bonusAt(input util.Chars, idx int) int16 { if idx == 0 { return bonusBoundary } return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx))) }