// ExactMatchNaive looks for the first occurrence of pattern inside runes using
// a straightforward quadratic scan.
//
// When caseSensitive is false, every rune taken from runes is lower-cased
// before comparison; pattern itself is compared as given, so callers are
// expected to pass it already lower-cased.
//
// It returns the half-open rune range [start, end) of the match, (0, 0) for an
// empty pattern, and (-1, -1) when pattern does not occur in runes.
func ExactMatchNaive(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
	needle := len(pattern)
	if needle == 0 {
		return 0, 0
	}

	haystack := len(runes)
	if haystack < needle {
		return -1, -1
	}

	// Try every start offset at which the whole pattern still fits.
	for start := 0; start+needle <= haystack; start++ {
		matched := 0
		for ; matched < needle; matched++ {
			r := runes[start+matched]
			if !caseSensitive {
				switch {
				case 'A' <= r && r <= 'Z':
					// ASCII fast path: upper and lower case are 32 apart.
					r += 32
				case r > unicode.MaxASCII:
					r = unicode.To(unicode.LowerCase, r)
				}
			}
			if r != pattern[matched] {
				break
			}
		}
		if matched == needle {
			return start, start + needle
		}
	}
	return -1, -1
}
// EqualMatch performs equal-match func EqualMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { lenPattern := len(pattern) if text.Length() != lenPattern { return Result{-1, -1, 0}, nil } match := true if normalize { runes := text.ToRunes() for idx, pchar := range pattern { char := runes[idx] if !caseSensitive { char = unicode.To(unicode.LowerCase, char) } if normalizeRune(pchar) != normalizeRune(char) { match = false break } } } else { runesStr := text.ToString() if !caseSensitive { runesStr = strings.ToLower(runesStr) } match = runesStr == string(pattern) } if match { return Result{0, lenPattern, (scoreMatch+bonusBoundary)*lenPattern + (bonusFirstCharMultiplier-1)*bonusBoundary}, nil } return Result{-1, -1, 0}, nil }
// Implement the same sorting criteria as V2 func calculateScore(caseSensitive bool, normalize bool, text util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) { pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0) pos := posArray(withPos, len(pattern)) prevClass := charNonWord if sidx > 0 { prevClass = charClassOf(text.Get(sidx - 1)) } for idx := sidx; idx < eidx; idx++ { char := text.Get(idx) class := charClassOf(char) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } // pattern is already normalized if normalize { char = normalizeRune(char) } if char == pattern[pidx] { if withPos { *pos = append(*pos, idx) } score += scoreMatch bonus := bonusFor(prevClass, class) if consecutive == 0 { firstBonus = bonus } else { // Break consecutive chunk if bonus == bonusBoundary { firstBonus = bonus } bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive) } if pidx == 0 { score += int(bonus * bonusFirstCharMultiplier) } else { score += int(bonus) } inGap = false consecutive++ pidx++ } else { if inGap { score += scoreGapExtention } else { score += scoreGapStart } inGap = true consecutive = 0 firstBonus = 0 } prevClass = class } return score, pos }
func Spell(w string) { last := len(w) - 1 for i, c := range w { c = unicode.To(unicode.LowerCase, c) if val, ok := Table[c]; ok { fmt.Print(val) } else { fmt.Print(string(c)) } if i != last { fmt.Print(" ") } } }
// normalizeReference canonicalises s for comparison: leading and trailing
// whitespace is dropped, every interior run of whitespace (as defined by
// unicode.IsSpace) becomes a single ASCII space, and all other runes are
// mapped to lower case.
func normalizeReference(s string) string {
	var out bytes.Buffer
	pendingGap := false
	for _, r := range s {
		if unicode.IsSpace(r) {
			// Remember the gap; it is only written once more text follows,
			// which keeps leading and trailing whitespace out of the result.
			pendingGap = true
			continue
		}
		if pendingGap && out.Len() > 0 {
			out.WriteByte(' ')
		}
		pendingGap = false
		out.WriteRune(unicode.To(unicode.LowerCase, r))
	}
	return out.String()
}
// ExactMatchNaive is a basic string searching algorithm that handles case // sensitivity. Although naive, it still performs better than the combination // of strings.ToLower + strings.Index for typical fzf use cases where input // strings and patterns are not very long. // // We might try to implement better algorithms in the future: // http://en.wikipedia.org/wiki/String_searching_algorithm func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { if len(pattern) == 0 { return Result{0, 0, 0} } lenRunes := len(runes) lenPattern := len(pattern) if lenRunes < lenPattern { return Result{-1, -1, 0} } pidx := 0 for index := 0; index < lenRunes; index++ { char := runeAt(runes, index, lenRunes, forward) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } pchar := runeAt(pattern, pidx, lenPattern, forward) if pchar == char { pidx++ if pidx == lenPattern { var sidx, eidx int if forward { sidx = index - lenPattern + 1 eidx = index + 1 } else { sidx = lenRunes - (index + 1) eidx = lenRunes - (index - lenPattern + 1) } return Result{int32(sidx), int32(eidx), evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} } } else { index -= pidx pidx = 0 } } return Result{-1, -1, 0} }
// ExactMatchNaive is a basic string searching algorithm that handles case // sensitivity. Although naive, it still performs better than the combination // of strings.ToLower + strings.Index for typical fzf use cases where input // strings and patterns are not very long. // // We might try to implement better algorithms in the future: // http://en.wikipedia.org/wiki/String_searching_algorithm func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) { if len(pattern) == 0 { return 0, 0 } lenRunes := len(runes) lenPattern := len(pattern) if lenRunes < lenPattern { return -1, -1 } pidx := 0 for index := 0; index < lenRunes; index++ { char := runeAt(runes, index, lenRunes, forward) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } pchar := runeAt(pattern, pidx, lenPattern, forward) if pchar == char { pidx++ if pidx == lenPattern { if forward { return index - lenPattern + 1, index + 1 } return lenRunes - (index + 1), lenRunes - (index - lenPattern + 1) } } else { index -= pidx pidx = 0 } } return -1, -1 }
// ExampleTo prints the upper, lower and title case mappings that unicode.To
// produces for a lower-case and an upper-case 'g', one code point per line.
func ExampleTo() {
	letters := []rune{'g', 'G'}
	cases := []int{unicode.UpperCase, unicode.LowerCase, unicode.TitleCase}

	for _, letter := range letters {
		for _, target := range cases {
			fmt.Printf("%#U\n", unicode.To(target, letter))
		}
	}

	// Output:
	// U+0047 'G'
	// U+0067 'g'
	// U+0047 'G'
	// U+0047 'G'
	// U+0067 'g'
	// U+0047 'G'
}
func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, eidx int) int32 { var bonus int32 pidx := 0 lenPattern := len(pattern) consecutive := false prevClass := charNonWord for index := 0; index < eidx; index++ { char := runes[index] var class charClass if unicode.IsLower(char) { class = charLower } else if unicode.IsUpper(char) { class = charUpper } else if unicode.IsLetter(char) { class = charLetter } else if unicode.IsNumber(char) { class = charNumber } else { class = charNonWord } var point int32 if prevClass == charNonWord && class != charNonWord { // Word boundary point = 2 } else if prevClass == charLower && class == charUpper || prevClass != charNumber && class == charNumber { // camelCase letter123 point = 1 } prevClass = class if index >= sidx { if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } pchar := pattern[pidx] if pchar == char { // Boost bonus for the first character in the pattern if pidx == 0 { point *= 2 } // Bonus to consecutive matching chars if consecutive { point++ } bonus += point if pidx++; pidx == lenPattern { break } consecutive = true } else { consecutive = false } } } return bonus }
// FuzzyMatch performs fuzzy-match func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result { if len(pattern) == 0 { return Result{0, 0, 0} } // 0. (FIXME) How to find the shortest match? // a_____b__c__abc // ^^^^^^^^^^ ^^^ // 1. forward scan (abc) // *-----*-----*> // a_____b___abc__ // 2. reverse scan (cba) // a_____b___abc__ // <*** pidx := 0 sidx := -1 eidx := -1 lenRunes := len(runes) lenPattern := len(pattern) for index := range runes { char := runeAt(runes, index, lenRunes, forward) // This is considerably faster than blindly applying strings.ToLower to the // whole string if !caseSensitive { // Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable // difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go // compiler as of now does not inline non-leaf functions.) if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } pchar := runeAt(pattern, pidx, lenPattern, forward) if char == pchar { if sidx < 0 { sidx = index } if pidx++; pidx == lenPattern { eidx = index + 1 break } } } if sidx >= 0 && eidx >= 0 { pidx-- for index := eidx - 1; index >= sidx; index-- { char := runeAt(runes, index, lenRunes, forward) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } pchar := runeAt(pattern, pidx, lenPattern, forward) if char == pchar { if pidx--; pidx < 0 { sidx = index break } } } // Calculate the bonus. This can't be done at the same time as the // pattern scan above because 'forward' may be false. if !forward { sidx, eidx = lenRunes-eidx, lenRunes-sidx } return Result{int32(sidx), int32(eidx), evaluateBonus(caseSensitive, runes, pattern, sidx, eidx)} } return Result{-1, -1, 0} }
// FuzzyMatch reports whether the runes of pattern appear in runes in order
// (not necessarily adjacent) and, if so, where.
//
// The search runs in two passes:
//  1. a forward scan that consumes pattern left to right and stops at the
//     first index where the whole pattern has been seen;
//  2. a reverse scan from that end index that consumes pattern right to left,
//     which pulls the start as close to the end as possible.
//
// FIXME: this does not necessarily find the shortest match in the text.
//
// When caseSensitive is false only text runes are lower-cased; pattern is
// compared as given. It returns the [start, end) offsets of the match, (0, 0)
// for an empty pattern and (-1, -1) when there is no match.
func FuzzyMatch(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
	patLen := len(pattern)
	if patLen == 0 {
		return 0, 0
	}

	begin, end, next := -1, -1, 0

	for i := 0; i < len(runes); i++ {
		r := runes[i]
		// Folding rune by rune is considerably cheaper than lower-casing the
		// whole input up front; the ASCII branch partially inlines
		// unicode.ToLower for speed.
		if !caseSensitive {
			switch {
			case 'A' <= r && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != pattern[next] {
			continue
		}
		if begin < 0 {
			begin = i
		}
		next++
		if next == patLen {
			end = i + 1
			break
		}
	}

	if begin < 0 || end < 0 {
		return -1, -1
	}

	// Second pass: walk back from the end to tighten the start.
	next--
	for i := end - 1; i >= begin; i-- {
		r := runes[i]
		if !caseSensitive {
			switch {
			case 'A' <= r && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != pattern[next] {
			continue
		}
		next--
		if next < 0 {
			begin = i
			break
		}
	}
	return begin, end
}
// ExactMatchNaive is a basic string searching algorithm that handles case // sensitivity. Although naive, it still performs better than the combination // of strings.ToLower + strings.Index for typical fzf use cases where input // strings and patterns are not very long. // // Since 0.15.0, this function searches for the match with the highest // bonus point, instead of stopping immediately after finding the first match. // The solution is much cheaper since there is only one possible alignment of // the pattern. func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { return Result{0, 0, 0}, nil } lenRunes := text.Length() lenPattern := len(pattern) if lenRunes < lenPattern { return Result{-1, -1, 0}, nil } // For simplicity, only look at the bonus at the first character position pidx := 0 bestPos, bonus, bestBonus := -1, int16(0), int16(-1) for index := 0; index < lenRunes; index++ { index_ := indexAt(index, lenRunes, forward) char := text.Get(index_) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } if normalize { char = normalizeRune(char) } pidx_ := indexAt(pidx, lenPattern, forward) pchar := pattern[pidx_] if pchar == char { if pidx_ == 0 { bonus = bonusAt(text, index_) } pidx++ if pidx == lenPattern { if bonus > bestBonus { bestPos, bestBonus = index, bonus } if bonus == bonusBoundary { break } index -= pidx - 1 pidx, bonus = 0, 0 } } else { index -= pidx pidx, bonus = 0, 0 } } if bestPos >= 0 { var sidx, eidx int if forward { sidx = bestPos - lenPattern + 1 eidx = bestPos + 1 } else { sidx = lenRunes - (bestPos + 1) eidx = lenRunes - (bestPos - lenPattern + 1) } score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false) return Result{sidx, eidx, score}, nil } return Result{-1, -1, 0}, nil }
// FuzzyMatchV1 performs fuzzy-match func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) { if len(pattern) == 0 { return Result{0, 0, 0}, nil } pidx := 0 sidx := -1 eidx := -1 lenRunes := text.Length() lenPattern := len(pattern) for index := 0; index < lenRunes; index++ { char := text.Get(indexAt(index, lenRunes, forward)) // This is considerably faster than blindly applying strings.ToLower to the // whole string if !caseSensitive { // Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable // difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go // compiler as of now does not inline non-leaf functions.) if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } if normalize { char = normalizeRune(char) } pchar := pattern[indexAt(pidx, lenPattern, forward)] if char == pchar { if sidx < 0 { sidx = index } if pidx++; pidx == lenPattern { eidx = index + 1 break } } } if sidx >= 0 && eidx >= 0 { pidx-- for index := eidx - 1; index >= sidx; index-- { tidx := indexAt(index, lenRunes, forward) char := text.Get(tidx) if !caseSensitive { if char >= 'A' && char <= 'Z' { char += 32 } else if char > unicode.MaxASCII { char = unicode.To(unicode.LowerCase, char) } } pidx_ := indexAt(pidx, lenPattern, forward) pchar := pattern[pidx_] if char == pchar { if pidx--; pidx < 0 { sidx = index break } } } if !forward { sidx, eidx = lenRunes-eidx, lenRunes-sidx } score, pos := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, withPos) return Result{sidx, eidx, score}, pos } return Result{-1, -1, 0}, nil }
// FuzzyMatchV2 performs a fuzzy match of pattern against input by filling a
// score matrix and picking the best-scoring end position.
//
// Special cases handled before the matrix is built:
//   - an empty pattern returns Result{0, 0, 0} with the slice from posArray;
//   - a single-rune pattern is delegated to ExactMatchNaive;
//   - when slab is non-nil and N*M exceeds cap(slab.I16), the work is
//     delegated to FuzzyMatchV1.
//
// Otherwise it runs three phases: (1) one pass over input that records the
// folded runes, the per-position bonus and the first occurrence of each
// pattern rune; (2) filling the score matrix H and the consecutive-length
// matrix C; (3) when withPos is true, a backtrace over H to collect matched
// positions. If pattern is not a subsequence of input it returns
// Result{-1, -1, 0}, nil.
//
// The returned Result holds a start offset, maxScorePos+1 as the end offset
// and the best score. The start offset is only exact when withPos is true
// (see the note before the final return).
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	// Assume that pattern is given in lowercase if case-insensitive.
	// First check if there's a match and calculate bonus for each position.
	// If the input string is too long, consider finding the matching chars in
	// this phase as well (non-optimal alignment).
	N := input.Length()
	M := len(pattern)
	switch M {
	case 0:
		return Result{0, 0, 0}, posArray(withPos, M)
	case 1:
		return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab)
	}

	// Since O(nm) algorithm can be prohibitively expensive for large input,
	// we fall back to the greedy algorithm.
	if slab != nil && N*M > cap(slab.I16) {
		return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
	}

	// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
	offset16 := 0
	offset32 := 0
	// Bonus point for each position
	offset16, B := alloc16(offset16, slab, N, false)
	// The first occurrence of each character in the pattern
	offset32, F := alloc32(offset32, slab, M, false)
	// Rune array
	offset32, T := alloc32(offset32, slab, N, false)

	// Phase 1. Check if there's a match and calculate bonus for each point
	pidx, lastIdx, prevClass := 0, 0, charNonWord
	for idx := 0; idx < N; idx++ {
		char := input.Get(idx)
		var class charClass
		if char <= unicode.MaxASCII {
			class = charClassOfAscii(char)
		} else {
			class = charClassOfNonAscii(char)
		}

		// Only runes classified as upper case need folding.
		if !caseSensitive && class == charUpper {
			if char <= unicode.MaxASCII {
				char += 32
			} else {
				char = unicode.To(unicode.LowerCase, char)
			}
		}

		if normalize {
			char = normalizeRune(char)
		}

		T[idx] = char
		B[idx] = bonusFor(prevClass, class)
		prevClass = class

		if pidx < M {
			// Still looking for the first occurrence of pattern[pidx].
			if char == pattern[pidx] {
				lastIdx = idx
				F[pidx] = int32(idx)
				pidx++
			}
		} else {
			// Whole pattern already seen; keep extending lastIdx to the last
			// occurrence of the final pattern rune.
			if char == pattern[M-1] {
				lastIdx = idx
			}
		}
	}
	if pidx != M {
		return Result{-1, -1, 0}, nil
	}

	// Phase 2. Fill in score matrix (H)
	// Unlike the original algorithm, we do not allow omission.
	// Only columns F[0]..lastIdx are stored, so column j lives at j-F[0].
	width := lastIdx - int(F[0]) + 1
	offset16, H := alloc16(offset16, slab, width*M, false)

	// Possible length of consecutive chunk at each position.
	offset16, C := alloc16(offset16, slab, width*M, false)

	maxScore, maxScorePos := int16(0), 0
	for i := 0; i < M; i++ {
		// I is the offset of row i in the flattened matrices.
		I := i * width
		inGap := false
		for j := int(F[i]); j <= lastIdx; j++ {
			j0 := j - int(F[0])
			// s1: score when pattern[i] is matched at j.
			// s2: score when j is skipped (gap), taken from the cell to the left.
			var s1, s2, consecutive int16

			if j > int(F[i]) {
				if inGap {
					s2 = H[I+j0-1] + scoreGapExtention
				} else {
					s2 = H[I+j0-1] + scoreGapStart
				}
			}

			if pattern[i] == T[j] {
				var diag int16
				if i > 0 && j0 > 0 {
					diag = H[I-width+j0-1]
				}
				s1 = diag + scoreMatch
				b := B[j]
				if i > 0 {
					// j > 0 if i > 0
					consecutive = C[I-width+j0-1] + 1
					// Break consecutive chunk
					if b == bonusBoundary {
						consecutive = 1
					} else if consecutive > 1 {
						b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
					}
				} else {
					consecutive = 1
					b *= bonusFirstCharMultiplier
				}
				// If the boosted match still loses to the gap score, record the
				// match with its plain bonus and no consecutive run.
				if s1+b < s2 {
					s1 += B[j]
					consecutive = 0
				} else {
					s1 += b
				}
			}
			C[I+j0] = consecutive

			inGap = s1 < s2
			score := util.Max16(util.Max16(s1, s2), 0)
			// Ties keep the earliest column when forward and move to the
			// latest one otherwise.
			if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) {
				maxScore, maxScorePos = score, j
			}
			H[I+j0] = score
		}

		// Dump the current rows of H and C when debugging is enabled.
		if DEBUG {
			if i == 0 {
				fmt.Print(" ")
				for j := int(F[i]); j <= lastIdx; j++ {
					fmt.Printf(" " + string(input.Get(j)) + " ")
				}
				fmt.Println()
			}
			fmt.Print(string(pattern[i]) + " ")
			for idx := int(F[0]); idx < int(F[i]); idx++ {
				fmt.Print(" 0 ")
			}
			for idx := int(F[i]); idx <= lastIdx; idx++ {
				fmt.Printf("%2d ", H[i*width+idx-int(F[0])])
			}
			fmt.Println()

			fmt.Print(" ")
			for idx, p := range C[I : I+width] {
				if idx+int(F[0]) < int(F[i]) {
					p = 0
				}
				fmt.Printf("%2d ", p)
			}
			fmt.Println()
		}
	}

	// Phase 3. (Optional) Backtrace to find character positions
	pos := posArray(withPos, M)
	j := int(F[0])
	if withPos {
		// Walk from the best cell in the last row back towards row 0,
		// appending a position each time a match cell is chosen.
		i := M - 1
		j = maxScorePos
		preferMatch := true
		for {
			I := i * width
			j0 := j - int(F[0])
			s := H[I+j0]

			// s1: diagonal predecessor (match); s2: left predecessor (gap).
			var s1, s2 int16
			if i > 0 && j >= int(F[i]) {
				s1 = H[I-width+j0-1]
			}
			if j > int(F[i]) {
				s2 = H[I+j0-1]
			}

			if s > s1 && (s > s2 || s == s2 && preferMatch) {
				*pos = append(*pos, j)
				if i == 0 {
					break
				}
				i--
			}
			preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0
			j--
		}
	}
	// Start offset we return here is only relevant when begin tiebreak is used.
	// However finding the accurate offset requires backtracking, and we don't
	// want to pay extra cost for the option that has lost its importance.
	return Result{j, maxScorePos + 1, int(maxScore)}, pos
}