Exemple #1
0
// ExactMatchNaive locates the first occurrence of pattern inside runes with a
// plain scan-and-rewind search and returns the half-open rune range
// [start, end) of that occurrence.
//
// When caseSensitive is false only the runes of the input are folded to lower
// case (ASCII inline, everything else through unicode.To); the pattern is
// compared as given, so it has to be lower-cased by the caller.
//
// An empty pattern yields (0, 0). When the pattern is longer than the input,
// or does not occur in it, the result is (-1, -1).
//
// Although naive, it still performs better than the combination of
// strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
	need := len(pattern)
	if need == 0 {
		return 0, 0
	}

	total := len(runes)
	if need > total {
		return -1, -1
	}

	matched := 0
	for i := 0; i < total; i++ {
		r := runes[i]
		if !caseSensitive {
			switch {
			case 'A' <= r && r <= 'Z':
				r += 'a' - 'A'
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}

		if r != pattern[matched] {
			// Rewind to where the partial match began; the loop increment
			// then restarts the comparison one rune further on.
			i -= matched
			matched = 0
			continue
		}

		matched++
		if matched == need {
			return i + 1 - need, i + 1
		}
	}
	return -1, -1
}
Exemple #2
0
// EqualMatch reports a match only when the entire text equals the pattern.
//
// Texts whose length differs from the pattern never match. With normalize
// set, the comparison is done rune by rune on normalizeRune of both sides
// (the text rune is lower-cased first unless caseSensitive). Otherwise the
// text is compared as a string, lower-cased with strings.ToLower unless
// caseSensitive.
//
// On success the result spans [0, len(pattern)); on failure it is
// Result{-1, -1, 0}. The position slice is always nil; forward, withPos and
// slab are not used by this function.
func EqualMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	size := len(pattern)
	if text.Length() != size {
		return Result{-1, -1, 0}, nil
	}

	equal := true
	if !normalize {
		candidate := text.ToString()
		if !caseSensitive {
			candidate = strings.ToLower(candidate)
		}
		equal = candidate == string(pattern)
	} else {
		textRunes := text.ToRunes()
		for i, want := range pattern {
			got := textRunes[i]
			if !caseSensitive {
				got = unicode.To(unicode.LowerCase, got)
			}
			if normalizeRune(want) != normalizeRune(got) {
				equal = false
				break
			}
		}
	}

	if !equal {
		return Result{-1, -1, 0}, nil
	}
	// Every character counts as a boundary match, and the first character
	// additionally receives the first-character multiplier.
	return Result{0, size, (scoreMatch+bonusBoundary)*size +
		(bonusFirstCharMultiplier-1)*bonusBoundary}, nil
}
Exemple #3
0
// calculateScore scores an already located match spanning text[sidx:eidx],
// implementing the same sorting criteria as V2.
//
// It walks the range once, consuming pattern characters in order. A matching
// character adds scoreMatch plus a bonus derived from the character classes
// around it; a non-matching character adds a gap penalty (scoreGapStart for
// the first character of a gap, scoreGapExtention afterwards).
//
// The second return value is whatever posArray(withPos, len(pattern))
// produced; when withPos is set, the index of every matched character is
// appended to it.
func calculateScore(caseSensitive bool, normalize bool, text util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) {
	var (
		total      int
		matched    int
		run        int
		gapOpen    bool
		chunkBonus = int16(0)
	)
	pos := posArray(withPos, len(pattern))

	// The class of the character just before the range decides the bonus of
	// the first character; the very start of the text counts as non-word.
	before := charNonWord
	if sidx > 0 {
		before = charClassOf(text.Get(sidx - 1))
	}

	for at := sidx; at < eidx; at++ {
		r := text.Get(at)
		// The class is taken from the original character, before folding.
		current := charClassOf(r)
		if !caseSensitive {
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		// pattern is already normalized
		if normalize {
			r = normalizeRune(r)
		}

		if r != pattern[matched] {
			if gapOpen {
				total += scoreGapExtention
			} else {
				total += scoreGapStart
			}
			gapOpen, run, chunkBonus = true, 0, 0
			before = current
			continue
		}

		if withPos {
			*pos = append(*pos, at)
		}
		total += scoreMatch
		b := bonusFor(before, current)
		if run == 0 {
			// First character of a consecutive chunk sets the chunk bonus.
			chunkBonus = b
		} else {
			// Break consecutive chunk
			if b == bonusBoundary {
				chunkBonus = b
			}
			b = util.Max16(util.Max16(b, chunkBonus), bonusConsecutive)
		}
		if matched == 0 {
			total += int(b * bonusFirstCharMultiplier)
		} else {
			total += int(b)
		}
		gapOpen = false
		run++
		matched++
		before = current
	}
	return total, pos
}
Exemple #4
0
// Spell writes the spelling of w to standard output, one entry per rune,
// with a single space between consecutive entries and no trailing space.
//
// Every rune is lower-cased first. If the lower-cased rune is a key of Table,
// the mapped value is printed; otherwise the rune itself is printed.
func Spell(w string) {
	for i, c := range w {
		// Ranging over a string yields byte offsets, so the separator is
		// emitted before every rune except the first instead of comparing the
		// offset against len(w)-1, which never matches when the final rune is
		// encoded in more than one byte.
		if i > 0 {
			fmt.Print(" ")
		}
		c = unicode.To(unicode.LowerCase, c)
		if val, ok := Table[c]; ok {
			fmt.Print(val)
		} else {
			fmt.Print(string(c))
		}
	}
}
Exemple #5
0
// normalizeReference canonicalizes s for comparison: every run of Unicode
// white space is collapsed into one ASCII space, all other runes are
// lower-cased, and leading and trailing white space is removed.
func normalizeReference(s string) string {
	var out bytes.Buffer
	inSpaceRun := false
	for _, r := range s {
		switch {
		case !unicode.IsSpace(r):
			out.WriteRune(unicode.To(unicode.LowerCase, r))
			inSpaceRun = false
		case !inSpaceRun:
			// First white-space rune of a run: emit the single separator.
			out.WriteByte(' ')
			inSpaceRun = true
		}
	}

	trimmed := bytes.TrimSpace(out.Bytes())
	return string(trimmed)
}
Exemple #6
0
// ExactMatchNaive searches runes for the first occurrence of pattern with a
// plain scan-and-rewind search and reports it as a Result whose third field
// is the value returned by evaluateBonus for the matched range.
//
// Both the input and the pattern are read through runeAt, which receives the
// forward flag (presumably it reads from the opposite end when forward is
// false; confirm against its definition). For a non-forward search the scan
// position is translated back into offsets of the original slice before the
// result is built.
//
// When caseSensitive is false only the input runes are lower-cased; the
// pattern is compared as given. An empty pattern yields Result{0, 0, 0}; no
// match yields Result{-1, -1, 0}.
//
// Although naive, it still performs better than the combination of
// strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
	need := len(pattern)
	if need == 0 {
		return Result{0, 0, 0}
	}

	total := len(runes)
	if total < need {
		return Result{-1, -1, 0}
	}

	matched := 0
	for i := 0; i < total; i++ {
		r := runeAt(runes, i, total, forward)
		if !caseSensitive {
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}

		if r != runeAt(pattern, matched, need, forward) {
			// Rewind to the start of the partial match; the loop increment
			// then moves on to the next candidate position.
			i -= matched
			matched = 0
			continue
		}

		if matched++; matched < need {
			continue
		}

		begin, end := i-need+1, i+1
		if !forward {
			begin, end = total-(i+1), total-(i-need+1)
		}
		return Result{int32(begin), int32(end),
			evaluateBonus(caseSensitive, runes, pattern, begin, end)}
	}
	return Result{-1, -1, 0}
}
Exemple #7
0
// ExactMatchNaive searches runes for the first occurrence of pattern with a
// plain scan-and-rewind search and returns the half-open range [start, end)
// of the match, expressed as offsets into runes.
//
// Both the input and the pattern are read through runeAt, which receives the
// forward flag (presumably it reads from the opposite end when forward is
// false; confirm against its definition). For a non-forward search the scan
// position is translated back into offsets of the original slice.
//
// When caseSensitive is false only the input runes are lower-cased; the
// pattern is compared as given. An empty pattern yields (0, 0); no match
// yields (-1, -1).
//
// Although naive, it still performs better than the combination of
// strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// We might try to implement better algorithms in the future:
// http://en.wikipedia.org/wiki/String_searching_algorithm
func ExactMatchNaive(caseSensitive bool, forward bool, runes []rune, pattern []rune) (int, int) {
	need := len(pattern)
	if need == 0 {
		return 0, 0
	}

	total := len(runes)
	if total < need {
		return -1, -1
	}

	matched := 0
	for i := 0; i < total; i++ {
		r := runeAt(runes, i, total, forward)
		if !caseSensitive {
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}

		if r != runeAt(pattern, matched, need, forward) {
			// Rewind to the start of the partial match; the loop increment
			// then moves on to the next candidate position.
			i -= matched
			matched = 0
			continue
		}

		if matched++; matched < need {
			continue
		}
		if !forward {
			return total - (i + 1), total - (i - need + 1)
		}
		return i - need + 1, i + 1
	}
	return -1, -1
}
Exemple #8
0
// ExampleTo demonstrates unicode.To by mapping a lower-case and an upper-case
// letter to each of the three cases (upper, lower, title) in turn.
func ExampleTo() {
	targets := []int{unicode.UpperCase, unicode.LowerCase, unicode.TitleCase}
	for _, letter := range []rune{'g', 'G'} {
		for _, target := range targets {
			fmt.Printf("%#U\n", unicode.To(target, letter))
		}
	}

	// Output:
	// U+0047 'G'
	// U+0067 'g'
	// U+0047 'G'
	// U+0047 'G'
	// U+0067 'g'
	// U+0047 'G'
}
Exemple #9
0
// evaluateBonus computes the bonus of a match of pattern that lies inside
// runes[sidx:eidx].
//
// The runes from the start of the slice up to eidx are classified one by one
// so that every position knows the class of its predecessor. A position earns
// 2 points when it starts a word (non-word followed by a word character) and
// 1 point on a lower-to-upper transition or when a number starts. From sidx
// on, pattern characters are consumed in order; for each matching rune the
// position's points are doubled if it matches the first pattern character,
// increased by one if the previous rune matched as well, and then added to
// the total. Scanning stops as soon as the whole pattern has been consumed.
//
// When caseSensitive is false the input rune is lower-cased before the
// comparison; the pattern is compared as given.
func evaluateBonus(caseSensitive bool, runes []rune, pattern []rune, sidx int, eidx int) int32 {
	var total int32
	matched := 0
	need := len(pattern)
	streak := false
	before := charNonWord
	for i := 0; i < eidx; i++ {
		r := runes[i]

		var current charClass
		switch {
		case unicode.IsLower(r):
			current = charLower
		case unicode.IsUpper(r):
			current = charUpper
		case unicode.IsLetter(r):
			current = charLetter
		case unicode.IsNumber(r):
			current = charNumber
		default:
			current = charNonWord
		}

		var point int32
		switch {
		case before == charNonWord && current != charNonWord:
			// Word boundary
			point = 2
		case before == charLower && current == charUpper,
			before != charNumber && current == charNumber:
			// camelCase letter123
			point = 1
		}
		before = current

		// Positions ahead of the match only feed the class tracking.
		if i < sidx {
			continue
		}

		if !caseSensitive {
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != pattern[matched] {
			streak = false
			continue
		}

		// Boost bonus for the first character in the pattern
		if matched == 0 {
			point *= 2
		}
		// Bonus to consecutive matching chars
		if streak {
			point++
		}
		total += point

		matched++
		if matched == need {
			break
		}
		streak = true
	}
	return total
}
Exemple #10
0
// FuzzyMatch performs a greedy fuzzy (in-order subsequence) match of pattern
// against runes and reports the matched range together with the value
// returned by evaluateBonus for it.
//
// The search runs in two passes:
//
//  1. a scan in search order that stops at the first position where every
//     pattern character has been seen in sequence (abc)
//     *-----*-----*>
//     a_____b___abc__
//  2. a scan back from that position that moves the start as close to the
//     end as possible (cba)
//     a_____b___abc__
//     ...........<***
//
// FIXME: this does not always find the shortest match:
//
//	a_____b__c__abc
//	^^^^^^^^^^  ^^^
//
// Input and pattern are read through runeAt, which receives the forward flag.
// When caseSensitive is false only the input runes are lower-cased. An empty
// pattern yields Result{0, 0, 0}; no match yields Result{-1, -1, 0}.
func FuzzyMatch(caseSensitive bool, forward bool, runes []rune, pattern []rune) Result {
	need := len(pattern)
	if need == 0 {
		return Result{0, 0, 0}
	}
	total := len(runes)

	matched, begin, end := 0, -1, -1

	// Pass 1: find where the pattern is first completed.
	for i := 0; i < total; i++ {
		r := runeAt(runes, i, total, forward)
		// Folding rune by rune is considerably faster than blindly applying
		// strings.ToLower to the whole string.
		if !caseSensitive {
			// Partially inlined `unicode.ToLower`. Ugly, but makes a
			// noticeable difference in CPU cost. (Measured on Go 1.4.1. Also
			// note that the Go compiler as of now does not inline non-leaf
			// functions.)
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != runeAt(pattern, matched, need, forward) {
			continue
		}
		if begin < 0 {
			begin = i
		}
		matched++
		if matched == need {
			end = i + 1
			break
		}
	}

	if begin < 0 || end < 0 {
		return Result{-1, -1, 0}
	}

	// Pass 2: walk back from the end, consuming the pattern in reverse.
	matched--
	for i := end - 1; i >= begin; i-- {
		r := runeAt(runes, i, total, forward)
		if !caseSensitive {
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != runeAt(pattern, matched, need, forward) {
			continue
		}
		matched--
		if matched < 0 {
			begin = i
			break
		}
	}

	// Calculate the bonus. This can't be done at the same time as the
	// pattern scan above because 'forward' may be false.
	if !forward {
		begin, end = total-end, total-begin
	}

	return Result{int32(begin), int32(end),
		evaluateBonus(caseSensitive, runes, pattern, begin, end)}
}
Exemple #11
0
// FuzzyMatch performs a greedy fuzzy (in-order subsequence) match of pattern
// against runes and returns the half-open range [start, end) that contains
// the match.
//
// The search runs in two passes:
//
//  1. a forward scan that stops at the first position where every pattern
//     character has been seen in sequence (abc)
//     *-----*-----*>
//     a_____b___abc__
//  2. a reverse scan from that position that moves the start as close to the
//     end as possible (cba)
//     a_____b___abc__
//     ...........<***
//
// FIXME: this does not always find the shortest match:
//
//	a_____b__c__abc
//	^^^^^^^^^^  ^^^
//
// When caseSensitive is false only the input runes are lower-cased; the
// pattern is compared as given. An empty pattern yields (0, 0); no match
// yields (-1, -1).
func FuzzyMatch(caseSensitive bool, runes []rune, pattern []rune) (int, int) {
	need := len(pattern)
	if need == 0 {
		return 0, 0
	}

	matched, begin, end := 0, -1, -1

	// Pass 1: find where the pattern is first completed.
	for i := 0; i < len(runes); i++ {
		r := runes[i]
		// Folding rune by rune is considerably faster than blindly applying
		// strings.ToLower to the whole string.
		if !caseSensitive {
			// Partially inlined `unicode.ToLower`. Ugly, but makes a
			// noticeable difference in CPU cost. (Measured on Go 1.4.1. Also
			// note that the Go compiler as of now does not inline non-leaf
			// functions.)
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != pattern[matched] {
			continue
		}
		if begin < 0 {
			begin = i
		}
		matched++
		if matched == need {
			end = i + 1
			break
		}
	}

	if begin < 0 || end < 0 {
		return -1, -1
	}

	// Pass 2: walk back from the end, consuming the pattern in reverse.
	matched--
	for i := end - 1; i >= begin; i-- {
		r := runes[i]
		if !caseSensitive {
			switch {
			case r >= 'A' && r <= 'Z':
				r += 32
			case r > unicode.MaxASCII:
				r = unicode.To(unicode.LowerCase, r)
			}
		}
		if r != pattern[matched] {
			continue
		}
		matched--
		if matched < 0 {
			begin = i
			break
		}
	}
	return begin, end
}
Exemple #12
0
// ExactMatchNaive is a basic string searching algorithm that handles case
// sensitivity. Although naive, it still performs better than the combination
// of strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// Since 0.15.0, this function searches for the match with the highest
// bonus point, instead of stopping immediately after finding the first match.
// The solution is much cheaper since there is only one possible alignment of
// the pattern.
//
// An empty pattern yields Result{0, 0, 0}. When the text is shorter than the
// pattern, or no occurrence is found, the result is Result{-1, -1, 0}.
// Otherwise the result carries the matched range and the score returned by
// calculateScore for it. The returned position slice is always nil; withPos
// and slab are not used by this function.
//
// When caseSensitive is false only the text is lower-cased, and when
// normalize is set only the text is passed through normalizeRune; the pattern
// is compared as given.
func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	if len(pattern) == 0 {
		return Result{0, 0, 0}, nil
	}

	lenRunes := text.Length()
	lenPattern := len(pattern)

	if lenRunes < lenPattern {
		return Result{-1, -1, 0}, nil
	}

	// For simplicity, only look at the bonus at the first character position
	pidx := 0
	// bestPos is the scan index at which the best occurrence was completed;
	// bestBonus starts below any bonus so the first occurrence always wins.
	bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
	for index := 0; index < lenRunes; index++ {
		// index and pidx count in scan order; indexAt translates them into
		// real offsets (presumably mirrored when forward is false; confirm
		// against indexAt's definition).
		index_ := indexAt(index, lenRunes, forward)
		char := text.Get(index_)
		if !caseSensitive {
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		if normalize {
			char = normalizeRune(char)
		}
		pidx_ := indexAt(pidx, lenPattern, forward)
		pchar := pattern[pidx_]
		if pchar == char {
			// The bonus of an occurrence is taken at the text position that
			// lines up with pattern[0].
			if pidx_ == 0 {
				bonus = bonusAt(text, index_)
			}
			pidx++
			if pidx == lenPattern {
				// A complete occurrence: keep it only if it beats the best
				// bonus so far, so earlier occurrences win ties.
				if bonus > bestBonus {
					bestPos, bestBonus = index, bonus
				}
				// Stop as soon as an occurrence has the boundary bonus.
				if bonus == bonusBoundary {
					break
				}
				// Rewind to the first position of this occurrence; the loop
				// increment then resumes the search one position after it.
				index -= pidx - 1
				pidx, bonus = 0, 0
			}
		} else {
			// Mismatch: rewind to where the partial match began; the loop
			// increment then moves on to the next candidate position.
			index -= pidx
			pidx, bonus = 0, 0
		}
	}
	if bestPos >= 0 {
		// Convert the scan index of the occurrence's last character into a
		// half-open range of real text offsets.
		var sidx, eidx int
		if forward {
			sidx = bestPos - lenPattern + 1
			eidx = bestPos + 1
		} else {
			sidx = lenRunes - (bestPos + 1)
			eidx = lenRunes - (bestPos - lenPattern + 1)
		}
		score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false)
		return Result{sidx, eidx, score}, nil
	}
	return Result{-1, -1, 0}, nil
}
Exemple #13
0
// FuzzyMatchV1 performs a greedy fuzzy (in-order subsequence) match of
// pattern against text.
//
// The search runs in two passes: a scan in search order that stops at the
// first position where every pattern character has been seen in sequence,
// followed by a scan back from that position that moves the start as close
// to the end as possible. Both passes transform the text characters the same
// way (lower-casing unless caseSensitive, normalizeRune when normalize is
// set); the pattern is compared as given.
//
// An empty pattern yields Result{0, 0, 0} with nil positions, and a failed
// match yields Result{-1, -1, 0} with nil positions. On success the result
// carries the matched range and the score and positions returned by
// calculateScore. slab is not used by this function.
func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	if len(pattern) == 0 {
		return Result{0, 0, 0}, nil
	}

	pidx := 0
	sidx := -1
	eidx := -1

	lenRunes := text.Length()
	lenPattern := len(pattern)

	// Pass 1: find where the pattern is first completed.
	for index := 0; index < lenRunes; index++ {
		char := text.Get(indexAt(index, lenRunes, forward))
		// This is considerably faster than blindly applying strings.ToLower to the
		// whole string
		if !caseSensitive {
			// Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
			// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
			// compiler as of now does not inline non-leaf functions.)
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		if normalize {
			char = normalizeRune(char)
		}
		pchar := pattern[indexAt(pidx, lenPattern, forward)]
		if char == pchar {
			if sidx < 0 {
				sidx = index
			}
			if pidx++; pidx == lenPattern {
				eidx = index + 1
				break
			}
		}
	}

	if sidx < 0 || eidx < 0 {
		return Result{-1, -1, 0}, nil
	}

	// Pass 2: walk back from the end, consuming the pattern in reverse, to
	// tighten the start of the match.
	pidx--
	for index := eidx - 1; index >= sidx; index-- {
		char := text.Get(indexAt(index, lenRunes, forward))
		if !caseSensitive {
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		// Apply the same normalization as pass 1. Without it a character
		// that only matched after normalization is skipped here, and the
		// start of the match is not tightened.
		if normalize {
			char = normalizeRune(char)
		}

		pchar := pattern[indexAt(pidx, lenPattern, forward)]
		if char == pchar {
			if pidx--; pidx < 0 {
				sidx = index
				break
			}
		}
	}

	// sidx and eidx are in scan order up to here; convert them into real text
	// offsets for a non-forward search.
	if !forward {
		sidx, eidx = lenRunes-eidx, lenRunes-sidx
	}

	score, pos := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, withPos)
	return Result{sidx, eidx, score}, pos
}
Exemple #14
0
// FuzzyMatchV2 performs fuzzy matching by filling a score matrix over the
// input and the pattern and picking the best-scoring end position.
//
// An empty pattern yields Result{0, 0, 0}; a single-character pattern is
// delegated to ExactMatchNaive. When a slab is given and N*M exceeds the
// capacity of slab.I16, the greedy FuzzyMatchV1 is used instead. If the
// pattern characters do not all occur in order, the result is
// Result{-1, -1, 0} with nil positions.
//
// On success the result holds a start offset, the end offset (one past the
// best-scoring position) and the score. The second return value comes from
// posArray(withPos, M) and, when withPos is set, receives the matched
// positions found by the backtrace, appended from last to first.
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	// Assume that pattern is given in lowercase if case-insensitive.
	// First check if there's a match and calculate bonus for each position.
	// If the input string is too long, consider finding the matching chars in
	// this phase as well (non-optimal alignment).
	N := input.Length()
	M := len(pattern)
	switch M {
	case 0:
		return Result{0, 0, 0}, posArray(withPos, M)
	case 1:
		return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab)
	}

	// Since O(nm) algorithm can be prohibitively expensive for large input,
	// we fall back to the greedy algorithm.
	if slab != nil && N*M > cap(slab.I16) {
		return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
	}

	// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
	offset16 := 0
	offset32 := 0
	// Bonus point for each position
	offset16, B := alloc16(offset16, slab, N, false)
	// The first occurrence of each character in the pattern
	offset32, F := alloc32(offset32, slab, M, false)
	// Rune array
	offset32, T := alloc32(offset32, slab, N, false)

	// Phase 1. Check if there's a match and calculate bonus for each point
	pidx, lastIdx, prevClass := 0, 0, charNonWord
	for idx := 0; idx < N; idx++ {
		char := input.Get(idx)
		var class charClass
		if char <= unicode.MaxASCII {
			class = charClassOfAscii(char)
		} else {
			class = charClassOfNonAscii(char)
		}

		// Only characters classified as upper case need folding.
		if !caseSensitive && class == charUpper {
			if char <= unicode.MaxASCII {
				char += 32
			} else {
				char = unicode.To(unicode.LowerCase, char)
			}
		}

		if normalize {
			char = normalizeRune(char)
		}

		// T keeps the transformed character so later phases can compare it
		// against the pattern without repeating the folding.
		T[idx] = char
		B[idx] = bonusFor(prevClass, class)
		prevClass = class

		if pidx < M {
			if char == pattern[pidx] {
				lastIdx = idx
				F[pidx] = int32(idx)
				pidx++
			}
		} else {
			// The whole pattern has been seen; keep moving lastIdx to later
			// occurrences of the final pattern character.
			if char == pattern[M-1] {
				lastIdx = idx
			}
		}
	}
	if pidx != M {
		return Result{-1, -1, 0}, nil
	}

	// Phase 2. Fill in score matrix (H)
	// Unlike the original algorithm, we do not allow omission.
	// Only the columns from F[0] to lastIdx are stored; j0 below is the
	// column index relative to F[0].
	width := lastIdx - int(F[0]) + 1
	offset16, H := alloc16(offset16, slab, width*M, false)

	// Possible length of consecutive chunk at each position.
	offset16, C := alloc16(offset16, slab, width*M, false)

	maxScore, maxScorePos := int16(0), 0
	for i := 0; i < M; i++ {
		// I is the offset of row i in the flattened matrices.
		I := i * width
		inGap := false
		for j := int(F[i]); j <= lastIdx; j++ {
			j0 := j - int(F[0])
			// s1 is the score of matching pattern[i] at j, s2 the score of
			// leaving j as part of a gap.
			var s1, s2, consecutive int16

			if j > int(F[i]) {
				if inGap {
					s2 = H[I+j0-1] + scoreGapExtention
				} else {
					s2 = H[I+j0-1] + scoreGapStart
				}
			}

			if pattern[i] == T[j] {
				var diag int16
				if i > 0 && j0 > 0 {
					diag = H[I-width+j0-1]
				}
				s1 = diag + scoreMatch
				b := B[j]
				if i > 0 {
					// j > 0 if i > 0
					consecutive = C[I-width+j0-1] + 1
					// Break consecutive chunk
					if b == bonusBoundary {
						consecutive = 1
					} else if consecutive > 1 {
						b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
					}
				} else {
					consecutive = 1
					b *= bonusFirstCharMultiplier
				}
				// If the match with the chunk bonus still scores below the
				// gap, use the position's own bonus and reset the chunk.
				if s1+b < s2 {
					s1 += B[j]
					consecutive = 0
				} else {
					s1 += b
				}
			}
			C[I+j0] = consecutive

			inGap = s1 < s2
			score := util.Max16(util.Max16(s1, s2), 0)
			// Track the best score on the last row; a non-forward search
			// also accepts ties, so it settles on the later position.
			if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) {
				maxScore, maxScorePos = score, j
			}
			H[I+j0] = score
		}

		// Debug output: the input characters once, then the H row and the C
		// row for pattern[i].
		if DEBUG {
			if i == 0 {
				fmt.Print("  ")
				for j := int(F[i]); j <= lastIdx; j++ {
					fmt.Printf(" " + string(input.Get(j)) + " ")
				}
				fmt.Println()
			}
			fmt.Print(string(pattern[i]) + " ")
			for idx := int(F[0]); idx < int(F[i]); idx++ {
				fmt.Print(" 0 ")
			}
			for idx := int(F[i]); idx <= lastIdx; idx++ {
				fmt.Printf("%2d ", H[i*width+idx-int(F[0])])
			}
			fmt.Println()

			fmt.Print("  ")
			for idx, p := range C[I : I+width] {
				if idx+int(F[0]) < int(F[i]) {
					p = 0
				}
				fmt.Printf("%2d ", p)
			}
			fmt.Println()
		}
	}

	// Phase 3. (Optional) Backtrace to find character positions
	pos := posArray(withPos, M)
	// Without a backtrace the returned start is the first occurrence of
	// pattern[0]; with one, j ends on the position matched to pattern[0].
	j := int(F[0])
	if withPos {
		i := M - 1
		j = maxScorePos
		preferMatch := true
		for {
			I := i * width
			j0 := j - int(F[0])
			s := H[I+j0]

			// s1 is the diagonal neighbour (previous pattern character,
			// previous column), s2 the left neighbour (same row).
			var s1, s2 int16
			if i > 0 && j >= int(F[i]) {
				s1 = H[I-width+j0-1]
			}
			if j > int(F[i]) {
				s2 = H[I+j0-1]
			}

			if s > s1 && (s > s2 || s == s2 && preferMatch) {
				*pos = append(*pos, j)
				if i == 0 {
					break
				}
				i--
			}
			preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0
			j--
		}
	}
	// Start offset we return here is only relevant when begin tiebreak is used.
	// However finding the accurate offset requires backtracking, and we don't
	// want to pay extra cost for the option that has lost its importance.
	return Result{j, maxScorePos + 1, int(maxScore)}, pos
}