Пример #1
0
// PrefixMatch performs prefix-match
func PrefixMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	if len(pattern) == 0 {
		return Result{0, 0, 0}, nil
	}

	if text.Length() < len(pattern) {
		return Result{-1, -1, 0}, nil
	}

	for index, r := range pattern {
		char := text.Get(index)
		if !caseSensitive {
			char = unicode.ToLower(char)
		}
		if normalize {
			char = normalizeRune(char)
		}
		if char != r {
			return Result{-1, -1, 0}, nil
		}
	}
	lenPattern := len(pattern)
	score, _ := calculateScore(caseSensitive, normalize, text, pattern, 0, lenPattern, false)
	return Result{0, lenPattern, score}, nil
}
Пример #2
0
// Tokenize tokenizes the given string with the delimiter
func Tokenize(text util.Chars, delimiter Delimiter) []Token {
	if delimiter.str == nil && delimiter.regex == nil {
		// AWK-style (\S+\s*)
		tokens, prefixLength := awkTokenizer(text)
		return withPrefixLengths(tokens, prefixLength)
	}

	if delimiter.str != nil {
		return withPrefixLengths(text.Split(*delimiter.str), 0)
	}

	// FIXME performance
	var tokens []string
	if delimiter.regex != nil {
		str := text.ToString()
		for len(str) > 0 {
			loc := delimiter.regex.FindStringIndex(str)
			if loc == nil {
				loc = []int{0, len(str)}
			}
			last := util.Max(loc[1], 1)
			tokens = append(tokens, str[:last])
			str = str[last:]
		}
	}
	asRunes := make([]util.Chars, len(tokens))
	for i, token := range tokens {
		asRunes[i] = util.RunesToChars([]rune(token))
	}
	return withPrefixLengths(asRunes, 0)
}
Пример #3
0
// Implement the same sorting criteria as V2
func calculateScore(caseSensitive bool, normalize bool, text util.Chars, pattern []rune, sidx int, eidx int, withPos bool) (int, *[]int) {
	pidx, score, inGap, consecutive, firstBonus := 0, 0, false, 0, int16(0)
	pos := posArray(withPos, len(pattern))
	prevClass := charNonWord
	if sidx > 0 {
		prevClass = charClassOf(text.Get(sidx - 1))
	}
	for idx := sidx; idx < eidx; idx++ {
		char := text.Get(idx)
		class := charClassOf(char)
		if !caseSensitive {
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		// pattern is already normalized
		if normalize {
			char = normalizeRune(char)
		}
		if char == pattern[pidx] {
			if withPos {
				*pos = append(*pos, idx)
			}
			score += scoreMatch
			bonus := bonusFor(prevClass, class)
			if consecutive == 0 {
				firstBonus = bonus
			} else {
				// Break consecutive chunk
				if bonus == bonusBoundary {
					firstBonus = bonus
				}
				bonus = util.Max16(util.Max16(bonus, firstBonus), bonusConsecutive)
			}
			if pidx == 0 {
				score += int(bonus * bonusFirstCharMultiplier)
			} else {
				score += int(bonus)
			}
			inGap = false
			consecutive++
			pidx++
		} else {
			if inGap {
				score += scoreGapExtention
			} else {
				score += scoreGapStart
			}
			inGap = true
			consecutive = 0
			firstBonus = 0
		}
		prevClass = class
	}
	return score, pos
}
Пример #4
0
// EqualMatch performs equal-match
func EqualMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	lenPattern := len(pattern)
	if text.Length() != lenPattern {
		return Result{-1, -1, 0}, nil
	}
	match := true
	if normalize {
		runes := text.ToRunes()
		for idx, pchar := range pattern {
			char := runes[idx]
			if !caseSensitive {
				char = unicode.To(unicode.LowerCase, char)
			}
			if normalizeRune(pchar) != normalizeRune(char) {
				match = false
				break
			}
		}
	} else {
		runesStr := text.ToString()
		if !caseSensitive {
			runesStr = strings.ToLower(runesStr)
		}
		match = runesStr == string(pattern)
	}
	if match {
		return Result{0, lenPattern, (scoreMatch+bonusBoundary)*lenPattern +
			(bonusFirstCharMultiplier-1)*bonusBoundary}, nil
	}
	return Result{-1, -1, 0}, nil
}
Пример #5
0
// SuffixMatch performs suffix-match
func SuffixMatch(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	lenRunes := text.Length()
	trimmedLen := lenRunes - text.TrailingWhitespaces()
	if len(pattern) == 0 {
		return Result{trimmedLen, trimmedLen, 0}, nil
	}
	diff := trimmedLen - len(pattern)
	if diff < 0 {
		return Result{-1, -1, 0}, nil
	}

	for index, r := range pattern {
		char := text.Get(index + diff)
		if !caseSensitive {
			char = unicode.ToLower(char)
		}
		if normalize {
			char = normalizeRune(char)
		}
		if char != r {
			return Result{-1, -1, 0}, nil
		}
	}
	lenPattern := len(pattern)
	sidx := trimmedLen - lenPattern
	eidx := trimmedLen
	score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false)
	return Result{sidx, eidx, score}, nil
}
Пример #6
0
func awkTokenizer(input util.Chars) ([]util.Chars, int) {
	// 9, 32
	ret := []util.Chars{}
	prefixLength := 0
	state := awkNil
	numChars := input.Length()
	begin := 0
	end := 0
	for idx := 0; idx < numChars; idx++ {
		r := input.Get(idx)
		white := r == 9 || r == 32
		switch state {
		case awkNil:
			if white {
				prefixLength++
			} else {
				state, begin, end = awkBlack, idx, idx+1
			}
		case awkBlack:
			end = idx + 1
			if white {
				state = awkWhite
			}
		case awkWhite:
			if white {
				end = idx + 1
			} else {
				ret = append(ret, input.Slice(begin, end))
				state, begin, end = awkBlack, idx, idx+1
			}
		}
	}
	if begin < end {
		ret = append(ret, input.Slice(begin, end))
	}
	return ret, prefixLength
}
Пример #7
0
// Transform is used to transform the input when --with-nth option is given
func Transform(tokens []Token, withNth []Range) []Token {
	transTokens := make([]Token, len(withNth))
	numTokens := len(tokens)
	for idx, r := range withNth {
		parts := []*util.Chars{}
		minIdx := 0
		if r.begin == r.end {
			idx := r.begin
			if idx == rangeEllipsis {
				chars := util.RunesToChars(joinTokens(tokens))
				parts = append(parts, &chars)
			} else {
				if idx < 0 {
					idx += numTokens + 1
				}
				if idx >= 1 && idx <= numTokens {
					minIdx = idx - 1
					parts = append(parts, tokens[idx-1].text)
				}
			}
		} else {
			var begin, end int
			if r.begin == rangeEllipsis { // ..N
				begin, end = 1, r.end
				if end < 0 {
					end += numTokens + 1
				}
			} else if r.end == rangeEllipsis { // N..
				begin, end = r.begin, numTokens
				if begin < 0 {
					begin += numTokens + 1
				}
			} else {
				begin, end = r.begin, r.end
				if begin < 0 {
					begin += numTokens + 1
				}
				if end < 0 {
					end += numTokens + 1
				}
			}
			minIdx = util.Max(0, begin-1)
			for idx := begin; idx <= end; idx++ {
				if idx >= 1 && idx <= numTokens {
					parts = append(parts, tokens[idx-1].text)
				}
			}
		}
		// Merge multiple parts
		var merged util.Chars
		switch len(parts) {
		case 0:
			merged = util.RunesToChars([]rune{})
		case 1:
			merged = *parts[0]
		default:
			runes := []rune{}
			for _, part := range parts {
				runes = append(runes, part.ToRunes()...)
			}
			merged = util.RunesToChars(runes)
		}

		var prefixLength int32
		if minIdx < numTokens {
			prefixLength = tokens[minIdx].prefixLength
		} else {
			prefixLength = 0
		}
		transTokens[idx] = Token{&merged, prefixLength, int32(merged.TrimLength())}
	}
	return transTokens
}
Пример #8
0
// ExactMatchNaive is a basic string searching algorithm that handles case
// sensitivity. Although naive, it still performs better than the combination
// of strings.ToLower + strings.Index for typical fzf use cases where input
// strings and patterns are not very long.
//
// Since 0.15.0, this function searches for the match with the highest
// bonus point, instead of stopping immediately after finding the first match.
// The solution is much cheaper since there is only one possible alignment of
// the pattern.
func ExactMatchNaive(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	if len(pattern) == 0 {
		return Result{0, 0, 0}, nil
	}

	lenRunes := text.Length()
	lenPattern := len(pattern)

	if lenRunes < lenPattern {
		return Result{-1, -1, 0}, nil
	}

	// For simplicity, only look at the bonus at the first character position
	pidx := 0
	bestPos, bonus, bestBonus := -1, int16(0), int16(-1)
	for index := 0; index < lenRunes; index++ {
		index_ := indexAt(index, lenRunes, forward)
		char := text.Get(index_)
		if !caseSensitive {
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		if normalize {
			char = normalizeRune(char)
		}
		pidx_ := indexAt(pidx, lenPattern, forward)
		pchar := pattern[pidx_]
		if pchar == char {
			if pidx_ == 0 {
				bonus = bonusAt(text, index_)
			}
			pidx++
			if pidx == lenPattern {
				if bonus > bestBonus {
					bestPos, bestBonus = index, bonus
				}
				if bonus == bonusBoundary {
					break
				}
				index -= pidx - 1
				pidx, bonus = 0, 0
			}
		} else {
			index -= pidx
			pidx, bonus = 0, 0
		}
	}
	if bestPos >= 0 {
		var sidx, eidx int
		if forward {
			sidx = bestPos - lenPattern + 1
			eidx = bestPos + 1
		} else {
			sidx = lenRunes - (bestPos + 1)
			eidx = lenRunes - (bestPos - lenPattern + 1)
		}
		score, _ := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, false)
		return Result{sidx, eidx, score}, nil
	}
	return Result{-1, -1, 0}, nil
}
Пример #9
0
// FuzzyMatchV1 performs fuzzy-match
func FuzzyMatchV1(caseSensitive bool, normalize bool, forward bool, text util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	if len(pattern) == 0 {
		return Result{0, 0, 0}, nil
	}

	pidx := 0
	sidx := -1
	eidx := -1

	lenRunes := text.Length()
	lenPattern := len(pattern)

	for index := 0; index < lenRunes; index++ {
		char := text.Get(indexAt(index, lenRunes, forward))
		// This is considerably faster than blindly applying strings.ToLower to the
		// whole string
		if !caseSensitive {
			// Partially inlining `unicode.ToLower`. Ugly, but makes a noticeable
			// difference in CPU cost. (Measured on Go 1.4.1. Also note that the Go
			// compiler as of now does not inline non-leaf functions.)
			if char >= 'A' && char <= 'Z' {
				char += 32
			} else if char > unicode.MaxASCII {
				char = unicode.To(unicode.LowerCase, char)
			}
		}
		if normalize {
			char = normalizeRune(char)
		}
		pchar := pattern[indexAt(pidx, lenPattern, forward)]
		if char == pchar {
			if sidx < 0 {
				sidx = index
			}
			if pidx++; pidx == lenPattern {
				eidx = index + 1
				break
			}
		}
	}

	if sidx >= 0 && eidx >= 0 {
		pidx--
		for index := eidx - 1; index >= sidx; index-- {
			tidx := indexAt(index, lenRunes, forward)
			char := text.Get(tidx)
			if !caseSensitive {
				if char >= 'A' && char <= 'Z' {
					char += 32
				} else if char > unicode.MaxASCII {
					char = unicode.To(unicode.LowerCase, char)
				}
			}

			pidx_ := indexAt(pidx, lenPattern, forward)
			pchar := pattern[pidx_]
			if char == pchar {
				if pidx--; pidx < 0 {
					sidx = index
					break
				}
			}
		}

		if !forward {
			sidx, eidx = lenRunes-eidx, lenRunes-sidx
		}

		score, pos := calculateScore(caseSensitive, normalize, text, pattern, sidx, eidx, withPos)
		return Result{sidx, eidx, score}, pos
	}
	return Result{-1, -1, 0}, nil
}
Пример #10
0
func FuzzyMatchV2(caseSensitive bool, normalize bool, forward bool, input util.Chars, pattern []rune, withPos bool, slab *util.Slab) (Result, *[]int) {
	// Assume that pattern is given in lowercase if case-insensitive.
	// First check if there's a match and calculate bonus for each position.
	// If the input string is too long, consider finding the matching chars in
	// this phase as well (non-optimal alignment).
	N := input.Length()
	M := len(pattern)
	switch M {
	case 0:
		return Result{0, 0, 0}, posArray(withPos, M)
	case 1:
		return ExactMatchNaive(caseSensitive, normalize, forward, input, pattern[0:1], withPos, slab)
	}

	// Since O(nm) algorithm can be prohibitively expensive for large input,
	// we fall back to the greedy algorithm.
	if slab != nil && N*M > cap(slab.I16) {
		return FuzzyMatchV1(caseSensitive, normalize, forward, input, pattern, withPos, slab)
	}

	// Reuse pre-allocated integer slice to avoid unnecessary sweeping of garbages
	offset16 := 0
	offset32 := 0
	// Bonus point for each position
	offset16, B := alloc16(offset16, slab, N, false)
	// The first occurrence of each character in the pattern
	offset32, F := alloc32(offset32, slab, M, false)
	// Rune array
	offset32, T := alloc32(offset32, slab, N, false)

	// Phase 1. Check if there's a match and calculate bonus for each point
	pidx, lastIdx, prevClass := 0, 0, charNonWord
	for idx := 0; idx < N; idx++ {
		char := input.Get(idx)
		var class charClass
		if char <= unicode.MaxASCII {
			class = charClassOfAscii(char)
		} else {
			class = charClassOfNonAscii(char)
		}

		if !caseSensitive && class == charUpper {
			if char <= unicode.MaxASCII {
				char += 32
			} else {
				char = unicode.To(unicode.LowerCase, char)
			}
		}

		if normalize {
			char = normalizeRune(char)
		}

		T[idx] = char
		B[idx] = bonusFor(prevClass, class)
		prevClass = class

		if pidx < M {
			if char == pattern[pidx] {
				lastIdx = idx
				F[pidx] = int32(idx)
				pidx++
			}
		} else {
			if char == pattern[M-1] {
				lastIdx = idx
			}
		}
	}
	if pidx != M {
		return Result{-1, -1, 0}, nil
	}

	// Phase 2. Fill in score matrix (H)
	// Unlike the original algorithm, we do not allow omission.
	width := lastIdx - int(F[0]) + 1
	offset16, H := alloc16(offset16, slab, width*M, false)

	// Possible length of consecutive chunk at each position.
	offset16, C := alloc16(offset16, slab, width*M, false)

	maxScore, maxScorePos := int16(0), 0
	for i := 0; i < M; i++ {
		I := i * width
		inGap := false
		for j := int(F[i]); j <= lastIdx; j++ {
			j0 := j - int(F[0])
			var s1, s2, consecutive int16

			if j > int(F[i]) {
				if inGap {
					s2 = H[I+j0-1] + scoreGapExtention
				} else {
					s2 = H[I+j0-1] + scoreGapStart
				}
			}

			if pattern[i] == T[j] {
				var diag int16
				if i > 0 && j0 > 0 {
					diag = H[I-width+j0-1]
				}
				s1 = diag + scoreMatch
				b := B[j]
				if i > 0 {
					// j > 0 if i > 0
					consecutive = C[I-width+j0-1] + 1
					// Break consecutive chunk
					if b == bonusBoundary {
						consecutive = 1
					} else if consecutive > 1 {
						b = util.Max16(b, util.Max16(bonusConsecutive, B[j-int(consecutive)+1]))
					}
				} else {
					consecutive = 1
					b *= bonusFirstCharMultiplier
				}
				if s1+b < s2 {
					s1 += B[j]
					consecutive = 0
				} else {
					s1 += b
				}
			}
			C[I+j0] = consecutive

			inGap = s1 < s2
			score := util.Max16(util.Max16(s1, s2), 0)
			if i == M-1 && (forward && score > maxScore || !forward && score >= maxScore) {
				maxScore, maxScorePos = score, j
			}
			H[I+j0] = score
		}

		if DEBUG {
			if i == 0 {
				fmt.Print("  ")
				for j := int(F[i]); j <= lastIdx; j++ {
					fmt.Printf(" " + string(input.Get(j)) + " ")
				}
				fmt.Println()
			}
			fmt.Print(string(pattern[i]) + " ")
			for idx := int(F[0]); idx < int(F[i]); idx++ {
				fmt.Print(" 0 ")
			}
			for idx := int(F[i]); idx <= lastIdx; idx++ {
				fmt.Printf("%2d ", H[i*width+idx-int(F[0])])
			}
			fmt.Println()

			fmt.Print("  ")
			for idx, p := range C[I : I+width] {
				if idx+int(F[0]) < int(F[i]) {
					p = 0
				}
				fmt.Printf("%2d ", p)
			}
			fmt.Println()
		}
	}

	// Phase 3. (Optional) Backtrace to find character positions
	pos := posArray(withPos, M)
	j := int(F[0])
	if withPos {
		i := M - 1
		j = maxScorePos
		preferMatch := true
		for {
			I := i * width
			j0 := j - int(F[0])
			s := H[I+j0]

			var s1, s2 int16
			if i > 0 && j >= int(F[i]) {
				s1 = H[I-width+j0-1]
			}
			if j > int(F[i]) {
				s2 = H[I+j0-1]
			}

			if s > s1 && (s > s2 || s == s2 && preferMatch) {
				*pos = append(*pos, j)
				if i == 0 {
					break
				}
				i--
			}
			preferMatch = C[I+j0] > 1 || I+width+j0+1 < len(C) && C[I+width+j0+1] > 0
			j--
		}
	}
	// Start offset we return here is only relevant when begin tiebreak is used.
	// However finding the accurate offset requires backtracking, and we don't
	// want to pay extra cost for the option that has lost its importance.
	return Result{j, maxScorePos + 1, int(maxScore)}, pos
}
Пример #11
0
func bonusAt(input util.Chars, idx int) int16 {
	if idx == 0 {
		return bonusBoundary
	}
	return bonusFor(charClassOf(input.Get(idx-1)), charClassOf(input.Get(idx)))
}