Exemple #1
0
// Suggestion collects spelling suggestions for word from the model.
//
// The search starts with word itself and works through a queue of candidates,
// where each new candidate is produced by deleting a single rune from an
// earlier one. A candidate is expanded only while it is fewer than
// editDistanceMax runes shorter than word. Every candidate found in m.Data
// with a positive Count contributes its stored Suggestions; each collected
// suggestion whose estimated distance to word is at most editDistanceMax
// contributes the Suggestions stored under its own key as well.
//
// The result contains each suggestion once, in first-seen order. It is an
// empty, non-nil slice when nothing was collected or when word is more than
// editDistanceMax runes longer than m.Max. m.m is held for the whole call.
func (m *Model) Suggestion(word string, editDistanceMax int) []string {
	// and sort based on likelyness
	// change suggestions to a *Term will have to change slicing operations
	m.m.Lock()
	// Deferred so that the early return below cannot leave the mutex locked.
	defer m.m.Unlock()

	wordRune := []rune(word)
	if len(wordRune)-editDistanceMax > m.Max {
		return []string{}
	}

	// NOTE(review): Add is used as "insert and report whether it was new";
	// confirm against util.NewHash.
	hashset1 := util.NewHash() // deletes that have already been queued
	hashset2 := util.NewHash() // candidates and suggestions already examined

	candidates := []string{word}
	var suggestions []string

	for len(candidates) > 0 {
		// Pop the front of the queue.
		candidate := candidates[0]
		candidates = candidates[1:]
		candidateRune := []rune(candidate)

		if temp, ok := m.Data[candidate]; ok {
			if temp.Count > 0 && hashset2.Add(candidate) {
				suggestions = append(suggestions, temp.Suggestions[:]...)
			}

			// range captures the slice header once, so entries appended inside
			// this loop are only visited while processing a later candidate.
			for _, suggestion := range suggestions {
				if !hashset2.Add(suggestion) {
					continue
				}
				suggestionRune := []rune(suggestion)

				// Length of the common prefix of suggestion and word.
				prefix := 0
				for prefix < len(suggestionRune) && prefix < len(wordRune) &&
					suggestionRune[prefix] == wordRune[prefix] {
					prefix++
				}

				// Length of the common suffix, never overlapping the prefix.
				// The runes compared are the trailing ones, matching the
				// [prefix:len-suffix] windows sliced below.
				suffix := 0
				for suffix < len(suggestionRune)-prefix && suffix < len(wordRune)-prefix &&
					suggestionRune[len(suggestionRune)-suffix-1] == wordRune[len(wordRune)-suffix-1] {
					suffix++
				}

				distance := 0
				if suggestion != word {
					sharesAffix := prefix > 0 || suffix > 0
					switch {
					case len(suggestionRune) == len(wordRune):
						if sharesAffix {
							distance = len(wordRune) - len(candidateRune)
						}
					case len(wordRune) == len(candidateRune):
						if sharesAffix {
							distance = len(suggestionRune) - len(candidateRune)
						}
					case sharesAffix:
						// Only the differing middle part needs to be compared.
						// can used optimized one here from edit.go
						distance = DamerauLevenshteinDistance(
							suggestionRune[prefix:len(suggestionRune)-suffix],
							wordRune[prefix:len(wordRune)-suffix])
					default:
						// Nothing in common at either end: compare in full.
						distance = DamerauLevenshteinDistance(suggestionRune, wordRune)
					}
				}

				// remove for verbose missing here

				if distance <= editDistanceMax {
					if val, ok := m.Data[suggestion]; ok {
						suggestions = append(suggestions, val.Suggestions[:]...)
					}
				}
			}
		}

		if len(wordRune)-len(candidateRune) < editDistanceMax {
			// verbose stuff here missing

			// Queue every single-rune deletion of the current candidate so
			// that deeper edit distances are reached on later iterations.
			for i := range candidateRune {
				shorter := string(candidateRune[:i]) + string(candidateRune[i+1:])
				if hashset1.Add(shorter) {
					candidates = append(candidates, shorter)
				}
			}
		}
	}

	// Drop duplicates while keeping the order in which suggestions were found.
	seen := make(map[string]struct{}, len(suggestions))
	unique := make([]string, 0, len(suggestions))
	for _, s := range suggestions {
		if _, dup := seen[s]; dup {
			continue
		}
		seen[s] = struct{}{}
		unique = append(unique, s)
	}

	return unique
}
Exemple #2
0
// generateWords builds the candidate source words for password.
//
// For every pre-analysis rule (only ":" unless *bruteRules is set, otherwise
// ":" and "r") the rule is applied to password, suggestions are obtained for
// the result (from *wordDebug, generateSimpleWords or generateAdvancedWords),
// and a variant of each suggestion with spaces and hyphens removed is added
// when it is not already present. Each suggestion becomes a Word carrying its
// Levenshtein distance to the rule-transformed password.
//
// The words are then sorted and filtered: unless *moreWords is set, scanning
// stops at the first word whose distance exceeds the best distance seen so
// far; words with a distance above *maxWordDist are dropped; and when
// *maxWords is positive the result is truncated to that many entries.
func generateWords(password string, m spell.Speller) []Word {
	var words []Word

	preanalysisRules := []string{":", "r"}
	//preanalysisRules := []string{":", "r", "}", "{"}

	if !*bruteRules {
		preanalysisRules = preanalysisRules[:1]
	}

	// Removes the separators in a single pass; built once for all suggestions.
	stripper := strings.NewReplacer(" ", "", "-", "")

	for _, preRule := range preanalysisRules {
		prePassword := rules.ApplyRules([]string{preRule}, password)

		var suggestions []string
		switch {
		case len(*wordDebug) > 0:
			suggestions = []string{*wordDebug}
		case *simpleWords:
			suggestions = generateSimpleWords(prePassword, m)
		default:
			suggestions = generateAdvancedWords(prePassword, m)
		}

		seen := util.NewHash()
		for _, val := range suggestions {
			seen.Add(val)
		}

		// Add the separator-free variant of every suggestion. range captures
		// the slice once, so the variants appended here are not revisited.
		for _, suggestion := range suggestions {
			stripped := stripper.Replace(suggestion)
			if !seen.Exists(stripped) {
				suggestions = append(suggestions, stripped)
				seen.Add(stripped)
			}
		}

		for _, suggestion := range suggestions {
			words = append(words, Word{
				suggestion:     suggestion,
				distance:       Levenshtein(suggestion, prePassword),
				password:       prePassword,
				preRule:        preRule,
				bestRuleLength: 9999,
			})
		}
	}

	// NOTE(review): the early break below assumes Words orders by ascending
	// distance — confirm against the Words sort implementation.
	sort.Sort(Words(words))

	var wordsCollection []Word
	bestFoundDistance := 9999 // collect best edit distance

	for _, word := range words {
		if !*moreWords {
			// Keep only the words that share the best distance: once a word
			// is worse than the best seen so far, stop scanning.
			if word.distance < bestFoundDistance {
				bestFoundDistance = word.distance
			} else if word.distance > bestFoundDistance {
				if *debug {
					log.Println("best found distance suboptimal")
				}
				break
			}
		}

		// filter words with large edit distance
		if word.distance <= *maxWordDist {
			wordsCollection = append(wordsCollection, word)
		} else if *debug {
			log.Println("max distance exceeded")
		}
	}

	// A non-positive *maxWords means "no limit".
	if *maxWords > 0 && len(wordsCollection) > *maxWords {
		wordsCollection = wordsCollection[:*maxWords]
	}

	return wordsCollection
}