// Suggestion checks the spelling of a word func (m *Model) Suggestion(word string, editDistanceMax int) []string { // and sort based on likelyness // change suggestions to a *Term will have to change slicing operations m.m.Lock() wordRune := []rune(word) if len(wordRune)-editDistanceMax > m.Max { return []string{} } hashset1 := util.NewHash() hashset2 := util.NewHash() candidates := []string{} candidates = append(candidates, word) //hashset1.Add(word) suggestions := []string{} //fmt.Println(len(candidates)) for len(candidates) > 0 { // this does not change the slice the undlerying array is still the same //candidate, candidates := candidates[0], candidates[1:] // can do this https://play.golang.org/p/pyum8Afrin candidate := candidates[0] candidates = append(candidates[:0], candidates[1:]...) candidateRune := []rune(candidate) if temp, ok := m.Data[candidate]; ok { if temp.Count > 0 && hashset2.Add(candidate) { //fmt.Println("ok added to suggestions") suggestions = append(suggestions, temp.Suggestions[:]...) } for _, suggestion := range suggestions { suggestionRune := []rune(suggestion) if hashset2.Add(suggestion) { // check the beginning and back of the suggestion prefix := 0 suffix := 0 for (prefix < len(suggestionRune)) && (prefix < len(wordRune)) && (suggestionRune[prefix] == wordRune[prefix]) { prefix++ } for (suffix < len(suggestionRune)-prefix) && (suffix < len(wordRune)-prefix) && (suggestionRune[len(suggestionRune)-suffix-prefix-1] == wordRune[len(wordRune)-suffix-prefix-1]) { suffix++ } distance := 0 if suggestion != word { if len(suggestionRune) == len(wordRune) { if (prefix > 0) || (suffix > 0) { distance = len(wordRune) - len(candidateRune) } } else if len(word) == len(candidate) { if (prefix > 0) || (suffix > 0) { distance = len(suggestionRune) - len(candidateRune) } } else { if (prefix > 0) || (suffix > 0) { // can used optimized one here from edit.go distance = DamerauLevenshteinDistance( suggestionRune[prefix:len(suggestionRune)-suffix], wordRune[prefix:len(wordRune)-suffix]) } else { if (prefix > 0) || (suffix > 0) { distance = DamerauLevenshteinDistance(suggestionRune, wordRune) } } } } // remove for verbose missing here if distance <= editDistanceMax { if val, ok := m.Data[suggestion]; ok { suggestions = append(suggestions, val.Suggestions[:]...) } } } } } if len(wordRune)-len(candidateRune) < editDistanceMax { // verbose stuff here missing // this does the same thing as Edits() for i := 0; i < len(candidateRune); i++ { delete := string(wordRune[:i]) + string(wordRune[i+1:]) if hashset1.Add(delete) { candidates = append(candidates, delete) } } } } m.m.Unlock() temp := []string{} a := make(map[string]struct{}) for _, val := range suggestions { a[val] = struct{}{} } for k := range a { temp = append(temp, k) } return temp }
func generateWords(password string, m spell.Speller) []Word { var words []Word var wordsCollection []Word // collect best edit distance bestFoundDistance := 9999 preanalysisRules := []string{":", "r"} //preanalysisRules := []string{":", "r", "}", "{"} if !*bruteRules { preanalysisRules = preanalysisRules[:1] } var prePassword string for _, preRule := range preanalysisRules { prePassword = rules.ApplyRules([]string{preRule}, password) var suggestions []string if len(*wordDebug) > 0 { suggestions = []string{*wordDebug} } else if *simpleWords { suggestions = generateSimpleWords(prePassword, m) } else { suggestions = generateAdvancedWords(prePassword, m) } hashset1 := util.NewHash() for _, val := range suggestions { hashset1.Add(val) } // rules for each of the suggestions for _, suggestion := range suggestions { suggestion = strings.Replace(suggestion, " ", "", -1) suggestion = strings.Replace(suggestion, "-", "", -1) if !hashset1.Exists(suggestion) { suggestions = append(suggestions, suggestion) hashset1.Add(suggestion) } } /* // TODO what is the point of this?? // debugging?? if len(suggestions) != hashset1.Len() { // make these sorted //fmt.Println(suggestions) //fmt.Println(hashset1) } */ for _, suggestion := range suggestions { distance := Levenshtein(suggestion, prePassword) temp := Word{ suggestion: suggestion, distance: distance, password: prePassword, preRule: preRule, bestRuleLength: 9999, } words = append(words, temp) } } sort.Sort(Words(words)) for _, word := range words { if !*moreWords { if word.distance < bestFoundDistance { bestFoundDistance = word.distance } } else if word.distance > bestFoundDistance { if *debug { log.Println("best found distance suboptimal") } break } // filter words with large edit distance if word.distance <= *maxWordDist { wordsCollection = append(wordsCollection, word) } else { if *debug { log.Println("max distance exceeded") } } } if *maxWords > 0 { if *maxWords > len(wordsCollection) { wordsCollection = wordsCollection[:] } else { wordsCollection = wordsCollection[:*maxWords] } } return wordsCollection }