func edist(sent1, sent2 string, m *wordmodel.Model) C.float { words1 := strings.Split(sent1, " ") words2 := strings.Split(sent2, " ") vec1 := [][]float32{} vec2 := [][]float32{} for _, w1 := range words1 { vec := wordmodel.Vector(make([]float32, m.Layer1Size)) if wordId, ok := m.Vocab[w1]; !ok { fmt.Printf("word not found: %s\n", w1) } else { vec.Add(1, m.Vector(wordId)) } vec.Normalize() vec1 = append(vec1, vec) } for _, w2 := range words2 { vec := wordmodel.Vector(make([]float32, m.Layer1Size)) if wordId, ok := m.Vocab[w2]; !ok { fmt.Printf("word not found: %s\n", w2) } else { vec.Add(1, m.Vector(wordId)) } vec.Normalize() vec2 = append(vec2, vec) } f1, f2 := make([]C.feature_t, len(vec1)), make([]C.feature_t, len(vec2)) for i, v := range vec1 { f1[i].arr = (*C.float)(&v[0]) } for i, v := range vec2 { f2[i].arr = (*C.float)(&v[0]) } wt1, wt2 := make([]C.float, len(vec1[0])), make([]C.float, len(vec2[0])) for i := 0; i < len(wt1); i++ { wt1[i], wt2[i] = 1.0, 1.0 } s1 := C.signature_t{n: (C.int)(len(words1)), Weights: &wt1[0], Features: &f1[0]} s2 := C.signature_t{n: (C.int)(len(words2)), Weights: &wt2[0], Features: &f2[0]} var flow C.flow_t var flowsize C.int emd_dist := C.emd(&s1, &s2, (*[0]byte)(C.dist), &flow, &flowsize) return emd_dist }
func mostSimilar(query string, n int, m *wordmodel.Model) ([]Pair, error) { vec := wordmodel.Vector(make([]float32, m.Layer1Size)) if wordId, ok := m.Vocab[query]; !ok { return nil, fmt.Errorf("word not found: %s", query) } else { vec.Add(1, m.Vector(wordId)) } vec.Normalize() r := make([]Pair, n) for w, i := range m.Vocab { sim := vec.Dot(m.Vector(i)) this := Pair{w, sim} for j := 0; j < n; j++ { if this.Sim > r[j].Sim { this, r[j] = r[j], this } } } return r[1:], nil }