Esempio n. 1
0
// edist returns the value computed by the C emd routine for two sentences.
//
// Each sentence is split on single spaces and every token becomes one feature
// (its vector from m) with weight 1.0. The two resulting signatures are handed
// to C.emd together with the C.dist callback, and the C result is returned
// unchanged.
//
// Tokens missing from m.Vocab are reported on stdout and contribute an
// all-zero vector that is still passed through Normalize.
//
// NOTE(review): s1/s2 and the feature arrays are Go memory holding Go
// pointers; depending on the Go version and cgocheck setting the runtime may
// reject passing them to C. Confirm against the toolchain this is built with.
// NOTE(review): &vec[0] below requires m.Layer1Size > 0 — confirm the model
// guarantees that.
func edist(sent1, sent2 string, m *wordmodel.Model) C.float {
	vec1 := edistWordVectors(sent1, m)
	vec2 := edistWordVectors(sent2, m)

	// strings.Split never returns an empty slice, so both signatures have at
	// least one feature and the &x[0] expressions below are in range.
	f1 := make([]C.feature_t, len(vec1))
	for i := range vec1 {
		f1[i].arr = (*C.float)(&vec1[i][0])
	}
	f2 := make([]C.feature_t, len(vec2))
	for i := range vec2 {
		f2[i].arr = (*C.float)(&vec2[i][0])
	}

	// One weight per feature. Sizing these by the vector dimension instead
	// would let C read past the end for sentences longer than Layer1Size.
	wt1 := make([]C.float, len(vec1))
	for i := range wt1 {
		wt1[i] = 1.0
	}
	wt2 := make([]C.float, len(vec2))
	for i := range wt2 {
		wt2[i] = 1.0
	}

	s1 := C.signature_t{n: C.int(len(vec1)), Weights: &wt1[0], Features: &f1[0]}
	s2 := C.signature_t{n: C.int(len(vec2)), Weights: &wt2[0], Features: &f2[0]}

	// The flow output is not used here, but a non-NULL Flow pointer must have
	// room for n1+n2-1 entries; a single flow_t would be overrun.
	flow := make([]C.flow_t, len(vec1)+len(vec2)-1)
	var flowSize C.int

	return C.emd(&s1, &s2, (*[0]byte)(C.dist), &flow[0], &flowSize)
}

// edistWordVectors builds one normalized vector per space-separated token of
// sent, using a zero vector for tokens that are not in m.Vocab.
func edistWordVectors(sent string, m *wordmodel.Model) [][]float32 {
	words := strings.Split(sent, " ")
	vecs := make([][]float32, 0, len(words))
	for _, w := range words {
		vec := wordmodel.Vector(make([]float32, m.Layer1Size))
		if wordID, ok := m.Vocab[w]; ok {
			vec.Add(1, m.Vector(wordID))
		} else {
			fmt.Printf("word not found: %s\n", w)
		}
		vec.Normalize()
		vecs = append(vecs, vec)
	}
	return vecs
}
Esempio n. 2
0
// mostSimilar ranks the words of m.Vocab by the dot product of their vector
// with the normalized vector of query and returns the best matches in
// descending order of similarity.
//
// The query word itself is never part of the result, and the returned slice
// has n-1 entries (empty when n <= 1), matching the historical behaviour of
// computing the top n and discarding the query from the front.
//
// Result slots start as the zero Pair, so only words whose similarity is
// strictly greater than 0 can occupy a slot; if fewer such words exist, the
// tail of the result holds zero-valued Pairs.
//
// An error is returned when query is not in m.Vocab.
func mostSimilar(query string, n int, m *wordmodel.Model) ([]Pair, error) {
	wordID, ok := m.Vocab[query]
	if !ok {
		return nil, fmt.Errorf("word not found: %s", query)
	}
	// Nothing to rank; also avoids make/slicing panics for n <= 0.
	if n <= 1 {
		return []Pair{}, nil
	}

	vec := wordmodel.Vector(make([]float32, m.Layer1Size))
	vec.Add(1, m.Vector(wordID))
	vec.Normalize()

	r := make([]Pair, n-1)
	for w, i := range m.Vocab {
		// Exclude the query explicitly instead of assuming it ranks first.
		if w == query {
			continue
		}
		cand := Pair{w, vec.Dot(m.Vector(i))}
		// Insertion into the descending list: once cand displaces an entry,
		// the displaced entry is carried down through the remaining slots.
		for j := range r {
			if cand.Sim > r[j].Sim {
				cand, r[j] = r[j], cand
			}
		}
	}
	return r, nil
}