Esempio n. 1
0
// Process tokenizes body, runs the named-entity extractor over the resulting
// tokens and returns a list of the entities built by models.NewEntity for
// every distinct detection whose score is greater than 0.5.
//
// Detections are de-duplicated on their "value:model" key. Only the first
// occurrence of a key is considered, even when that occurrence falls below
// the score threshold. An empty list is returned when tokenization or
// extraction yields a NULL result.
//
// NOTE: C.free requires <stdlib.h> to be included in the cgo preamble.
func (this *MITIE) Process(body string) *list.List {
	entities := list.New()

	// C.CString allocates with malloc; the copy must be released explicitly,
	// otherwise every call leaks the whole input text.
	cbody := C.CString(body)
	defer C.free(unsafe.Pointer(cbody))

	tokens := C.mitie_tokenize(cbody)
	if tokens == nil {
		return entities
	}
	defer C.mitie_free(unsafe.Pointer(tokens))

	dets := C.mitie_extract_entities(this.ner, tokens)
	if dets == nil {
		return entities
	}
	defer C.mitie_free(unsafe.Pointer(dets))

	numDets := int(C.mitie_ner_get_num_detections(dets))
	seen := set.New()
	for i := 0; i < numDets; i++ {
		centity := C.get_entity(tokens, dets, C.ulong(i))
		model := C.GoString(centity.model)
		score := float64(centity.score)
		value := C.GoString(centity.value)

		key := fmt.Sprintf("%s:%s", value, model)
		if seen.Has(key) {
			continue
		}
		// The key is recorded before the score check, so a later, higher
		// scoring duplicate is still skipped.
		seen.Add(key)

		if score > 0.5 {
			entities.PushBack(models.NewEntity(model, score, value))
		}
	}
	return entities
}
Esempio n. 2
0
File: ner.go Progetto: sbl/ner
// Extract runs the extractor and returns a slice of Entities found in the
// given tokens.
//
// The tokens are copied into a NULL-terminated C array for the duration of
// the call. ErrMemory is returned when mitie_extract_entities yields a NULL
// result. Each returned Entity carries the detection's tag and score, the
// matched tokens joined by a single space, and the half-open token range
// [pos, pos+length).
func (ext *Extractor) Extract(tokens []string) ([]Entity, error) {
	size := C.int(len(tokens)) + 1 // one extra slot for the NULL terminator
	ctokens := C.ner_arr_make(size)
	defer C.ner_arr_free(ctokens, size)
	for i, t := range tokens {
		cs := C.CString(t) // released by ner_arr_free
		C.ner_arr_set(ctokens, cs, C.int(i))
	}

	dets := C.mitie_extract_entities(ext.ner, ctokens)
	if dets == nil {
		return nil, ErrMemory
	}
	// Register the release only once we know there is something to release.
	defer C.mitie_free(unsafe.Pointer(dets))

	n := int(C.mitie_ner_get_num_detections(dets))
	entities := make([]Entity, n)

	for i := range entities {
		idx := C.ulong(i)
		pos := int(C.mitie_ner_get_detection_position(dets, idx))
		length := int(C.mitie_ner_get_detection_length(dets, idx))
		end := pos + length

		entities[i] = Entity{
			Tag:   int(C.mitie_ner_get_detection_tag(dets, idx)),
			Score: float64(C.mitie_ner_get_detection_score(dets, idx)),
			Name:  strings.Join(tokens[pos:end], " "),
			Range: Range{pos, end},
		}
	}
	return entities, nil
}
Esempio n. 3
0
File: ner.go Progetto: sbl/ner
// Tokenize returns a slice that contains a tokenized copy of the input text.
//
// The text is copied into C memory, handed to mitie_tokenize, and every
// resulting C string is copied back into a Go string before the C buffers
// are released. A nil slice is returned when mitie_tokenize yields NULL.
func Tokenize(text string) []string {
	cs := C.CString(text)
	defer C.free(unsafe.Pointer(cs))

	ctokens := C.mitie_tokenize(cs)
	if ctokens == nil {
		// Nothing to walk and nothing to free; dereferencing would crash.
		return nil
	}
	defer C.mitie_free(unsafe.Pointer(ctokens))

	tokens := make([]string, 0, 20)

	// The mitie token array is NULL terminated, so walk it one pointer-sized
	// slot at a time until the terminator is reached. Plain pointer
	// arithmetic avoids casting to a huge fixed-size array type, which does
	// not fit the address space on 32-bit targets.
	base := unsafe.Pointer(ctokens)
	stride := unsafe.Sizeof(*ctokens)
	for i := uintptr(0); ; i++ {
		tok := *(**C.char)(unsafe.Pointer(uintptr(base) + i*stride))
		if tok == nil {
			break
		}
		tokens = append(tokens, C.GoString(tok))
	}
	return tokens
}
Esempio n. 4
0
File: ner.go Progetto: sbl/ner
// Free frees the underlying used C memory.
//
// The handle is cleared afterwards, so calling Free more than once is a
// no-op instead of a double free. The Extractor must not be used for
// extraction after Free has been called.
func (ext *Extractor) Free() {
	if ext.ner == nil {
		return
	}
	C.mitie_free(unsafe.Pointer(ext.ner))
	ext.ner = nil // drop the dangling pointer
}
Esempio n. 5
0
// Release frees the C memory held by the underlying extractor handle.
//
// The handle is cleared afterwards, so calling Release more than once is a
// no-op instead of a double free. The receiver must not be used for
// processing after Release has been called.
func (this *MITIE) Release() {
	if this.ner == nil {
		return
	}
	C.mitie_free(unsafe.Pointer(this.ner))
	this.ner = nil // drop the dangling pointer
}