示例#1
0
文件: ner.go 项目: sbl/ner
// Extract runs the extractor and returns a slice of Entities found in the
// given tokens.
func (ext *Extractor) Extract(tokens []string) ([]Entity, error) {
	ctokens := C.ner_arr_make(C.int(len(tokens)) + 1) // NULL termination
	defer C.ner_arr_free(ctokens, C.int(len(tokens))+1)
	for i, t := range tokens {
		cs := C.CString(t) // released by ner_arr_free
		C.ner_arr_set(ctokens, cs, C.int(i))
	}

	dets := C.mitie_extract_entities(ext.ner, ctokens)
	defer C.mitie_free(unsafe.Pointer(dets))
	if dets == nil {
		return nil, ErrMemory
	}

	n := int(C.mitie_ner_get_num_detections(dets))
	entities := make([]Entity, n, n)

	for i := 0; i < n; i++ {
		pos := int(C.mitie_ner_get_detection_position(dets, C.ulong(i)))
		len := int(C.mitie_ner_get_detection_length(dets, C.ulong(i)))

		entities[i] = Entity{
			Tag:   int(C.mitie_ner_get_detection_tag(dets, C.ulong(i))),
			Score: float64(C.mitie_ner_get_detection_score(dets, C.ulong(i))),
			Name:  strings.Join(tokens[pos:pos+len], " "),
			Range: Range{pos, pos + len},
		}
	}
	return entities, nil
}
示例#2
0
func (this *MITIE) Process(body string) *list.List {
	tokens := C.mitie_tokenize(C.CString(body))
	defer C.mitie_free(unsafe.Pointer(tokens))
	dets := C.mitie_extract_entities(this.ner, tokens)
	defer C.mitie_free(unsafe.Pointer(dets))
	num_dets := C.mitie_ner_get_num_detections(dets)
	duplicates := set.New()
	entites := list.New()
	for i := 0; i < int(num_dets); i++ {
		centity := C.get_entity(tokens, dets, C.ulong(i))
		model := C.GoString(centity.model)
		score := float64(centity.score)
		value := C.GoString(centity.value)
		key := fmt.Sprintf("%s:%s", value, model)
		if duplicates.Has(key) {
			continue
		}
		duplicates.Add(key)
		if score > 0.5 {
			entity := models.NewEntity(model, score, value)
			entites.PushBack(entity)
		}
	}
	return entites
}