예제 #1
0
// Process runs named-entity extraction over body and returns the detections
// as a list of entities built with models.NewEntity.
//
// The text is tokenized with mitie_tokenize and handed to
// mitie_extract_entities together with this.ner. Every detection is keyed by
// "value:model"; only the first detection seen for a key is considered, and it
// is appended to the result only when its score is greater than 0.5. The
// returned list is never nil; it is empty when tokenization or extraction
// yields a NULL pointer.
func (this *MITIE) Process(body string) *list.List {
	entities := list.New()

	// C.CString allocates the copy with malloc, so it has to be released with
	// C.free (which needs <stdlib.h> in the cgo preamble). Previously the copy
	// was passed straight to mitie_tokenize and leaked on every call.
	cbody := C.CString(body)
	defer C.free(unsafe.Pointer(cbody))

	tokens := C.mitie_tokenize(cbody)
	if tokens == nil {
		return entities
	}
	defer C.mitie_free(unsafe.Pointer(tokens))

	dets := C.mitie_extract_entities(this.ner, tokens)
	if dets == nil {
		return entities
	}
	defer C.mitie_free(unsafe.Pointer(dets))

	numDets := int(C.mitie_ner_get_num_detections(dets))
	duplicates := set.New()
	for i := 0; i < numDets; i++ {
		centity := C.get_entity(tokens, dets, C.ulong(i))
		model := C.GoString(centity.model)
		score := float64(centity.score)
		value := C.GoString(centity.value)

		// NOTE(review): the key is recorded before the score filter, so a
		// low-scoring first detection suppresses a later higher-scoring one
		// with the same value and model — confirm this is intended.
		key := fmt.Sprintf("%s:%s", value, model)
		if duplicates.Has(key) {
			continue
		}
		duplicates.Add(key)

		if score > 0.5 {
			entities.PushBack(models.NewEntity(model, score, value))
		}
	}
	return entities
}
예제 #2
0
파일: ner.go 프로젝트: sbl/ner
// Tokenize returns a slice that contains a tokenized copy of the input text.
//
// It returns nil when mitie_tokenize yields a NULL pointer; otherwise it
// returns one Go string per token, in order.
func Tokenize(text string) []string {
	cs := C.CString(text)
	defer C.free(unsafe.Pointer(cs))

	ctokens := C.mitie_tokenize(cs)
	if ctokens == nil {
		// Nothing to walk and nothing to free; dereferencing NULL below
		// would crash the process.
		return nil
	}
	defer C.mitie_free(unsafe.Pointer(ctokens))

	tokens := make([]string, 0, 20)

	// The token array is NULL terminated, so walk it one pointer-sized
	// element at a time until the terminator. Plain pointer arithmetic avoids
	// casting to a huge fixed-size array type, which does not fit in the
	// address space of 32-bit targets.
	stride := unsafe.Sizeof(*ctokens)
	for i := uintptr(0); ; i++ {
		tok := *(**C.char)(unsafe.Pointer(uintptr(unsafe.Pointer(ctokens)) + i*stride))
		if tok == nil {
			break
		}
		tokens = append(tokens, C.GoString(tok))
	}
	return tokens
}