Ejemplo n.º 1
0
// PrintTree walks every element returned by document.Sentences(), renders the
// tree stored at index 0 of the underlying Sentence's pts, and emits the
// rendered text twice: once through LOG.Trace and once through the built-in
// println.
//
// Each list element must be a *models.SentenceEntity wrapping a *Sentence;
// any other type makes the type assertions panic. The code also assumes pts
// has at least one entry for every sentence.
func (this *NLPEngine) PrintTree(document *models.DocumentEntity) {
	for elem := document.Sentences().Front(); elem != nil; elem = elem.Next() {
		entity := elem.Value.(*models.SentenceEntity)
		sentence := entity.GetSentence().(*Sentence)
		tree := sentence.pts[0]

		// The printer appends its rendering to the string it is handed.
		printer := new(Output)
		var rendered string
		printer.PrintTree(&rendered, tree.begin(), 0)

		LOG.Trace(rendered)
		println(rendered)
	}
}
Ejemplo n.º 2
0
// PrintList prints, via the built-in println, one line per word of every
// sentence in document. Each line has the form "<form>:<tags>", where <tags>
// is the concatenation of getTag() for every analysis of the word for which
// isSelected(0) reports true.
//
// Elements of document.Sentences() may be either *models.SentenceEntity
// values (the representation Workflow stores and PrintTree reads) or bare
// *Sentence values. Elements of any other type, and entities whose
// GetSentence() result is not a *Sentence, are skipped instead of panicking.
func (this *NLPEngine) PrintList(document *models.DocumentEntity) {
	ls := document.Sentences()
	for l := ls.Front(); l != nil; l = l.Next() {
		var sentence *Sentence
		switch v := l.Value.(type) {
		case *Sentence:
			sentence = v
		case *models.SentenceEntity:
			// A failed assertion leaves sentence nil; handled below.
			sentence, _ = v.GetSentence().(*Sentence)
		}
		if sentence == nil {
			continue
		}

		for w := sentence.Front(); w != nil; w = w.Next() {
			word := w.Value.(*Word)
			item := word.getForm() + ":"
			for a := word.Front(); a != nil; a = a.Next() {
				analysis := a.Value.(*Analysis)
				if analysis.isSelected(0) {
					item += analysis.getTag()
				}
			}
			println(item)
		}
	}
}
Ejemplo n.º 3
0
// URLHandler runs the NLP workflow for the address given in the "url" query
// parameter and writes the resulting document as JSON followed by a newline.
//
// The workflow runs in its own goroutine and reports back on a channel. It
// sends nil when it recovered from a panic; in that case, and when the JSON
// encoding fails, the handler answers with HTTP 500 and a plain-text message
// instead of a 200 response.
func (this *HttpServer) URLHandler(w http.ResponseWriter, r *http.Request) {
	document := new(models.DocumentEntity)
	document.Url = r.URL.Query().Get("url")

	// Buffered so the worker's single send can never block. The channel is
	// deliberately not closed here: closing is the sender's prerogative, and
	// the channel is garbage-collected once both sides drop it.
	ch := make(chan *models.DocumentEntity, 1)
	go this.context.Engine.NLP.Workflow(document, ch)

	output := <-ch
	if output == nil {
		http.Error(w, "unable to process the requested url", http.StatusInternalServerError)
		return
	}

	b, err := json.Marshal(output.ToJSON())
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	fmt.Fprintf(w, "%s\n", b)
}
Ejemplo n.º 4
0
// Workflow runs the analysis pipeline over document and reports the outcome
// on output: the populated document on success, or nil when any stage panics
// (the panic is recovered here and not propagated).
//
// Pipeline, in order:
//  1. document.Init().
//  2. If document.Url is set and document.Content is empty, the page is
//     fetched with the default crawler and its title, meta description, meta
//     keywords, top image and cleaned text are copied into document.
//  3. Title, description, keywords and content are combined with
//     StringsAppend, then tokenized and split into sentences.
//  4. Each sentence goes through the morfo, sense, tagger and shallowParser
//     stages, and all sentences together through dsb. Every one of these
//     components is skipped when nil.
//  5. Each sentence becomes a models.SentenceEntity holding one token entity
//     per word; forms whose first analysis is tagged TAG_NP are counted.
//  6. this.mitie processes the combined text. Counted forms (with "_"
//     replaced by " ") that MITIE did not return are registered through
//     document.AddUnknownEntity, and document.Entities is set to the MITIE
//     result.
func (this *NLPEngine) Workflow(document *models.DocumentEntity, output chan *models.DocumentEntity) {
	defer func() {
		// Whatever the panic value is, the caller is signalled with nil.
		if r := recover(); r != nil {
			output <- nil
		}
	}()

	document.Init()

	// Crawl only when there is a URL and no content was supplied.
	if document.Url != "" && document.Content == "" {
		crawler := NewDefaultCrawler()
		article := crawler.Analyze(document.Url)
		document.Title = article.Title
		document.Description = article.MetaDescription
		document.Keywords = article.MetaKeywords
		document.TopImage = article.TopImage
		document.Content = article.CleanedText
	}

	body := StringsAppend(document.Title, document.Description, document.Keywords, document.Content)

	tokens := list.New()
	if this.tokenizer != nil {
		this.tokenizer.Tokenize(body, 0, tokens)
	}

	sentences := list.New()
	if this.splitter != nil {
		sid := this.splitter.OpenSession()
		this.splitter.Split(sid, tokens, true, sentences)
		this.splitter.CloseSession(sid)
	}

	for ss := sentences.Front(); ss != nil; ss = ss.Next() {
		s := ss.Value.(*Sentence)
		if this.morfo != nil {
			this.morfo.Analyze(s)
		}
		if this.sense != nil {
			this.sense.Analyze(s)
		}
		if this.tagger != nil {
			this.tagger.Analyze(s)
		}
		if this.shallowParser != nil {
			this.shallowParser.Analyze(s)
		}
	}

	if this.dsb != nil {
		this.dsb.Analyze(sentences)
	}

	// Occurrence count of every word form whose first analysis is TAG_NP.
	entities := make(map[string]int64)

	for ss := sentences.Front(); ss != nil; ss = ss.Next() {
		s := ss.Value.(*Sentence)
		se := models.NewSentenceEntity()

		// Forms are collected and joined once instead of growing a string
		// with += on every word.
		var forms []string
		for ww := s.Front(); ww != nil; ww = ww.Next() {
			w := ww.Value.(*Word)
			form := w.getForm()
			forms = append(forms, form)

			first := w.Front()
			if first == nil {
				// A word without analyses (possible when an optional stage
				// is disabled) still yields a token, with empty lemma and
				// tag, rather than panicking and losing the whole document.
				se.AddTokenEntity(models.NewTokenEntity(form, "", "", 0))
				continue
			}

			a := first.Value.(*Analysis)
			tag := a.getTag()
			if tag == TAG_NP {
				entities[form]++
			}
			se.AddTokenEntity(models.NewTokenEntity(form, a.getLemma(), tag, a.getProb()))
		}

		se.SetBody(strings.Trim(strings.Join(forms, " "), " "))
		se.SetSentence(s)

		document.AddSentenceEntity(se)
	}

	// Values of the entities MITIE recognised in the combined text.
	tempEntities := set.New()

	mitieEntities := this.mitie.Process(body)
	for e := mitieEntities.Front(); e != nil; e = e.Next() {
		entity := e.Value.(*models.Entity)
		tempEntities.Add(entity.GetValue())
	}

	for name, frequency := range entities {
		name = strings.Replace(name, "_", " ", -1)
		if !tempEntities.Has(name) {
			document.AddUnknownEntity(name, frequency)
		}
	}

	document.Entities = mitieEntities
	output <- document
}