func (this *NLPEngine) PrintTree(document *models.DocumentEntity) { ls := document.Sentences() for l := ls.Front(); l != nil; l = l.Next() { tr := l.Value.(*models.SentenceEntity).GetSentence().(*Sentence).pts[0] output := new(Output) out := "" output.PrintTree(&out, tr.begin(), 0) LOG.Trace(out) println(out) } }
func (this *NLPEngine) PrintList(document *models.DocumentEntity) { ls := document.Sentences() for l := ls.Front(); l != nil; l = l.Next() { for w := l.Value.(*Sentence).Front(); w != nil; w = w.Next() { item := w.Value.(*Word).getForm() + ":" for a := w.Value.(*Word).Front(); a != nil; a = a.Next() { if a.Value.(*Analysis).isSelected(0) { item += a.Value.(*Analysis).getTag() } } println(item) } } }
func (this *HttpServer) URLHandler(w http.ResponseWriter, r *http.Request) { params := r.URL.Query() url := params.Get("url") document := new(models.DocumentEntity) document.Url = url ch := make(chan *models.DocumentEntity) defer close(ch) go this.context.Engine.NLP.Workflow(document, ch) output := <-ch js := output.ToJSON() b, err := json.Marshal(js) if err != nil { w.Write([]byte(fmt.Sprintf("%s\n", err.Error()))) } else { w.Write([]byte(fmt.Sprintf("%s\n", string(b)))) } }
func (this *NLPEngine) Workflow(document *models.DocumentEntity, output chan *models.DocumentEntity) { defer func() { if r := recover(); r != nil { err, _ := r.(error) if err != nil { output <- nil //err.Error() } else { output <- nil } } }() document.Init() tokens := list.New() url := document.Url content := document.Content if url != "" && content == "" { crawler := NewDefaultCrawler() article := crawler.Analyze(url) document.Title = article.Title document.Description = article.MetaDescription document.Keywords = article.MetaKeywords document.TopImage = article.TopImage document.Content = article.CleanedText } body := StringsAppend(document.Title, document.Description, document.Keywords, document.Content) if this.tokenizer != nil { this.tokenizer.Tokenize(body, 0, tokens) } sentences := list.New() if this.splitter != nil { sid := this.splitter.OpenSession() this.splitter.Split(sid, tokens, true, sentences) this.splitter.CloseSession(sid) } for ss := sentences.Front(); ss != nil; ss = ss.Next() { s := ss.Value.(*Sentence) if this.morfo != nil { this.morfo.Analyze(s) } if this.sense != nil { this.sense.Analyze(s) } if this.tagger != nil { this.tagger.Analyze(s) } if this.shallowParser != nil { this.shallowParser.Analyze(s) } } if this.dsb != nil { this.dsb.Analyze(sentences) } entities := make(map[string]int64) for ss := sentences.Front(); ss != nil; ss = ss.Next() { se := models.NewSentenceEntity() body := "" s := ss.Value.(*Sentence) for ww := s.Front(); ww != nil; ww = ww.Next() { w := ww.Value.(*Word) a := w.Front().Value.(*Analysis) te := models.NewTokenEntity(w.getForm(), a.getLemma(), a.getTag(), a.getProb()) if a.getTag() == TAG_NP { entities[w.getForm()]++ } body += w.getForm() + " " se.AddTokenEntity(te) } body = strings.Trim(body, " ") se.SetBody(body) se.SetSentence(s) document.AddSentenceEntity(se) } tempEntities := set.New() mitieEntities := this.mitie.Process(body) for e := mitieEntities.Front(); e != nil; e = e.Next() { entity := e.Value.(*models.Entity) tempEntities.Add(entity.GetValue()) } for name, frequency := range entities { name = strings.Replace(name, "_", " ", -1) if !tempEntities.Has(name) { document.AddUnknownEntity(name, frequency) } } document.Entities = mitieEntities output <- document }