示例#1
0
文件: server.go 项目: huichen/bseg
// JsonRpcServer is an HTTP handler that runs the request's "text" parameter
// through bseg and replies with the resulting dictionary entries as JSON.
//
// The text is read from the URL query string first; when that is empty the
// POST form value of the same name is used instead. The text is split on
// newlines before being handed to bseg. If the response cannot be encoded,
// the handler replies with 500 Internal Server Error instead of an empty body.
func JsonRpcServer(w http.ResponseWriter, req *http.Request) {
	// Fetch the text to be segmented.
	text := req.URL.Query().Get("text")
	if text == "" {
		text = req.PostFormValue("text")
	}

	lines := strings.Split(text, "\n")
	tokens, segments := bseg.GetSegmentsFromText(lines)

	seg := bseg.NewBSeg()
	seg.ProcessText(tokens, segments)
	ts := seg.GetDict()

	// Convert to the output format. The slice is kept non-nil on purpose so
	// that an empty result is encoded as [] rather than null.
	ps := make([]*Phrase, 0, len(ts))
	for _, token := range ts {
		ps = append(ps, &Phrase{Text: token.Name, Count: token.Count})
	}

	response, err := json.Marshal(&JsonResponse{Phrases: ps})
	if err != nil {
		// Report the failure before any JSON header or body has been sent.
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	io.WriteString(w, string(response))
}
示例#2
0
文件: train.go 项目: huichen/bseg
// main reads the input file named by the input flag line by line (skipping
// empty lines), feeds the lines to bseg, and dumps the resulting dictionary
// to the path named by the output_dict flag. When the cpuprofile flag is
// non-empty, a CPU profile of the processing step is written to that file.
//
// Any failure to open or read the input, or to create or start the profile,
// terminates the program via log.Fatal.
func main() {
	flag.Parse()

	file, err := os.Open(*input)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	log.Printf("读入文本 %s", *input)
	scanner := bufio.NewScanner(file)
	lines := []string{}
	for scanner.Scan() {
		text := scanner.Text()
		if text != "" {
			lines = append(lines, text)
		}
	}
	// Scan returns false both at EOF and on error (including a line that
	// exceeds the scanner's token limit); without this check the input would
	// be silently truncated.
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	log.Print("文件行数", len(lines))

	tokens, segments := bseg.GetSegmentsFromText(lines)
	seg := bseg.NewBSeg()

	// Open the CPU profile file.
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Deferred calls run last-in-first-out, so the profile is stopped
		// (and flushed) before the file is closed.
		defer f.Close()
		if err := pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	seg.ProcessText(tokens, segments)

	seg.DumpDict(*output_dict)
}