Ejemplo n.º 1
0
// main builds a language model from standard input via fslm.FromARPA, dumps
// it in the layout selected by -fslm.format ("hash" or "sort"), and writes the
// result with WriteBinary to the path given by the "out" argument.
//
// The optional -cpuprofile and -memprofile flags name files that receive pprof
// CPU and heap profiles. The profiles are finalized by deferred functions, so
// they are only complete when main returns normally.
// NOTE(review): assuming glog is github.com/golang/glog, glog.Fatal exits the
// process without running deferred functions, so profiles are not flushed on
// the error paths below.
func main() {
	var args struct {
		Out string `name:"out" usage:"output path"`
	}
	cpuprofile := flag.String("cpuprofile", "", "path to write CPU profile")
	memprofile := flag.String("memprofile", "", "path to write memory profile")
	format := easy.StringChoice("fslm.format", "hash", "output format", []string{"hash", "sort"})
	scale := flag.Float64("fslm.scale", 1.5, "scale multiplier for deciding the hash table size; only active in hash format")
	easy.ParseFlagsAndArgs(&args)

	if *cpuprofile != "" {
		w := easy.MustCreate(*cpuprofile)
		// StartCPUProfile fails if profiling is already enabled; report it
		// instead of silently producing an empty profile file.
		if err := pprof.StartCPUProfile(w); err != nil {
			glog.Errorf("could not start CPU profile: %v", err)
		}
		defer func() {
			// StopCPUProfile is a no-op when profiling never started, so
			// this is safe even after the error above.
			pprof.StopCPUProfile()
			w.Close()
		}()
	}

	if *memprofile != "" {
		// The heap profile is taken at exit so that it reflects the whole run.
		defer func() {
			w := easy.MustCreate(*memprofile)
			if err := pprof.WriteHeapProfile(w); err != nil {
				glog.Errorf("could not write memory profile: %v", err)
			}
			w.Close()
		}()
	}

	builder, err := fslm.FromARPA(os.Stdin)
	if err != nil {
		glog.Fatal(err)
	}

	var model CanWriteBinary

	switch *format {
	case "hash":
		model = builder.DumpHashed(*scale)
	case "sort":
		model = builder.DumpSorted()
	default:
		// Not expected to be reachable if easy.StringChoice restricts the
		// value to the listed choices; kept as a guard.
		glog.Fatalf("unknown format %q", *format)
	}

	if err := model.WriteBinary(args.Out); err != nil {
		glog.Fatal(err)
	}
}
Ejemplo n.º 2
0
// main loads a binary language model from the path given by the "model"
// argument, reads a corpus from standard input, scores it with ScoreCorpus,
// and prints sentence/word/OOV counts together with the total score and two
// perplexity figures.
//
// The optional -cpuprofile and -memprofile flags name files that receive pprof
// CPU and heap profiles. The profiles are finalized by deferred functions, so
// they are only complete when main returns normally.
// NOTE(review): assuming glog is github.com/golang/glog, glog.Fatal exits the
// process without running deferred functions, so profiles are not flushed if
// model loading fails.
func main() {
	var args struct {
		Model string `name:"model" usage:"LM file"`
	}
	cpuprofile := flag.String("cpuprofile", "", "path to write CPU profile")
	memprofile := flag.String("memprofile", "", "path to write memory profile")
	easy.ParseFlagsAndArgs(&args)

	if *cpuprofile != "" {
		w := easy.MustCreate(*cpuprofile)
		// StartCPUProfile fails if profiling is already enabled; report it
		// instead of silently producing an empty profile file.
		if err := pprof.StartCPUProfile(w); err != nil {
			glog.Errorf("could not start CPU profile: %v", err)
		}
		defer func() {
			// StopCPUProfile is a no-op when profiling never started, so
			// this is safe even after the error above.
			pprof.StopCPUProfile()
			w.Close()
		}()
	}

	if *memprofile != "" {
		// The heap profile is taken at exit so that it reflects the whole run.
		defer func() {
			w := easy.MustCreate(*memprofile)
			if err := pprof.WriteHeapProfile(w); err != nil {
				glog.Errorf("could not write memory profile: %v", err)
			}
			w.Close()
		}()
	}

	// Measure heap usage around the model load; a GC before each reading
	// keeps garbage out of the comparison.
	var before, after runtime.MemStats
	runtime.GC()
	runtime.ReadMemStats(&before)
	kind, modelI, file, err := fslm.FromBinary(args.Model)
	if err != nil {
		glog.Fatal("error in loading model: ", err)
	}
	defer file.Close()
	runtime.GC()
	runtime.ReadMemStats(&after)
	// MemStats.Alloc is a uint64: subtract in floating point so that a heap
	// that shrank between the two readings yields a negative number instead
	// of wrapping around to an enormous positive one.
	overheadBytes := float64(after.Alloc) - float64(before.Alloc)
	glog.Infof("LM memory overhead: %.2fMB", overheadBytes/float64(1<<20))

	var (
		corpus                      [][]word.Id
		score                       float64
		numWords, numSents, numOOVs int
	)

	glog.Info("loading corpus took ", easy.Timed(func() { corpus = LoadCorpus(os.Stdin, modelI) }))

	numSents = len(corpus)
	for _, sent := range corpus {
		numWords += len(sent)
	}

	elapsed := easy.Timed(func() {
		score, numOOVs = ScoreCorpus(kind, modelI, corpus)
	})
	// One query is counted per word plus one extra per sentence.
	glog.Infof("scoring took %v; %g QPS", elapsed, float64(numSents+numWords)*float64(time.Second)/float64(elapsed))

	// Skip the report for an empty corpus; the perplexities would divide by
	// zero.
	if numWords > 0 {
		fmt.Printf("%d sents, %d words, %d OOVs\n", numSents, numWords, numOOVs)
		// ppl normalizes by numSents+numWords, ppl1 by numWords only.
		// Multiplying by ln(10) before Exp computes 10^(-score/N);
		// presumably score is a base-10 log probability — verify against
		// ScoreCorpus.
		fmt.Printf("logprob=%g ppl=%g ppl1=%g\n",
			score, math.Exp(-score/float64(numSents+numWords)*math.Log(10)),
			math.Exp(-score/float64(numWords)*math.Log(10)))
	}
}