func main() { var args struct { Out string `name:"out" usage:"output path"` } cpuprofile := flag.String("cpuprofile", "", "path to write CPU profile") memprofile := flag.String("memprofile", "", "path to write memory profile") format := easy.StringChoice("fslm.format", "hash", "output format", []string{"hash", "sort"}) scale := flag.Float64("fslm.scale", 1.5, "scale multiplier for deciding the hash table size; only active in hash format") easy.ParseFlagsAndArgs(&args) if *cpuprofile != "" { w := easy.MustCreate(*cpuprofile) pprof.StartCPUProfile(w) defer func() { pprof.StopCPUProfile() w.Close() }() } if *memprofile != "" { defer func() { w := easy.MustCreate(*memprofile) pprof.WriteHeapProfile(w) w.Close() }() } builder, err := fslm.FromARPA(os.Stdin) if err != nil { glog.Fatal(err) } var model CanWriteBinary switch *format { case "hash": model = builder.DumpHashed(*scale) case "sort": model = builder.DumpSorted() default: glog.Fatalf("unknown format %q", *format) } if err := model.WriteBinary(args.Out); err != nil { glog.Fatal(err) } }
func main() { var args struct { Model string `name:"model" usage:"LM file"` } cpuprofile := flag.String("cpuprofile", "", "path to write CPU profile") memprofile := flag.String("memprofile", "", "path to write memory profile") easy.ParseFlagsAndArgs(&args) if *cpuprofile != "" { w := easy.MustCreate(*cpuprofile) pprof.StartCPUProfile(w) defer func() { pprof.StopCPUProfile() w.Close() }() } if *memprofile != "" { defer func() { w := easy.MustCreate(*memprofile) pprof.WriteHeapProfile(w) w.Close() }() } var before, after runtime.MemStats runtime.GC() runtime.ReadMemStats(&before) kind, modelI, file, err := fslm.FromBinary(args.Model) if err != nil { glog.Fatal("error in loading model: ", err) } defer file.Close() runtime.GC() runtime.ReadMemStats(&after) glog.Infof("LM memory overhead: %.2fMB", float64(after.Alloc-before.Alloc)/float64(1<<20)) var ( corpus [][]word.Id score float64 numWords, numSents, numOOVs int ) glog.Info("loading corpus took ", easy.Timed(func() { corpus = LoadCorpus(os.Stdin, modelI) })) numSents = len(corpus) for _, i := range corpus { numWords += len(i) } elapsed := easy.Timed(func() { score, numOOVs = ScoreCorpus(kind, modelI, corpus) }) glog.Infof("scoring took %v; %g QPS", elapsed, float64(numSents+numWords)*float64(time.Second)/float64(elapsed)) if numWords > 0 { fmt.Printf("%d sents, %d words, %d OOVs\n", numSents, numWords, numOOVs) fmt.Printf("logprob=%g ppl=%g ppl1=%g\n", score, math.Exp(-float64(score)/float64(numSents+numWords)*math.Log(10)), math.Exp(-float64(score)/float64(numWords)*math.Log(10))) } }