func main() { flag.Parse() if flag.NArg() != 2 { flag.Usage() os.Exit(1) } configFile, err := os.Open(flag.Arg(0)) common.ExitIfError(err) defer configFile.Close() config, err := common.ParseConfig(configFile) common.ExitIfError(err) log.Printf("Transition system: %s", config.Parser.System) if config.Parser.HashKernelSize > 0 { log.Printf("Hash kernel size: %d", config.Parser.HashKernelSize) } generator, err := common.ReadFeatures(config.Parser.Features) common.ExitIfError(err) transitionSystem, ok := common.TransitionSystems[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } oracleConstructor, ok := common.Oracles[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } log.Println("Creating training instances...") var collector svm.GoLinearCollector if config.Parser.HashKernelSize == 0 { collector = featureParsing(transitionSystem, generator, oracleConstructor) } else { collector = hashKernelParsing(transitionSystem, generator, oracleConstructor, config.Parser.HashKernelSize) } if *libsvmOutput != "" { writeLibSVMOutput(collector.Problem()) } if config.Parser.Model != "" { model := trainModel(config.LibLinear, collector.Problem()) err := model.Save(config.Parser.Model) common.ExitIfError(err) } writeTransitions(transitionSystem, collector, config.Parser.Transitions) log.Println("Done!") }
func writeTransitions(ts system.TransitionSystem, collector system.InstanceCollector, transitionsFilename string) { serializer, ok := ts.(system.TransitionSerializer) if !ok { log.Fatal("Transition system does not implement transition serialization") } f, err := os.Create(transitionsFilename) common.ExitIfError(err) defer f.Close() err = collector.LabelNumberer().WriteLabelNumberer(f, serializer) common.ExitIfError(err) }
func run(parser system.Parser) { inputFile, err := os.Open(flag.Arg(1)) defer inputFile.Close() if err != nil { panic("Cannot open training data") } inputReader := conllx.NewReader(bufio.NewReader(inputFile)) writer := conllx.NewWriter(os.Stdout) for { s, err := inputReader.ReadSentence() if err != nil { break } deps, err := parser.Parse(s) common.ExitIfError(err) // Clear to ensure that no dependencies in the input leak // (if they were present). for idx := range s { s[idx].SetHead(0) s[idx].SetHeadRel("NULL") } for dep := range deps { s[dep.Dependent-1].SetHead(dep.Head) s[dep.Dependent-1].SetHeadRel(dep.Relation) } writer.WriteSentence(s) } }
func trainModel(conf common.LibLinear, problem *golinear.Problem) *golinear.Model { log.Println("Training classifier...") log.Println("Constraint violation cost:", conf.Cost) param := golinear.DefaultParameters() param.Cost = conf.Cost param.SolverType = golinear.NewMCSVMCSDefault() model, err := golinear.TrainModel(param, problem) common.ExitIfError(err) return model }
func main() { flag.Parse() if flag.NArg() != 2 { flag.Usage() os.Exit(1) } configFile, err := os.Open(flag.Arg(0)) common.ExitIfError(err) defer configFile.Close() config, err := common.ParseConfig(configFile) common.ExitIfError(err) generator, err := common.ReadFeatures(config.Parser.Features) common.ExitIfError(err) transitionSystem, ok := common.TransitionSystems[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } labelNumberer, err := common.ReadTransitions(config.Parser.Transitions, transitionSystem) common.ExitIfError(err) model, err := golinear.LoadModel(config.Parser.Model) common.ExitIfError(err) if config.Parser.HashKernelSize == 0 { log.Fatal("Currently only models using a hash kernel are supported") } else { hashKernelParsing(transitionSystem, generator, model, labelNumberer, config.Parser.HashKernelSize) } }
func evaluate(parser system.Parser) { testFile, err := os.Open(flag.Arg(1)) defer testFile.Close() if err != nil { panic("Cannot open training data") } testReader := conllx.NewReader(bufio.NewReader(testFile)) total := 0 found := 0 count := 0 for { s, err := testReader.ReadSentence() if err != nil { break } goldDeps, err := system.SentenceToDependencies(s) common.ExitIfError(err) deps, err := parser.Parse(s) common.ExitIfError(err) total += len(goldDeps) found += foundAttachments(goldDeps, deps) count++ if count%100 == 0 { printAccuracy(found, total) } } printAccuracy(found, total) }
func writeLibSVMOutput(problem *golinear.Problem) { f, err := os.Create(*libsvmOutput) common.ExitIfError(err) defer f.Close() problem.Iterate(func(instance *golinear.TrainingInstance) bool { fmt.Fprintf(f, "%.0f", instance.Label) for _, fv := range instance.Features { fmt.Fprintf(f, " %d:%f", fv.Index, fv.Value) } fmt.Fprintln(f) return true }) }
func createTrainingInstances(trainer system.GreedyTrainer, collector system.InstanceCollector, oracleConstructor common.OracleConstructor) { f, err := os.Open(flag.Arg(1)) defer f.Close() if err != nil { panic("Cannot open training data") } r := conllx.NewReader(bufio.NewReader(f)) for { s, err := r.ReadSentence() if err != nil { break } goldDependencies, err := system.SentenceToDependencies(s) common.ExitIfError(err) oracle := oracleConstructor(goldDependencies) trainer.Parse(s, oracle) } }