Ejemplo n.º 1
0
// evaluate runs parser over every sentence of the CoNLL-X file named by the
// second positional command-line argument (flag.Arg(1)) and prints attachment
// accuracy figures.
//
// For each sentence the gold dependencies are extracted, the sentence is
// parsed, and the counts reported by foundAttachments are accumulated against
// the number of gold dependencies. Progress is printed every 100 sentences and
// once more after the last sentence.
//
// The function panics when the evaluation file cannot be opened and exits via
// log.Fatal when a sentence cannot be converted to dependencies or parsed.
func evaluate(parser system.Parser) {
	testFile, err := os.Open(flag.Arg(1))
	if err != nil {
		panic("Cannot open evaluation data")
	}
	// Register the close only once the open is known to have succeeded.
	defer testFile.Close()

	testReader := conllx.NewReader(bufio.NewReader(testFile))

	total := 0 // gold dependencies seen so far
	found := 0 // running sum of foundAttachments results
	count := 0 // sentences processed so far

	for {
		// Any error from the reader ends the loop.
		// NOTE(review): this presumably relies on an end-of-input error to
		// terminate; genuine read/format errors are silently treated the
		// same way — confirm whether they should be reported.
		s, err := testReader.ReadSentence()
		if err != nil {
			break
		}

		goldDeps, err := system.SentenceToDependencies(s)
		if err != nil {
			log.Fatal(err)
		}

		deps, err := parser.Parse(s)
		if err != nil {
			log.Fatal(err)
		}

		total += len(goldDeps)
		found += foundAttachments(goldDeps, deps)
		count++

		// Periodic progress report.
		if count%100 == 0 {
			printAccuracy(found, total)
		}
	}

	// Final figures over the whole file.
	printAccuracy(found, total)
}
Ejemplo n.º 2
0
Archivo: main.go Proyecto: postfix/dpar
// createTrainingInstances reads the CoNLL-X training file named by the second
// positional command-line argument (flag.Arg(1)) and, for every sentence,
// builds an oracle from the sentence's gold dependencies and hands the
// sentence and oracle to trainer.Parse.
//
// The collector parameter is not used inside this function body.
//
// The function panics when the training file cannot be opened; a failure to
// extract gold dependencies is passed to common.ExitIfError.
func createTrainingInstances(trainer system.GreedyTrainer, collector system.InstanceCollector,
	oracleConstructor common.OracleConstructor) {
	f, err := os.Open(flag.Arg(1))
	if err != nil {
		panic("Cannot open training data")
	}
	// Register the close only once the open is known to have succeeded.
	defer f.Close()

	r := conllx.NewReader(bufio.NewReader(f))

	for {
		// Any error from the reader ends the loop.
		// NOTE(review): presumably end of input is signalled this way; other
		// read errors are swallowed as well — confirm this is intended.
		s, err := r.ReadSentence()
		if err != nil {
			break
		}

		goldDependencies, err := system.SentenceToDependencies(s)
		common.ExitIfError(err)

		oracle := oracleConstructor(goldDependencies)
		trainer.Parse(s, oracle)
	}

}
Ejemplo n.º 3
0
// main creates training instances for a transition-based parser.
//
// It expects exactly three positional arguments:
//
//	0: configuration file
//	1: training data (read sentence by sentence through common.ProcessData)
//	2: output file the training instances are written to
//
// If the configuration names a transitions file that already exists, its
// contents are loaded and reused; if it names one that does not exist yet,
// the transitions collected during this run are written to it at the end.
func main() {
	flag.Parse()

	if flag.NArg() != 3 {
		flag.Usage()
		os.Exit(1)
	}

	config := common.ReadConfigOrExit(flag.Arg(0))

	transitionSystem, ok := common.TransitionSystems[config.Parser.System]
	if !ok {
		log.Fatalf("Unknown transition system: %s", config.Parser.System)
	}

	// Oracles are looked up by the same key as the transition system.
	oracleConstructor, ok := common.Oracles[config.Parser.System]
	if !ok {
		log.Fatalf("Unknown transition system: %s", config.Parser.System)
	}

	log.Printf("Transition system: %s", config.Parser.System)

	ilas := common.ReadIlasOrExit(config.Parser.Inputs)

	// labelNumberer stays nil unless an existing transitions file is found.
	var labelNumberer *system.LabelNumberer
	if config.Parser.Transitions != "" {
		if _, err := os.Stat(config.Parser.Transitions); err == nil {
			log.Printf("Transitions filename %s exists, reusing...", config.Parser.Transitions)
			labelNumberer = common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem)
		}
	}

	instanceWriter, err := os.Create(flag.Arg(2))
	common.ExitIfError("Cannot open instance file for writing:", err)
	defer instanceWriter.Close()
	trainDataWriter := input.NewTrainDataWriter(instanceWriter)

	layerEmbeddings := common.MustReadAllEmbeddings(config.Embeddings)

	realizer := input.NewInputVectorRealizer(ilas, layerEmbeddings, nil)

	// Start from the reused label numberer when one was loaded above.
	var collector *common.WritingCollector
	if labelNumberer == nil {
		collector = common.NewWritingCollector(realizer, trainDataWriter)
	} else {
		collector = common.NewWritingCollectorWithLabelNumberer(realizer, labelNumberer, trainDataWriter)
	}

	trainer := system.NewGreedyTrainer(transitionSystem, collector)

	f, err := os.Open(flag.Arg(1))
	common.ExitIfError("Cannot open training data:", err)
	defer f.Close()

	log.Println("Creating training instances...")
	// Capture the processing result: previously it was discarded, so the
	// check below re-tested the (already nil) error from os.Open and
	// failures while processing the data went unreported.
	err = common.ProcessData(f, func(s []conllx.Token) error {
		goldDependencies, err := system.SentenceToDependencies(s)
		if err != nil {
			return fmt.Errorf("Cannot extract dependencies: %s", err.Error())
		}
		trainer.Parse(s, oracleConstructor(goldDependencies))

		return nil
	})
	common.ExitIfError("Cannot process data:", err)

	// Persist the collected transitions only when the configured file does
	// not exist yet (os.Stat fails); an existing file was reused above.
	if config.Parser.Transitions != "" {
		if _, err := os.Stat(config.Parser.Transitions); err != nil {
			writeTransitions(transitionSystem, collector.LabelNumberer(), config.Parser.Transitions)
		}
	}

}