Exemple #1
0
// main loads the parser configuration named by the first positional argument,
// builds a greedy parser from the configured transition system, model(s),
// input layer addresses, embeddings and normalizer, and then times a call to
// run.
//
// When the cpuprofile flag is non-empty, a CPU profile is written to that
// path for the duration of the run.
func main() {
	flag.Parse()

	// One or two positional arguments are accepted; only the first (the
	// configuration file) is consumed in this function.
	if n := flag.NArg(); n != 1 && n != 2 {
		flag.Usage()
		os.Exit(1)
	}

	config := common.ReadConfigOrExit(flag.Arg(0))

	transitionSystem, ok := common.TransitionSystems[config.Parser.System]
	if !ok {
		log.Fatalf("Unknown transition system: %s", config.Parser.System)
	}

	blasImpl, ok := blasImpls[*blas]
	if !ok {
		log.Fatalf("Unknown blas implementation: %s", *blas)
	}

	labelNumberer := common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem)
	parseNetwork := common.ReadModelOrExit(config.Parser.Model, blasImpl)

	// The morphology model is optional: it is only loaded when the
	// configuration names one, and stays nil otherwise.
	var morphNetwork *network.Network
	if len(config.Parser.MorphModel) != 0 {
		morphNetwork = common.ReadModelOrExit(config.Parser.MorphModel, blasImpl)
	}

	ilas := common.ReadIlasOrExit(config.Parser.Inputs)
	normalizer := common.ReadNormalizerOrExit(config.Parser.Normalisation)

	layerEmbeddings := common.MustReadAllEmbeddings(config.Embeddings)

	realizer := input.NewInputVectorRealizer(ilas, layerEmbeddings, normalizer)

	var guide system.Guide
	if morphNetwork != nil {
		log.Print("Using morphology")
		guide = network.NewMorphGuide(parseNetwork, morphNetwork, *labelNumberer, realizer)
	} else {
		guide = network.NewGuide(parseNetwork, *labelNumberer, realizer)
	}
	parser := system.NewGreedyParser(transitionSystem, guide)

	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		if err := pprof.StartCPUProfile(f); err != nil {
			f.Close()
			log.Fatal(err)
		}
		// Deferred calls run in reverse order, so the profile is stopped
		// (which waits for its writes to complete) before the file is closed.
		defer f.Close()
		defer pprof.StopCPUProfile()
	}

	start := time.Now()
	run(parser)
	elapsed := time.Since(start)
	log.Printf("Parsing took %s", elapsed)
}
Exemple #2
0
// main normalizes a data file using the configuration given on the command
// line. It expects exactly three positional arguments: the configuration
// file, the input data and the output path.
//
// Normalization parameters are read from the configured normalisation file
// when fileExists reports it is present; otherwise they are extracted from the
// input data and written to that file before the data is normalized.
func main() {
	flag.Parse()

	if flag.NArg() != 3 {
		flag.Usage()
		os.Exit(1)
	}

	configPath, dataPath, outputPath := flag.Arg(0), flag.Arg(1), flag.Arg(2)
	config := common.ReadConfigOrExit(configPath)

	// Log a notice for every layer whose inputs are left unnormalized.
	noteIfRaw := func(normalized bool, notice string) {
		if normalized {
			return
		}
		log.Println(notice)
	}
	noteIfRaw(config.Embeddings.Word.NormalizeInput, "Token layer inputs will not be normalized")
	noteIfRaw(config.Embeddings.Tag.NormalizeInput, "Tag layer inputs will not be normalized")
	noteIfRaw(config.Embeddings.DepRel.NormalizeInput, "Dependency layer inputs will not be normalized")
	noteIfRaw(config.Embeddings.Feature.NormalizeInput, "Feature layer inputs will not be normalized")
	noteIfRaw(config.Embeddings.Char.NormalizeInput, "Character layer inputs will not be normalized")

	paramsPath := config.Parser.Normalisation

	var normalizer *input.Normalizer
	switch {
	case fileExists(paramsPath):
		log.Printf("Read normalization parameters from %s", paramsPath)
		normalizer = common.ReadNormalizerOrExit(paramsPath)
	default:
		log.Print("Extracting normalization parameters from data")

		// Each accumulator requested by input.NewAccumulator is a fresh
		// variance accumulator.
		newVariance := func() normalization.Accumulator {
			return normalization.NewVarianceAccumulator()
		}
		acc := input.NewAccumulator(normLayers(config), newVariance)
		common.ExitIfError("Error extracting normalizer parameters: ", extractParameters(dataPath, acc))

		normalizer = acc.Normalizer()
		common.ExitIfError("Error writing normalizer parameters: ", writeNormalizer(paramsPath, normalizer))
	}

	log.Printf("Normalizing data from %s and writing to %s", dataPath, outputPath)
	normalizeData(normalizer, dataPath, outputPath)
}
Exemple #3
0
// main creates training instances for the parser. It expects exactly three
// positional arguments: the configuration file, the training data and the
// path of the instance file to write.
//
// Every sentence of the training data is parsed by a greedy trainer driven by
// an oracle built from the sentence's gold dependencies; the collector writes
// the resulting instances to the instance file. If the configured transitions
// file already exists its label numbering is reused; if it does not exist it
// is written once processing has finished.
func main() {
	flag.Parse()

	if flag.NArg() != 3 {
		flag.Usage()
		os.Exit(1)
	}

	config := common.ReadConfigOrExit(flag.Arg(0))

	transitionSystem, ok := common.TransitionSystems[config.Parser.System]
	if !ok {
		log.Fatalf("Unknown transition system: %s", config.Parser.System)
	}

	oracleConstructor, ok := common.Oracles[config.Parser.System]
	if !ok {
		log.Fatalf("Unknown transition system: %s", config.Parser.System)
	}

	log.Printf("Transition system: %s", config.Parser.System)

	ilas := common.ReadIlasOrExit(config.Parser.Inputs)

	// Reuse an existing transitions file when one is configured and present;
	// otherwise labelNumberer stays nil and the collector builds its own.
	var labelNumberer *system.LabelNumberer
	if config.Parser.Transitions != "" {
		if _, err := os.Stat(config.Parser.Transitions); err == nil {
			log.Printf("Transitions filename %s exists, reusing...", config.Parser.Transitions)
			labelNumberer = common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem)
		}
	}

	instanceWriter, err := os.Create(flag.Arg(2))
	common.ExitIfError("Cannot open instance file for writing:", err)
	defer instanceWriter.Close()
	trainDataWriter := input.NewTrainDataWriter(instanceWriter)

	layerEmbeddings := common.MustReadAllEmbeddings(config.Embeddings)

	// No normalizer is passed to the realizer here.
	realizer := input.NewInputVectorRealizer(ilas, layerEmbeddings, nil)

	var collector *common.WritingCollector
	if labelNumberer == nil {
		collector = common.NewWritingCollector(realizer, trainDataWriter)
	} else {
		collector = common.NewWritingCollectorWithLabelNumberer(realizer, labelNumberer, trainDataWriter)
	}

	trainer := system.NewGreedyTrainer(transitionSystem, collector)

	f, err := os.Open(flag.Arg(1))
	common.ExitIfError("Cannot open training data:", err)
	defer f.Close()

	log.Println("Creating training instances...")
	// Capture the processing result so that failures are reported below
	// instead of being dropped.
	// NOTE(review): assumes common.ProcessData returns an error (from reading
	// the data or from the callback) — confirm against its definition.
	err = common.ProcessData(f, func(s []conllx.Token) error {
		goldDependencies, err := system.SentenceToDependencies(s)
		if err != nil {
			return fmt.Errorf("Cannot extract dependencies: %s", err.Error())
		}
		trainer.Parse(s, oracleConstructor(goldDependencies))

		return nil
	})
	common.ExitIfError("Cannot process data:", err)

	// Write the transitions only when a file is configured and it does not
	// exist yet (os.Stat fails); an existing file is never overwritten.
	if config.Parser.Transitions != "" {
		if _, err := os.Stat(config.Parser.Transitions); err != nil {
			writeTransitions(transitionSystem, collector.LabelNumberer(), config.Parser.Transitions)
		}
	}
}
Exemple #4
0
// main generates a Caffe network definition for the parser and prints it to
// standard output in protobuf text format. It expects exactly one positional
// argument: the configuration file.
//
// The network consists of train/validation data layers, a dropout layer on
// the data, a slice layer that splits the input per layer (words, tags,
// deprels, features, chars), per-layer embedding or flatten layers, and a
// concat -> inner product -> tanh -> dropout -> inner product stack with a
// softmax loss and an accuracy layer. The size of the final inner product
// layer is the number of labels known to the label numberer.
func main() {
	flag.Parse()
	if flag.NArg() != 1 {
		flag.Usage()
		os.Exit(1)
	}

	config := common.ReadConfigOrExit(flag.Arg(0))

	transitionSystem, ok := common.TransitionSystems[config.Parser.System]
	if !ok {
		log.Fatalf("Unknown transition system: %s", config.Parser.System)
	}

	labelNumberer := common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem)

	layers := []*caffe.LayerParameter{
		createDataLayer("test/train-lmdb", caffe.Phase_TRAIN, caffe.DataParameter_LMDB),
		createDataLayer("test/validation-lmdb", caffe.Phase_TEST, caffe.DataParameter_LMDB),
		createDropoutLayer("data_dropout", "data", "data", 0.1),
	}

	ilas := common.ReadIlasOrExit(config.Parser.Inputs)
	vecs := common.ReadEmbeddingsOrExit(config.Embeddings.Word)
	tvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Tag)
	rvecs := common.ReadEmbeddingsOrExit(config.Embeddings.DepRel)
	fvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Feature)
	cvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Char)

	layerVecs := map[addr.Layer]*go2vec.Embeddings{
		addr.TOKEN:   vecs,
		addr.TAG:     tvecs,
		addr.DEPREL:  rvecs,
		addr.FEATURE: fvecs,
		addr.CHAR:    cvecs,
	}

	wordSplits, wordNames := splitLayers(layerVecs, ilas, addr.TOKEN, "words", *wordEmbeddings)
	tagSplits, tagNames := splitLayers(layerVecs, ilas, addr.TAG, "tags", *tagEmbeddings)
	relSplits, relNames := splitLayers(layerVecs, ilas, addr.DEPREL, "deprels", *deprelEmbeddings)
	featureSplits, featureNames := splitLayers(layerVecs, ilas, addr.FEATURE, "features", *featureEmbeddings)
	charSplits, charNames := splitCharLayers(layerVecs, ilas, "chars")

	// Collect the split points and names of all five layers, in the order
	// words, tags, deprels, features, chars. The capacity covers every
	// layer so that the appends below never reallocate.
	splits := make([]uint, 0,
		len(wordSplits)+len(tagSplits)+len(relSplits)+len(featureSplits)+len(charSplits))
	splits = append(splits, wordSplits...)
	splits = append(splits, tagSplits...)
	splits = append(splits, relSplits...)
	splits = append(splits, featureSplits...)
	splits = append(splits, charSplits...)

	splitNames := make([]string, 0,
		len(wordNames)+len(tagNames)+len(relNames)+len(featureNames)+len(charNames))
	splitNames = append(splitNames, wordNames...)
	splitNames = append(splitNames, tagNames...)
	splitNames = append(splitNames, relNames...)
	splitNames = append(splitNames, featureNames...)
	splitNames = append(splitNames, charNames...)

	layers = append(layers, createSliceLayer("slice", "data", splits, splitNames))

	// Names of the layers that feed the concat layer. For each input layer
	// the corresponding flag selects between embedding layers (one name per
	// split) and a single flatten layer.
	var concatLayers []string

	if *wordEmbeddings {
		layers = append(layers, createEmbeddingLayers("words", "wordembed", wordSplits, 50, false)...)
		concatLayers = append(concatLayers, names("wordembed", wordSplits)...)
	} else {
		layers = append(layers, createActivationLayer("flat_words", "words", "flat_words", "Flatten"))
		concatLayers = append(concatLayers, "flat_words")
	}

	if *tagEmbeddings {
		layers = append(layers, createEmbeddingLayers("tags", "tagembed", tagSplits, 50, false)...)
		concatLayers = append(concatLayers, names("tagembed", tagSplits)...)
	} else {
		layers = append(layers, createActivationLayer("flat_tags", "tags", "flat_tags", "Flatten"))
		concatLayers = append(concatLayers, "flat_tags")
	}

	if *deprelEmbeddings {
		layers = append(layers, createEmbeddingLayers("deprels", "relembed", relSplits, 50, false)...)
		concatLayers = append(concatLayers, names("relembed", relSplits)...)
	} else {
		layers = append(layers, createActivationLayer("flat_deprels", "deprels", "flat_deprels", "Flatten"))
		concatLayers = append(concatLayers, "flat_deprels")
	}

	if *featureEmbeddings {
		layers = append(layers, createEmbeddingLayers("features", "featureembed", featureSplits, 50, false)...)
		// The concat names must be derived from the same splits that the
		// feature embedding layers were created from.
		concatLayers = append(concatLayers, names("featureembed", featureSplits)...)
	} else if layerVecs[addr.FEATURE] != nil {
		// Without feature embeddings a flatten layer is only added when
		// the FEATURE entry of layerVecs is non-nil.
		layers = append(layers, createActivationLayer("flat_features", "features", "flat_features", "Flatten"))
		concatLayers = append(concatLayers, "flat_features")
	}

	// Character layers always get embedding layers, each followed by a
	// sigmoid activation applied in place (same input and output name).
	layers = append(layers, createEmbeddingLayers("chars", "morph", charSplits, 210, true)...)
	morphNames := names("morph", charSplits)
	for _, name := range morphNames {
		layers = append(layers, createActivationLayer(fmt.Sprintf("%s_activation", name), name, name, "Sigmoid"))
	}
	concatLayers = append(concatLayers, morphNames...)

	layers = append(layers,
		createConcatLayer("concat", "concat", concatLayers),
		createInnerProductLayer("ip", "concat", "ip", 200),
		createActivationLayer("tanh", "ip", "ip", "TanH"),
		createDropoutLayer("dropout", "ip", "ip", 0.05),
		createInnerProductLayer("ip2", "ip", "ip2", uint32(labelNumberer.Size())),
		createLossLayer("softmax", "ip2", "label", "softmax", "SoftmaxWithLoss"),
		createAccuracyLayer("accuracy", "ip2", "label", "accuracy", "Accuracy"),
	)

	net := &caffe.NetParameter{
		Name:  proto.String("dparnn-net"),
		Layer: layers,
	}

	// A failed write would otherwise leave a truncated network definition
	// on standard output without any indication.
	if err := proto.MarshalText(os.Stdout, net); err != nil {
		log.Fatalf("Cannot write network definition: %s", err)
	}
}