func main() { flag.Parse() if flag.NArg() != 1 && flag.NArg() != 2 { flag.Usage() os.Exit(1) } config := common.ReadConfigOrExit(flag.Arg(0)) transitionSystem, ok := common.TransitionSystems[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } blasImpl, ok := blasImpls[*blas] if !ok { log.Fatalf("Unknown blas implementation: %s", *blas) } labelNumberer := common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem) parseNetwork := common.ReadModelOrExit(config.Parser.Model, blasImpl) var morphNetwork *network.Network if len(config.Parser.MorphModel) != 0 { morphNetwork = common.ReadModelOrExit(config.Parser.MorphModel, blasImpl) } ilas := common.ReadIlasOrExit(config.Parser.Inputs) normalizer := common.ReadNormalizerOrExit(config.Parser.Normalisation) layerEmbeddings := common.MustReadAllEmbeddings(config.Embeddings) realizer := input.NewInputVectorRealizer(ilas, layerEmbeddings, normalizer) var guide system.Guide if morphNetwork != nil { log.Print("Using morphology") guide = network.NewMorphGuide(parseNetwork, morphNetwork, *labelNumberer, realizer) } else { guide = network.NewGuide(parseNetwork, *labelNumberer, realizer) } parser := system.NewGreedyParser(transitionSystem, guide) if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } start := time.Now() run(parser) elapsed := time.Since(start) log.Printf("Parsing took %s", elapsed) }
func main() { flag.Parse() if flag.NArg() != 3 { flag.Usage() os.Exit(1) } config := common.ReadConfigOrExit(flag.Arg(0)) transitionSystem, ok := common.TransitionSystems[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } oracleConstructor, ok := common.Oracles[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } log.Printf("Transition system: %s", config.Parser.System) ilas := common.ReadIlasOrExit(config.Parser.Inputs) var labelNumberer *system.LabelNumberer if config.Parser.Transitions != "" { if _, err := os.Stat(config.Parser.Transitions); err == nil { log.Printf("Transitions filename %s exists, reusing...", config.Parser.Transitions) labelNumberer = common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem) } } instanceWriter, err := os.Create(flag.Arg(2)) common.ExitIfError("Cannot open instance file for writing:", err) defer instanceWriter.Close() trainDataWriter := input.NewTrainDataWriter(instanceWriter) layerEmbeddings := common.MustReadAllEmbeddings(config.Embeddings) realizer := input.NewInputVectorRealizer(ilas, layerEmbeddings, nil) var collector *common.WritingCollector if labelNumberer == nil { collector = common.NewWritingCollector(realizer, trainDataWriter) } else { collector = common.NewWritingCollectorWithLabelNumberer(realizer, labelNumberer, trainDataWriter) } trainer := system.NewGreedyTrainer(transitionSystem, collector) f, err := os.Open(flag.Arg(1)) common.ExitIfError("Cannot open training data:", err) defer f.Close() log.Println("Creating training instances...") common.ProcessData(f, func(s []conllx.Token) error { goldDependencies, err := system.SentenceToDependencies(s) if err != nil { return fmt.Errorf("Cannot extract dependencies: %s", err.Error()) } trainer.Parse(s, oracleConstructor(goldDependencies)) return nil }) if err != nil { common.ExitIfError("Cannot process data:", err) } if config.Parser.Transitions != "" { if _, err := os.Stat(config.Parser.Transitions); err != nil { writeTransitions(transitionSystem, collector.LabelNumberer(), config.Parser.Transitions) } } }
func main() { flag.Parse() if flag.NArg() != 1 { flag.Usage() os.Exit(1) } config := common.ReadConfigOrExit(flag.Arg(0)) transitionSystem, ok := common.TransitionSystems[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } labelNumberer := common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem) var layers []*caffe.LayerParameter layers = []*caffe.LayerParameter{ createDataLayer("test/train-lmdb", caffe.Phase_TRAIN, caffe.DataParameter_LMDB), createDataLayer("test/validation-lmdb", caffe.Phase_TEST, caffe.DataParameter_LMDB), createDropoutLayer("data_dropout", "data", "data", 0.1), } ilas := common.ReadIlasOrExit(config.Parser.Inputs) vecs := common.ReadEmbeddingsOrExit(config.Embeddings.Word) tvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Tag) rvecs := common.ReadEmbeddingsOrExit(config.Embeddings.DepRel) fvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Feature) cvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Char) layerVecs := map[addr.Layer]*go2vec.Embeddings{ addr.TOKEN: vecs, addr.TAG: tvecs, addr.DEPREL: rvecs, addr.FEATURE: fvecs, addr.CHAR: cvecs, } wordSplits, wordNames := splitLayers(layerVecs, ilas, addr.TOKEN, "words", *wordEmbeddings) tagSplits, tagNames := splitLayers(layerVecs, ilas, addr.TAG, "tags", *tagEmbeddings) relSplits, relNames := splitLayers(layerVecs, ilas, addr.DEPREL, "deprels", *deprelEmbeddings) featureSplits, featureNames := splitLayers(layerVecs, ilas, addr.FEATURE, "features", *featureEmbeddings) charSplits, charNames := splitCharLayers(layerVecs, ilas, "chars") splits := make([]uint, 0, len(wordSplits)+len(tagSplits)+len(relSplits)) splits = append(splits, wordSplits...) splits = append(splits, tagSplits...) splits = append(splits, relSplits...) splits = append(splits, featureSplits...) splits = append(splits, charSplits...) splitNames := make([]string, 0, len(wordNames)+len(tagNames)+len(relNames)+len(featureNames)) splitNames = append(splitNames, wordNames...) splitNames = append(splitNames, tagNames...) splitNames = append(splitNames, relNames...) splitNames = append(splitNames, featureNames...) splitNames = append(splitNames, charNames...) layers = append(layers, createSliceLayer("slice", "data", splits, splitNames)) concatLayers := make([]string, 0) if *wordEmbeddings { layers = append(layers, createEmbeddingLayers("words", "wordembed", wordSplits, 50, false)...) concatLayers = append(concatLayers, names("wordembed", wordSplits)...) } else { layers = append(layers, createActivationLayer("flat_words", "words", "flat_words", "Flatten")) concatLayers = append(concatLayers, "flat_words") } if *tagEmbeddings { layers = append(layers, createEmbeddingLayers("tags", "tagembed", tagSplits, 50, false)...) concatLayers = append(concatLayers, names("tagembed", tagSplits)...) } else { layers = append(layers, createActivationLayer("flat_tags", "tags", "flat_tags", "Flatten")) concatLayers = append(concatLayers, "flat_tags") } if *deprelEmbeddings { layers = append(layers, createEmbeddingLayers("deprels", "relembed", relSplits, 50, false)...) concatLayers = append(concatLayers, names("relembed", relSplits)...) } else { layers = append(layers, createActivationLayer("flat_deprels", "deprels", "flat_deprels", "Flatten")) concatLayers = append(concatLayers, "flat_deprels") } if *featureEmbeddings { layers = append(layers, createEmbeddingLayers("features", "featureembed", featureSplits, 50, false)...) concatLayers = append(concatLayers, names("featureembed", relSplits)...) } else if layerVecs[addr.FEATURE] != nil { layers = append(layers, createActivationLayer("flat_features", "features", "flat_features", "Flatten")) concatLayers = append(concatLayers, "flat_features") } layers = append(layers, createEmbeddingLayers("chars", "morph", charSplits, 210, true)...) for _, name := range names("morph", charSplits) { layers = append(layers, createActivationLayer(fmt.Sprintf("%s_activation", name), name, name, "Sigmoid")) } concatLayers = append(concatLayers, names("morph", charSplits)...) layers = append(layers, createConcatLayer("concat", "concat", concatLayers), createInnerProductLayer("ip", "concat", "ip", 200), createActivationLayer("tanh", "ip", "ip", "TanH"), createDropoutLayer("dropout", "ip", "ip", 0.05), createInnerProductLayer("ip2", "ip", "ip2", uint32(labelNumberer.Size())), createLossLayer("softmax", "ip2", "label", "softmax", "SoftmaxWithLoss"), createAccuracyLayer("accuracy", "ip2", "label", "accuracy", "Accuracy"), ) net := &caffe.NetParameter{ Name: proto.String("dparnn-net"), Layer: layers, } proto.MarshalText(os.Stdout, net) }