func main() { flag.Parse() if flag.NArg() != 3 { flag.Usage() os.Exit(1) } vecs := common.ReadEmbeddingsOrExit(common.Embedding{flag.Arg(0), false, false}) network := common.ReadModelOrExit(flag.Arg(1), cblas.Implementation{}) out, err := os.Create(flag.Arg(2)) common.ExitIfError("Cannot open output vectors for writing: ", err) defer out.Close() if network.Layers() != 1 { fmt.Fprintf(os.Stderr, "Weight file contains %d layers, expected 1", network.Layers()) os.Exit(1) } layer := network.Layer(0) weights := layer.W() if layer.Inputs() != uint(vecs.Size()) { fmt.Fprintf(os.Stderr, "Embedding layer and one-hot size mismatch: %d - %d", layer.Inputs(), vecs.Size()) os.Exit(1) } mergedVecs := go2vec.NewEmbeddings(int(layer.Outputs())) wordIdx := 0 vec := make([]float32, layer.Outputs()) vecs.Iterate(func(word string, vector []float32) bool { for idx := range vec { vec[idx] = weights[uint(idx)*layer.Inputs()+uint(wordIdx)] } mergedVecs.Put(word, vec) wordIdx++ return true }) writer := bufio.NewWriter(out) mergedVecs.Write(writer) writer.Flush() }
func main() { flag.Parse() if flag.NArg() != 1 { flag.Usage() os.Exit(1) } config := common.ReadConfigOrExit(flag.Arg(0)) transitionSystem, ok := common.TransitionSystems[config.Parser.System] if !ok { log.Fatalf("Unknown transition system: %s", config.Parser.System) } labelNumberer := common.ReadTransitionsOrExit(config.Parser.Transitions, transitionSystem) var layers []*caffe.LayerParameter layers = []*caffe.LayerParameter{ createDataLayer("test/train-lmdb", caffe.Phase_TRAIN, caffe.DataParameter_LMDB), createDataLayer("test/validation-lmdb", caffe.Phase_TEST, caffe.DataParameter_LMDB), createDropoutLayer("data_dropout", "data", "data", 0.1), } ilas := common.ReadIlasOrExit(config.Parser.Inputs) vecs := common.ReadEmbeddingsOrExit(config.Embeddings.Word) tvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Tag) rvecs := common.ReadEmbeddingsOrExit(config.Embeddings.DepRel) fvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Feature) cvecs := common.ReadEmbeddingsOrExit(config.Embeddings.Char) layerVecs := map[addr.Layer]*go2vec.Embeddings{ addr.TOKEN: vecs, addr.TAG: tvecs, addr.DEPREL: rvecs, addr.FEATURE: fvecs, addr.CHAR: cvecs, } wordSplits, wordNames := splitLayers(layerVecs, ilas, addr.TOKEN, "words", *wordEmbeddings) tagSplits, tagNames := splitLayers(layerVecs, ilas, addr.TAG, "tags", *tagEmbeddings) relSplits, relNames := splitLayers(layerVecs, ilas, addr.DEPREL, "deprels", *deprelEmbeddings) featureSplits, featureNames := splitLayers(layerVecs, ilas, addr.FEATURE, "features", *featureEmbeddings) charSplits, charNames := splitCharLayers(layerVecs, ilas, "chars") splits := make([]uint, 0, len(wordSplits)+len(tagSplits)+len(relSplits)) splits = append(splits, wordSplits...) splits = append(splits, tagSplits...) splits = append(splits, relSplits...) splits = append(splits, featureSplits...) splits = append(splits, charSplits...) splitNames := make([]string, 0, len(wordNames)+len(tagNames)+len(relNames)+len(featureNames)) splitNames = append(splitNames, wordNames...) splitNames = append(splitNames, tagNames...) splitNames = append(splitNames, relNames...) splitNames = append(splitNames, featureNames...) splitNames = append(splitNames, charNames...) layers = append(layers, createSliceLayer("slice", "data", splits, splitNames)) concatLayers := make([]string, 0) if *wordEmbeddings { layers = append(layers, createEmbeddingLayers("words", "wordembed", wordSplits, 50, false)...) concatLayers = append(concatLayers, names("wordembed", wordSplits)...) } else { layers = append(layers, createActivationLayer("flat_words", "words", "flat_words", "Flatten")) concatLayers = append(concatLayers, "flat_words") } if *tagEmbeddings { layers = append(layers, createEmbeddingLayers("tags", "tagembed", tagSplits, 50, false)...) concatLayers = append(concatLayers, names("tagembed", tagSplits)...) } else { layers = append(layers, createActivationLayer("flat_tags", "tags", "flat_tags", "Flatten")) concatLayers = append(concatLayers, "flat_tags") } if *deprelEmbeddings { layers = append(layers, createEmbeddingLayers("deprels", "relembed", relSplits, 50, false)...) concatLayers = append(concatLayers, names("relembed", relSplits)...) } else { layers = append(layers, createActivationLayer("flat_deprels", "deprels", "flat_deprels", "Flatten")) concatLayers = append(concatLayers, "flat_deprels") } if *featureEmbeddings { layers = append(layers, createEmbeddingLayers("features", "featureembed", featureSplits, 50, false)...) concatLayers = append(concatLayers, names("featureembed", relSplits)...) } else if layerVecs[addr.FEATURE] != nil { layers = append(layers, createActivationLayer("flat_features", "features", "flat_features", "Flatten")) concatLayers = append(concatLayers, "flat_features") } layers = append(layers, createEmbeddingLayers("chars", "morph", charSplits, 210, true)...) for _, name := range names("morph", charSplits) { layers = append(layers, createActivationLayer(fmt.Sprintf("%s_activation", name), name, name, "Sigmoid")) } concatLayers = append(concatLayers, names("morph", charSplits)...) layers = append(layers, createConcatLayer("concat", "concat", concatLayers), createInnerProductLayer("ip", "concat", "ip", 200), createActivationLayer("tanh", "ip", "ip", "TanH"), createDropoutLayer("dropout", "ip", "ip", 0.05), createInnerProductLayer("ip2", "ip", "ip2", uint32(labelNumberer.Size())), createLossLayer("softmax", "ip2", "label", "softmax", "SoftmaxWithLoss"), createAccuracyLayer("accuracy", "ip2", "label", "accuracy", "Accuracy"), ) net := &caffe.NetParameter{ Name: proto.String("dparnn-net"), Layer: layers, } proto.MarshalText(os.Stdout, net) }