func initCommand(rnnData []byte) { rnn, err := rnn.DeserializeBidirectional(rnnData) if err != nil { panic(err) } Network = rnn }
func main() { if len(os.Args) != 3 { fmt.Fprintln(os.Stderr, "Usage: classify <rnn> <sample.wav>") os.Exit(1) } rnnData, err := ioutil.ReadFile(os.Args[1]) if err != nil { die(err) } seqFunc, err := rnn.DeserializeBidirectional(rnnData) if err != nil { die(err) } sample, err := cubewhisper.ReadAudioFile(os.Args[2]) if err != nil { die(err) } inSeq := make([]autofunc.Result, len(sample)) for i, x := range sample { inSeq[i] = &autofunc.Variable{Vector: x} } res := seqFunc.BatchSeqs([][]autofunc.Result{inSeq}) classification := ctc.PrefixSearch(res.OutputSeqs()[0], PrefixThreshold) labels := make([]cubewhisper.Label, len(classification)) for i, c := range classification { labels[i] = cubewhisper.Label(c) } fmt.Println("Raw labels:", labels) fmt.Println("Algorithm:", cubewhisper.LabelsToMoveString(labels)) }
func main() { if len(os.Args) != 3 { fmt.Fprintln(os.Stderr, "Usage: rate <rnn> <sample dir>") os.Exit(1) } rnnData, err := ioutil.ReadFile(os.Args[1]) if err != nil { die("Read RNN", err) } seqFunc, err := rnn.DeserializeBidirectional(rnnData) if err != nil { die("Deserialize RNN", err) } index, err := speechdata.LoadIndex(os.Args[2]) if err != nil { die("Load speech index", err) } log.Println("Crunching numbers...") var res results for _, sample := range index.Samples { if sample.File == "" { continue } label := cubewhisper.LabelsForMoveString(sample.Label) wavPath := filepath.Join(index.DirPath, sample.File) sampleSeq, err := cubewhisper.ReadAudioFile(wavPath) if err != nil { die("Load sample audio", err) } intLabel := make([]int, len(label)) for i, x := range label { intLabel[i] = int(x) } output := evalSample(seqFunc, sampleSeq) likelihood := ctc.LogLikelihood(output, intLabel).Output()[0] res.Likelihoods = append(res.Likelihoods, likelihood) res.SampleIDs = append(res.SampleIDs, sample.ID) } sort.Sort(&res) for i, id := range res.SampleIDs { likelihood := res.Likelihoods[i] fmt.Printf("%d. %s - %e\n", i, id, likelihood) } }
func Train(rnnFile, sampleDir string, stepSize float64) { log.Println("Loading samples...") samples, err := ReadSamples(sampleDir) if err != nil { fmt.Fprintln(os.Stderr, "Failed to read samples:", err) os.Exit(1) } var seqFunc *rnn.Bidirectional rnnData, err := ioutil.ReadFile(rnnFile) if err == nil { log.Println("Loaded network from file.") seqFunc, err = rnn.DeserializeBidirectional(rnnData) if err != nil { fmt.Fprintln(os.Stderr, "Failed to deserialize network:", err) os.Exit(1) } } else { log.Println("Created network.") seqFunc = createNetwork(samples) } crossLen := int(CrossRatio * float64(samples.Len())) log.Println("Using", samples.Len()-crossLen, "training and", crossLen, "validation samples...") // Always shuffle the samples in the same way. rand.Seed(123) sgd.ShuffleSampleSet(samples) validation := samples.Subset(0, crossLen) training := samples.Subset(crossLen, samples.Len()) gradienter := &sgd.Adam{ Gradienter: &ctc.RGradienter{ Learner: seqFunc, SeqFunc: seqFunc, MaxConcurrency: MaxConcurrency, MaxSubBatch: MaxSubBatch, }, } var epoch int toggleRegularization(seqFunc, true) sgd.SGDInteractive(gradienter, training, stepSize, BatchSize, func() bool { toggleRegularization(seqFunc, false) cost := ctc.TotalCost(seqFunc, training, CostBatchSize, MaxConcurrency) crossCost := ctc.TotalCost(seqFunc, validation, CostBatchSize, MaxConcurrency) toggleRegularization(seqFunc, true) log.Printf("Epoch %d: cost=%e cross=%e", epoch, cost, crossCost) epoch++ return true }) toggleRegularization(seqFunc, false) data, err := seqFunc.Serialize() if err != nil { fmt.Fprintln(os.Stderr, "Failed to serialize:", err) os.Exit(1) } if err := ioutil.WriteFile(rnnFile, data, 0755); err != nil { fmt.Fprintln(os.Stderr, "Failed to save:", err) os.Exit(1) } }