func main() { var ( in *fasta.Reader out, profile *os.File e error ) inName := flag.String("in", "", "Filename for input to be factorised. Defaults to stdin.") outName := flag.String("out", "", "Filename for output. Defaults to stdout.") k := flag.Int("k", 8, "kmer size to use.") cat := flag.Int("cat", 5, "number of categories.") iter := flag.Int("i", 1000, "iterations.") limit := flag.Int("time", 10, "time limit for NMF.") lo := flag.Int("lo", 1, "minimum number of kmer frequency to use in NMF.") hi := flag.Float64("hi", 0.5, "maximum proportion of kmer representation to use in NMF.") sf := flag.Float64("sf", 0.01, "factor for sparcity of estimating matrices for NMF.") tol := flag.Float64("tol", 0.001, "tolerance for NMF.") threads := flag.Int("threads", 2, "number of threads to use.") seed := flag.Int64("seed", -1, "seed for random number generator (-1 uses system clock).") cpuprofile := flag.String("cpuprofile", "", "write cpu profile to this file.") help := flag.Bool("help", false, "print this usage message.") flag.Parse() if *help { flag.Usage() os.Exit(1) } runtime.GOMAXPROCS(*threads) sparse.MaxProcs = *threads fmt.Fprintf(os.Stderr, "Using %d threads.\n", runtime.GOMAXPROCS(0)) if *cpuprofile != "" { if profile, e = os.Create(*cpuprofile); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.", e) os.Exit(0) } fmt.Fprintf(os.Stderr, "Writing CPU profile data to %s\n", *cpuprofile) pprof.StartCPUProfile(profile) defer pprof.StopCPUProfile() } if *inName == "" { fmt.Fprintln(os.Stderr, "Reading sequences from stdin.") in = fasta.NewReader(os.Stdin) } else if in, e = fasta.NewReaderName(*inName); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.", e) os.Exit(0) } else { fmt.Fprintf(os.Stderr, "Reading sequence from `%s'.\n", *inName) } defer in.Close() if *outName == "" { fmt.Fprintln(os.Stderr, "Writing output to stdout.") out = os.Stdout } else if out, e = os.Create(*outName); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.", e) } else { fmt.Fprintf(os.Stderr, "Writing output to `%s'.\n", *outName) } defer out.Close() totalkmers := make(map[kmerindex.Kmer]float64) kmerlists := make([]map[kmerindex.Kmer]float64, 0) seqTable := make([]string, 0) for { if sequence, err := in.Read(); err != nil { break } else { var freqs map[kmerindex.Kmer]float64 if kindex, e := kmerindex.New(*k, sequence); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.\n", e) fmt.Fprintln(os.Stderr) os.Exit(0) } else { freqs, _ = kindex.NormalisedKmerFrequencies() kmerlists = append(kmerlists, freqs) for kmer, freq := range freqs { totalkmers[kmer] += freq } } seqTable = append(seqTable, string(sequence.ID)) } } kmerArray := make([][]float64, 0) kmerTable := make([]kmerindex.Kmer, 0) for kmer, _ := range totalkmers { var count int for _, kmerlist := range kmerlists { if kmerlist[kmer] > 0 { count++ } } if count < *lo || float64(count)/float64(len(kmerlists)) > *hi { continue } row := make([]float64, len(kmerlists)) for i, kmerlist := range kmerlists { row[i] = float64(kmerlist[kmer]) } kmerArray = append(kmerArray, row) kmerTable = append(kmerTable, kmer) } var kmerMatrix *sparse.Sparse func() { defer func() { if r := recover(); r != nil { fmt.Fprintf(os.Stderr, "Error: %v.", r) os.Exit(0) } }() kmerMatrix = sparse.Matrix(kmerArray) }() f := func(i, j int, v float64) float64 { if kmerMatrix.At(i, j) != 0 { return 1 } return 0 } nonZero := kmerMatrix.Apply(f).Sum() r, c := kmerMatrix.Dims() density := nonZero / float64(r*c) if *seed == -1 { *seed = time.Now().UnixNano() } fmt.Fprintf(os.Stderr, "Using %v as random seed.\n", *seed) rand.Seed(*seed) rows, cols := kmerMatrix.Dims() Wo := sparse.Random(rows, *cat, density**sf) Ho := sparse.Random(*cat, cols, density**sf) fmt.Fprintf(os.Stderr, "Dimensions of Kmer matrix = (%v, %v)\nDensity = %.3f %%\n%v\n", r, c, (density)*100, kmerMatrix) W, H, ok := nmf.Factors(kmerMatrix, Wo, Ho, *tol, *iter, time.Duration(*limit)*1e9) fmt.Fprintf(os.Stderr, "norm(H) = %v norm(W) = %v\n\nFinished = %v\n\n", H.Norm(matrix.Fro), W.Norm(matrix.Fro), ok) printFeature(out, kmerMatrix, W, H, seqTable, kmerTable, *k) }
func main() { var in1, in2 *fasta.Reader inName1 := flag.String("1", "", "Filename for first input.") inName2 := flag.String("2", "", "Filename for second input.") k := flag.Int("k", 6, "kmer size.") help := flag.Bool("help", false, "Print this usage message.") flag.Parse() if *help { flag.Usage() os.Exit(1) } var err error if in1, err = fasta.NewReaderName(*inName1); err != nil { fmt.Fprintf(os.Stderr, "Error: %v.", err) os.Exit(0) } defer in1.Close() if in2, err = fasta.NewReaderName(*inName2); err != nil { fmt.Fprintf(os.Stderr, "Error: %v.", err) os.Exit(0) } defer in2.Close() var ( seq1, seq2 *seq.Seq kmerFreqs1, kmerFreqs2 map[kmerindex.Kmer]float64 ok bool ) if seq1, err = in1.Read(); err != nil { os.Exit(0) } if seq2, err = in2.Read(); err != nil { os.Exit(0) } if index, err := kmerindex.New(*k, seq1); err != nil { fmt.Println(err) os.Exit(0) } else { if kmerFreqs1, ok = index.NormalisedKmerFrequencies(); !ok { fmt.Printf("Unable to determine Kmer frequences for %s\n", seq1.ID) os.Exit(0) } } if index, err := kmerindex.New(*k, seq2); err != nil { fmt.Println(err) os.Exit(0) } else { if kmerFreqs2, ok = index.NormalisedKmerFrequencies(); !ok { fmt.Printf("Unable to determine Kmer frequences for %s\n", seq2.ID) os.Exit(0) } } fmt.Printf("Kmer distance between %s and %s is %f\n", seq1.ID, seq2.ID, kmerindex.Distance(kmerFreqs1, kmerFreqs2)) }
func main() { var ( in *fasta.Reader out *os.File e error ) inName := flag.String("in", "", "Filename for input. Defaults to stdin.") outName := flag.String("out", "", "Filename for output. Defaults to stdout.") k := flag.Int("k", 6, "kmer size.") start := flag.Int("s", 0, "Start site - mandatory parameter > 0.") chunk := flag.Int("chunk", 1000, "Chunk width - < 0 indicates sequence to end.") desch := flag.Bool("desch", false, "Use diagonal base arrangement described by Deschavanne et al., otherwise use orthogonal arrangement.") help := flag.Bool("help", false, "Print this usage message.") flag.Parse() kmerindex.MinKmerLen = *k if *help { flag.Usage() os.Exit(1) } if *start == 0 { fmt.Fprintln(os.Stderr, "Must specify s > 0") flag.Usage() os.Exit(0) } if *inName == "" { in = fasta.NewReader(os.Stdin) } else if in, e = fasta.NewReaderName(*inName); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.", e) os.Exit(0) } defer in.Close() if sequence, err := in.Read(); err != nil { os.Exit(0) } else { if *chunk < 0 { *chunk = sequence.Len() - *start - 1 } fmt.Fprintf(os.Stderr, "Indexing %s\n", sequence.ID) if index, err := kmerindex.New(*k, sequence); err != nil { fmt.Println(err) os.Exit(0) } else { base := color.HSVA{0, 1, 1, 1} cgr := kmercolor.NewCGR(index, base) fmt.Fprintf(os.Stderr, "Painting %s\n", sequence.ID) cgr.Paint(kmercolor.V|kmercolor.H, *desch, *start, *chunk) fmt.Fprintf(os.Stderr, "Writing %s\n", sequence.ID) if out, e = os.Create(fmt.Sprintf("%s.png", *outName)); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.", e) } png.Encode(out, cgr) out.Close() } } }