Esempio n. 1
0
// main reads FASTA sequences, builds a matrix of normalised kmer frequencies
// (one row per retained kmer, one column per sequence), factorises it with
// nmf.Factors and hands the resulting factors to printFeature.
//
// Progress and diagnostics are written to stderr. Any failure terminates the
// program with a non-zero exit status.
func main() {
	var (
		in           *fasta.Reader
		out, profile *os.File
		e            error
	)

	inName := flag.String("in", "", "Filename for input to be factorised. Defaults to stdin.")
	outName := flag.String("out", "", "Filename for output. Defaults to stdout.")
	k := flag.Int("k", 8, "kmer size to use.")
	cat := flag.Int("cat", 5, "number of categories.")
	iter := flag.Int("i", 1000, "iterations.")
	limit := flag.Int("time", 10, "time limit for NMF.")
	lo := flag.Int("lo", 1, "minimum number of kmer frequency to use in NMF.")
	hi := flag.Float64("hi", 0.5, "maximum proportion of kmer representation to use in NMF.")
	sf := flag.Float64("sf", 0.01, "factor for sparcity of estimating matrices for NMF.")
	tol := flag.Float64("tol", 0.001, "tolerance for NMF.")
	threads := flag.Int("threads", 2, "number of threads to use.")
	seed := flag.Int64("seed", -1, "seed for random number generator (-1 uses system clock).")
	cpuprofile := flag.String("cpuprofile", "", "write cpu profile to this file.")
	help := flag.Bool("help", false, "print this usage message.")

	flag.Parse()

	if *help {
		flag.Usage()
		os.Exit(1)
	}

	// fatalf reports a failure on stderr and exits with a non-zero status.
	// os.Exit does not run deferred calls, so any active CPU profile is
	// stopped explicitly first; StopCPUProfile is a no-op when no profile
	// is running.
	fatalf := func(format string, args ...interface{}) {
		fmt.Fprintf(os.Stderr, format, args...)
		pprof.StopCPUProfile()
		os.Exit(1)
	}

	runtime.GOMAXPROCS(*threads)
	sparse.MaxProcs = *threads
	fmt.Fprintf(os.Stderr, "Using %d threads.\n", runtime.GOMAXPROCS(0))

	if *cpuprofile != "" {
		if profile, e = os.Create(*cpuprofile); e != nil {
			fatalf("Error: %v.\n", e)
		}
		// Deferred calls run last-in first-out: the profile is stopped
		// (and flushed) before its file is closed.
		defer profile.Close()
		fmt.Fprintf(os.Stderr, "Writing CPU profile data to %s\n", *cpuprofile)
		if e = pprof.StartCPUProfile(profile); e != nil {
			fatalf("Error: %v.\n", e)
		}
		defer pprof.StopCPUProfile()
	}

	if *inName == "" {
		fmt.Fprintln(os.Stderr, "Reading sequences from stdin.")
		in = fasta.NewReader(os.Stdin)
	} else if in, e = fasta.NewReaderName(*inName); e != nil {
		fatalf("Error: %v.\n", e)
	} else {
		fmt.Fprintf(os.Stderr, "Reading sequence from `%s'.\n", *inName)
	}
	defer in.Close()

	if *outName == "" {
		fmt.Fprintln(os.Stderr, "Writing output to stdout.")
		out = os.Stdout
	} else if out, e = os.Create(*outName); e != nil {
		// Without an output file there is nowhere to write the result,
		// so stop here rather than continuing with a nil file.
		fatalf("Error: %v.\n", e)
	} else {
		fmt.Fprintf(os.Stderr, "Writing output to `%s'.\n", *outName)
	}
	defer out.Close()

	// totalkmers accumulates each kmer's frequency over all sequences; it is
	// used below as the set of kmers seen. kmerlists holds the per-sequence
	// frequency maps and seqTable the matching sequence IDs, in read order.
	totalkmers := make(map[kmerindex.Kmer]float64)
	var (
		kmerlists []map[kmerindex.Kmer]float64
		seqTable  []string
	)

	for {
		sequence, err := in.Read()
		if err != nil {
			// Any read error ends the input loop.
			// NOTE(review): presumably this is end of input; other read
			// errors are not distinguished here.
			break
		}

		kindex, err := kmerindex.New(*k, sequence)
		if err != nil {
			fatalf("Error: %v.\n", err)
		}
		freqs, ok := kindex.NormalisedKmerFrequencies()
		if !ok {
			fatalf("Unable to determine Kmer frequences for %s\n", sequence.ID)
		}

		kmerlists = append(kmerlists, freqs)
		for kmer, freq := range freqs {
			totalkmers[kmer] += freq
		}
		seqTable = append(seqTable, string(sequence.ID))
	}

	// Build one matrix row per kmer that is present in at least *lo sequences
	// and in no more than the proportion *hi of all sequences.
	var (
		kmerArray [][]float64
		kmerTable []kmerindex.Kmer
	)

	for kmer := range totalkmers {
		row := make([]float64, len(kmerlists))
		count := 0
		for i, kmerlist := range kmerlists {
			row[i] = kmerlist[kmer]
			if row[i] > 0 {
				count++
			}
		}
		if count < *lo || float64(count)/float64(len(kmerlists)) > *hi {
			continue
		}
		kmerArray = append(kmerArray, row)
		kmerTable = append(kmerTable, kmer)
	}

	if len(kmerArray) == 0 {
		// Nothing to factorise; this also keeps the density calculation
		// below from dividing by zero.
		fatalf("Error: no kmers satisfy the lo/hi filters; nothing to factorise.\n")
	}

	// sparse.Matrix is wrapped in a recover so that a panic during
	// construction is reported as an ordinary error.
	var kmerMatrix *sparse.Sparse
	func() {
		defer func() {
			if r := recover(); r != nil {
				fatalf("Error: %v.\n", r)
			}
		}()
		kmerMatrix = sparse.Matrix(kmerArray)
	}()

	// Count the non-zero elements by mapping each one to 1 and summing.
	f := func(i, j int, v float64) float64 {
		if kmerMatrix.At(i, j) != 0 {
			return 1
		}
		return 0
	}
	nonZero := kmerMatrix.Apply(f).Sum()

	r, c := kmerMatrix.Dims()
	density := nonZero / float64(r*c)

	if *seed == -1 {
		*seed = time.Now().UnixNano()
	}
	fmt.Fprintf(os.Stderr, "Using %v as random seed.\n", *seed)
	rand.Seed(*seed)

	// Initial factor estimates are random matrices whose density argument
	// is the input density scaled by *sf.
	Wo := sparse.Random(r, *cat, density**sf)
	Ho := sparse.Random(*cat, c, density**sf)

	fmt.Fprintf(os.Stderr, "Dimensions of Kmer matrix = (%v, %v)\nDensity = %.3f %%\n%v\n", r, c, (density)*100, kmerMatrix)

	// The time limit flag is interpreted in seconds.
	W, H, ok := nmf.Factors(kmerMatrix, Wo, Ho, *tol, *iter, time.Duration(*limit)*time.Second)

	fmt.Fprintf(os.Stderr, "norm(H) = %v norm(W) = %v\n\nFinished = %v\n\n", H.Norm(matrix.Fro), W.Norm(matrix.Fro), ok)

	printFeature(out, kmerMatrix, W, H, seqTable, kmerTable, *k)
}
Esempio n. 2
0
// main reads the first sequence from each of two FASTA files, computes the
// normalised kmer frequencies of both, and prints the kmer distance between
// them to stdout.
//
// Diagnostics are written to stderr and any failure terminates the program
// with a non-zero exit status.
func main() {
	var in1, in2 *fasta.Reader

	inName1 := flag.String("1", "", "Filename for first input.")
	inName2 := flag.String("2", "", "Filename for second input.")
	k := flag.Int("k", 6, "kmer size.")
	help := flag.Bool("help", false, "Print this usage message.")

	flag.Parse()

	if *help {
		flag.Usage()
		os.Exit(1)
	}

	// fatalf reports a failure on stderr and exits with a non-zero status.
	fatalf := func(format string, args ...interface{}) {
		fmt.Fprintf(os.Stderr, format, args...)
		os.Exit(1)
	}

	var err error

	if in1, err = fasta.NewReaderName(*inName1); err != nil {
		fatalf("Error: %v.\n", err)
	}
	defer in1.Close()

	if in2, err = fasta.NewReaderName(*inName2); err != nil {
		fatalf("Error: %v.\n", err)
	}
	defer in2.Close()

	var seq1, seq2 *seq.Seq

	// Only the first record of each input is used.
	if seq1, err = in1.Read(); err != nil {
		fatalf("Error: %v.\n", err)
	}
	if seq2, err = in2.Read(); err != nil {
		fatalf("Error: %v.\n", err)
	}

	// frequencies indexes s with kmers of length *k and returns its
	// normalised kmer frequencies, terminating the program on failure.
	frequencies := func(s *seq.Seq) map[kmerindex.Kmer]float64 {
		index, err := kmerindex.New(*k, s)
		if err != nil {
			fatalf("Error: %v.\n", err)
		}
		freqs, ok := index.NormalisedKmerFrequencies()
		if !ok {
			fatalf("Unable to determine Kmer frequences for %s\n", s.ID)
		}
		return freqs
	}

	kmerFreqs1 := frequencies(seq1)
	kmerFreqs2 := frequencies(seq2)

	fmt.Printf("Kmer distance between %s and %s is %f\n", seq1.ID, seq2.ID, kmerindex.Distance(kmerFreqs1, kmerFreqs2))
}
Esempio n. 3
0
// main reads the first sequence from a FASTA input, builds a kmer index for
// it, paints a CGR image from the index with kmercolor and writes the image
// as a PNG.
//
// The image goes to "<out>.png" when -out is given and to stdout otherwise,
// as the flag's usage text states. Diagnostics are written to stderr and any
// failure terminates the program with a non-zero exit status.
func main() {
	var (
		in  *fasta.Reader
		out *os.File
		e   error
	)

	inName := flag.String("in", "", "Filename for input. Defaults to stdin.")
	outName := flag.String("out", "", "Filename for output. Defaults to stdout.")
	k := flag.Int("k", 6, "kmer size.")
	start := flag.Int("s", 0, "Start site - mandatory parameter > 0.")
	chunk := flag.Int("chunk", 1000, "Chunk width - < 0 indicates sequence to end.")
	desch := flag.Bool("desch", false, "Use diagonal base arrangement described by Deschavanne et al., otherwise use orthogonal arrangement.")
	help := flag.Bool("help", false, "Print this usage message.")

	flag.Parse()

	kmerindex.MinKmerLen = *k

	if *help {
		flag.Usage()
		os.Exit(1)
	}

	// fatalf reports a failure on stderr and exits with a non-zero status.
	fatalf := func(format string, args ...interface{}) {
		fmt.Fprintf(os.Stderr, format, args...)
		os.Exit(1)
	}

	// The start site is mandatory and must be positive; reject the zero
	// default as well as negative values.
	if *start <= 0 {
		fmt.Fprintln(os.Stderr, "Must specify s > 0")
		flag.Usage()
		os.Exit(1)
	}

	if *inName == "" {
		in = fasta.NewReader(os.Stdin)
	} else if in, e = fasta.NewReaderName(*inName); e != nil {
		fatalf("Error: %v.\n", e)
	}
	defer in.Close()

	// Only the first record of the input is used.
	sequence, err := in.Read()
	if err != nil {
		fatalf("Error: %v.\n", err)
	}

	// A negative chunk width means "from the start site to the end of the
	// sequence".
	if *chunk < 0 {
		*chunk = sequence.Len() - *start - 1
	}

	fmt.Fprintf(os.Stderr, "Indexing %s\n", sequence.ID)
	index, err := kmerindex.New(*k, sequence)
	if err != nil {
		fatalf("Error: %v.\n", err)
	}

	base := color.HSVA{0, 1, 1, 1}
	cgr := kmercolor.NewCGR(index, base)
	fmt.Fprintf(os.Stderr, "Painting %s\n", sequence.ID)
	cgr.Paint(kmercolor.V|kmercolor.H, *desch, *start, *chunk)

	fmt.Fprintf(os.Stderr, "Writing %s\n", sequence.ID)
	if *outName == "" {
		out = os.Stdout
	} else if out, e = os.Create(fmt.Sprintf("%s.png", *outName)); e != nil {
		// Without an output file the image cannot be written.
		fatalf("Error: %v.\n", e)
	}
	if e = png.Encode(out, cgr); e != nil {
		out.Close()
		fatalf("Error: %v.\n", e)
	}
	// A failed Close can mean the image was not fully written.
	if e = out.Close(); e != nil {
		fatalf("Error: %v.\n", e)
	}
}