Esempio n. 1
0
// main builds a table of k-mers over every file in the folder named by the
// single command-line argument and prints it sorted by k-mer code.
//
// For each k-mer code received from kmers.Slide the table stores:
//   - index+1 of the file, when the k-mer has only been seen in that one file;
//   - -1, once the k-mer has been seen in more than one file.
//
// Each genome is split into numCores slices that are processed concurrently;
// the worker goroutines report k-mer codes on a channel and the merge into the
// table is done by the main goroutine only, so the map needs no locking.
func main() {
	if len(os.Args) != 2 {
		panic("must provide sequence folder file.")
	}

	files, err := ioutil.ReadDir(os.Args[1])
	if err != nil {
		// Without the directory listing there is nothing to process; fail
		// loudly instead of printing an empty table.
		panic(err)
	}
	gsm := make(map[int]int)

	numCores := runtime.NumCPU()
	runtime.GOMAXPROCS(numCores)

	kmerLen := 14
	for index, file := range files {
		genome := readGenome(os.Args[1] + "/" + file.Name())
		var wg sync.WaitGroup
		result := make(chan int, numCores)

		for i := 0; i < numCores; i++ {
			wg.Add(1)
			go func(i int) {
				defer wg.Done()
				start := len(genome) * i / numCores
				end := len(genome) * (i + 1) / numCores
				if start != 0 {
					// Back up so that k-mers straddling the slice boundary
					// are not lost.
					start -= kmerLen
					if start < 0 {
						// Short genomes: never hand Slide a negative offset.
						// The extra overlap is harmless because re-seeing a
						// k-mer in the same file does not change the table.
						start = 0
					}
				}
				fmt.Println(start, end)
				kmers.Slide(genome, kmerLen, start, end, result)
			}(i)
		}

		// Close the channel once every worker has finished so the merge loop
		// below terminates.
		go func() {
			wg.Wait()
			close(result)
		}()

		id := index + 1 // 0 is reserved for "not seen yet"
		for k := range result {
			switch gsm[k] {
			case 0:
				gsm[k] = id
			case id:
				// Already attributed to this file; nothing to do.
			default:
				// Seen in an earlier file as well: mark as shared.
				gsm[k] = -1
			}
		}
	}

	keys := make([]int, 0, len(gsm))
	for k := range gsm {
		keys = append(keys, k)
	}

	sort.Ints(keys)
	for _, k := range keys {
		fmt.Println("Key:", kmers.NumToKmer(k, kmerLen), k, "Value:", gsm[k], "end")
	}
}
Esempio n. 2
0
// main selects, for every file in the sequence folder (first argument), up to
// topKnum k-mers not already selected for an earlier file, and writes a CSV
// (second argument + ".csv") with one row per selected k-mer and one column
// per file holding that k-mer's count from kmerFreq.
//
// The work is done in three passes over the files, recomputing the per-file
// frequency map each time rather than keeping all of them in memory:
//  1. count in how many files each k-mer occurs (globalOccu);
//  2. per file, keep the topKnum best candidates in a heap, ranked by
//     compare on the pair (files containing the k-mer, count in this file);
//  3. fill the count matrix for the selected k-mers and write it out.
//
// NOTE(review): kmerFreq is called with file.Name() only, not joined with the
// folder argument — confirm kmerFreq resolves the path itself.
func main() {
	if len(os.Args) != 3 {
		panic("Must provide sequence folder and result file name.")
	}

	kmerLen := 16
	files, err := ioutil.ReadDir(os.Args[1])
	if err != nil {
		fmt.Printf("%v\n", err)
		os.Exit(1)
	}

	resultfile, err := os.Create(os.Args[2] + ".csv")
	if err != nil {
		fmt.Printf("%v\n", err)
		os.Exit(1)
	}
	rw := csv.NewWriter(resultfile)

	// Header row: "kmer" followed by one column per input file. The same
	// slice is reused as the row buffer when writing the data rows.
	head := make([]string, len(files)+1)
	head[0] = "kmer"
	for index, fi := range files {
		head[index+1] = fi.Name()
	}
	if err := rw.Write(head); err != nil {
		fmt.Println(err)
	}
	rw.Flush()

	numCores := runtime.NumCPU()
	runtime.GOMAXPROCS(numCores)

	// Pass 1: number of files in which each k-mer appears.
	globalOccu := make(map[int]int16)
	for _, file := range files {
		localFreq := make(map[int]int32)
		kmerFreq(file.Name(), localFreq, kmerLen)
		for k := range localFreq {
			globalOccu[k]++
		}
	}

	// Pass 2: pick up to topKnum k-mers per file. Unused slots stay at -1 and
	// are trimmed off once selection is complete.
	topKnum := 100
	kmernum := topKnum * len(files)
	topK := make([]int, kmernum)
	for i := range topK {
		topK[i] = -1
	}
	m := 0 // number of k-mers selected so far
	for _, file := range files {
		fmt.Println(file.Name())
		localFreq := make(map[int]int32)
		kmerFreq(file.Name(), localFreq, kmerLen)

		// Grow the queue by appending so it never contains nil entries, even
		// when the file yields fewer than topKnum new k-mers.
		pq := make(PriorityQueue, 0, topKnum)
		for j, freq := range localFreq {
			if contains(topK, j) {
				continue
			}
			item := &Item{
				value:    j,
				priority: []int{int(globalOccu[j]), int(freq)},
			}
			if len(pq) < topKnum {
				item.index = len(pq)
				pq = append(pq, item)
				if len(pq) == topKnum {
					heap.Init(&pq)
				}
				continue
			}
			// Queue is full: replace the root when the candidate wins the
			// comparison against it.
			if compare(item.priority, pq[0].priority) {
				heap.Pop(&pq)
				heap.Push(&pq, item)
			}
		}
		if len(pq) < topKnum {
			// The queue never filled up, so it has not been heapified yet;
			// do it now so the drain below pops in heap order.
			heap.Init(&pq)
		}

		for pq.Len() > 0 {
			item := heap.Pop(&pq).(*Item)
			fmt.Println(kmers.NumToKmer(item.value, kmerLen))
			topK[m] = item.value
			m++
		}
	}
	// Drop the unfilled -1 slots so they do not become bogus CSV rows.
	topK = topK[:m]

	// Pass 3: matrix[i][f] is the count of selected k-mer i in file f.
	matrix := make([][]int, len(topK))
	for i := range matrix {
		matrix[i] = make([]int, len(files))
	}
	for index, file := range files {
		localFreq := make(map[int]int32)
		kmerFreq(file.Name(), localFreq, kmerLen)
		for i, kmer := range topK {
			matrix[i][index] = int(localFreq[kmer])
		}
	}

	for i, kmer := range topK {
		head[0] = strconv.Itoa(kmer)
		for j, count := range matrix[i] {
			head[j+1] = strconv.Itoa(count)
		}
		if err := rw.Write(head); err != nil {
			fmt.Println(err)
		}
	}

	// Flush once at the end and surface any buffered write error, then make
	// sure the file itself was closed cleanly.
	rw.Flush()
	if err := rw.Error(); err != nil {
		fmt.Println(err)
	}
	if err := resultfile.Close(); err != nil {
		fmt.Println(err)
	}
}