func main() { if len(os.Args) != 2 { panic("must provide sequence folder file.") } files, _ := ioutil.ReadDir(os.Args[1]) gsm := make(map[int]int) numCores := runtime.NumCPU() runtime.GOMAXPROCS(numCores) kmer_len := 14 for index, file := range files { genome := readGenome(os.Args[1] + "/" + file.Name()) var wg sync.WaitGroup result := make(chan int, numCores) for i := 0; i < numCores; i++ { wg.Add(1) go func(i int) { defer wg.Done() start := len(genome) * i / numCores end := len(genome) * (i + 1) / numCores if start != 0 { start = start - kmer_len } fmt.Println(start, end) kmers.Slide(genome, kmer_len, start, end, result) }(i) } go func() { wg.Wait() close(result) }() for k := range result { if gsm[k] == 0 { gsm[k] = index + 1 } else if gsm[k] == index+1 { } else { gsm[k] = -1 } } } var keys []int for k := range gsm { keys = append(keys, k) } sort.Ints(keys) for _, k := range keys { fmt.Println("Key:", kmers.NumToKmer(k, kmer_len), k, "Value:", gsm[k], "end") } }
func main() { if len(os.Args) != 3 { panic("Must provide sequence folder and result file name.") } kmer_len := 16 files, _ := ioutil.ReadDir(os.Args[1]) resultfile, err := os.Create(os.Args[2] + ".csv") if err != nil { fmt.Printf("%v\n", err) os.Exit(1) } rw := csv.NewWriter(resultfile) head := make([]string, len(files)+1) head[0] = "kmer" for index, fi := range files { head[index+1] = fi.Name() } returnError := rw.Write(head) if returnError != nil { fmt.Println(returnError) } rw.Flush() numCores := runtime.NumCPU() runtime.GOMAXPROCS(numCores) globalOccu := make(map[int]int16) for _, file := range files { localFreq := make(map[int]int32) kmerFreq(file.Name(), localFreq, kmer_len) for k := range localFreq { globalOccu[k]++ } } topKnum := 100 var item *Item kmernum := topKnum * len(files) topK := make([]int, kmernum) for i := range topK { topK[i] = -1 } m := 0 for _, file := range files { fmt.Println(file.Name()) localFreq := make(map[int]int32) kmerFreq(file.Name(), localFreq, kmer_len) pq := make(PriorityQueue, topKnum) k := 0 for j := range localFreq { if !contains(topK, j) { if k < topKnum { pq[k] = &Item{ value: j, priority: []int{int(globalOccu[j]), int(localFreq[j])}, index: k, } k++ if k == topKnum { heap.Init(&pq) } } else { item = &Item{ value: j, priority: []int{int(globalOccu[j]), int(localFreq[j])}, } if compare(item.priority, pq[0].priority) { _ = heap.Pop(&pq).(*Item) heap.Push(&pq, item) } } } } for pq.Len() > 0 { item := heap.Pop(&pq).(*Item) fmt.Println(kmers.NumToKmer(item.value, kmer_len)) topK[m] = item.value m++ } } matrix := make([][]int, kmernum) for i := 0; i < kmernum; i++ { matrix[i] = make([]int, len(files)) } for index, file := range files { localFreq := make(map[int]int32) kmerFreq(file.Name(), localFreq, kmer_len) for i := 0; i < kmernum; i++ { matrix[i][index] = int(localFreq[topK[i]]) } } for i := 0; i < kmernum; i++ { head[0] = strconv.Itoa(topK[i]) for j := 1; j < len(files)+1; j++ { head[j] = strconv.Itoa(matrix[i][j-1]) } returnError := rw.Write(head) if returnError != nil { fmt.Println(returnError) } rw.Flush() } resultfile.Close() }