/
main.go
71 lines (55 loc) · 1.27 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package main
import (
	"flag"
	"fmt"
	"io"
	"math/rand"
	"os"
	//"strconv"
	//"strings"
	"time"
	//"unsafe"

	"code.google.com/p/biogo/alphabet"
	"code.google.com/p/biogo/io/seqio/fasta"
	"code.google.com/p/biogo/seq/linear"
)
// Command-line flags. Each variable is a pointer that holds the flag's default
// until flag.Parse is called and the parsed value afterwards.
var (
	// numkeep is the value of -k: how many sequences to sample.
	numkeep = flag.Int("k", 100, "Number of sequences in the sample")
	// total is the value of -t: how many sequences the input file holds.
	total = flag.Int("t", 100, "Number of total sequences in the fasta file")
	// in is the value of -in: the path of the file to read.
	in = flag.String("in", "in.fas", "Input file")
	// out is the value of -out: the path of the file to create.
	out = flag.String("out", "out.fas", "Output file")
)
func main() {
flag.Parse()
// Now really print out the connected components
seqtype := alphabet.DNA
f1, err := os.Open(*in)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v.\n", err)
os.Exit(1)
}
defer f1.Close()
f2, err := os.Create(*out)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v.\n", err)
os.Exit(1)
}
defer f2.Close()
fastaSeqs := fasta.NewReader(f1, linear.NewSeq("", nil, seqtype))
fastaOut := fasta.NewWriter(f2, 80)
rand.Seed(time.Now().UnixNano())
list := rand.Perm(*total)
keep := make(map[int]bool)
for i := 0; i < *numkeep; i++ {
keep[list[i]] = true
}
counter := 0
for true {
seq, e := fastaSeqs.Read()
if e != nil {
break
}
if keep[counter] {
fastaOut.Write(seq)
}
counter++
}
fmt.Fprintf(os.Stderr, "Done.\n")
}