forked from TuftsBCB/flib
/
vectors.go
53 lines (44 loc) · 1.42 KB
/
vectors.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
package main
import (
"flag"
"fmt"
"strconv"
"strings"
"github.com/ndaniels/tools/util"
)
// cmdVectors is the command descriptor for the "vectors" sub-command. It
// bundles the command name, its usage and help text, a dedicated flag set
// (created with flag.ExitOnError), and the vectors function as the entry
// point.
var cmdVectors = &command{
	name:            "vectors",
	run:             vectors,
	flags:           flag.NewFlagSet("vectors", flag.ExitOnError),
	positionalUsage: "frag-lib bower-file [ bower-file ... ]",
	shortHelp:       "compute bag-of-words",
	help: `
The vectors command returns the Fragbag bag-of-words (vector) for each
bower file given. The format returned is a tab-delimited CSV file where
the first column is the name of the entry and each subsequent column is
the corresponding frequency for each corresponding fragment in the library
given.
Every row is guaranteed to be the same length. Namely, each row will have
N+1 entries, where N is the number of fragments given in the library.
Note that if a weighted fragment library is given, then the frequencies
will be reported as floating point values.
Bower files may either be PDB files or FASTA files.
`,
}
// vectors implements the "vectors" sub-command. The first positional
// argument names the fragment library and every remaining argument is a
// bower file path. For each result produced by util.ProcessBowers, one line
// is written to standard output: the result's Id, followed by each entry of
// its Bow.Freqs, all separated by tabs.
func vectors(c *command) {
	// Requires the library plus at least one bower file.
	// NOTE(review): presumably aborts when fewer arguments are given; the
	// helper is defined elsewhere.
	c.assertLeastNArg(2)

	library := util.Library(c.flags.Arg(0))
	paths := c.flags.Args()[1:]

	// NOTE(review): the meaning of the trailing `true` argument is not
	// visible from this file; confirm against util.ProcessBowers.
	results := util.ProcessBowers(paths, library, flagPairdistModels,
		flagCpu, true)

	for res := range results {
		freqs := res.Bow.Freqs

		// Render each frequency with the shortest decimal representation
		// that round-trips as a 32-bit float (format 'f', precision -1).
		columns := make([]string, 0, len(freqs))
		for _, freq := range freqs {
			columns = append(columns,
				strconv.FormatFloat(float64(freq), 'f', -1, 32))
		}

		fmt.Printf("%s\t%s\n", res.Id, strings.Join(columns, "\t"))
	}
}