Beispiel #1
0
// mkWeighted wraps a fragment library as a TF-IDF weighted library and saves
// it.
//
// Positional arguments (at least four are required):
//
//	0   the training fragment library, against which BOWs are computed
//	1   the "in" fragment library that is wrapped with the weights
//	2   the output path of the weighted library
//	3+  the bower inputs handed to util.ProcessBowers
//
// For every fragment the weight is log(N / n), where n is the number of BOWs
// in which the fragment has a non-zero frequency and N is the number of BOWs;
// both counts start at 1 (pseudocount) so that neither can be zero.
func mkWeighted(c *command) {
	c.assertLeastNArg(4)

	train := util.Library(c.flags.Arg(0))
	in := util.Library(c.flags.Arg(1))
	outPath := c.flags.Arg(2)
	bowPaths := c.flags.Args()[3:]

	util.AssertOverwritable(outPath, flagOverwrite)

	// The inverse-document-frequencies of each fragment in the "in" fragment
	// library.
	numFrags := in.Size()
	idfs := make([]float32, numFrags)
	for i := range idfs {
		idfs[i] = 1 // pseudocount
	}

	// Compute the BOWs for each bower against the training fragment lib.
	bows := util.ProcessBowers(bowPaths, train, false, flagCpu, util.FlagQuiet)

	// Now tally the number of bowers that each fragment occurred in.
	totalBows := float32(1) // for pseudocount correction
	for bow := range bows {
		freqs := bow.Bow.Freqs

		// The BOWs come from the training library while the tally is indexed
		// by the "in" library. Fail with a readable message instead of an
		// index-out-of-range panic when the frequency vector is too short.
		// NOTE(review): a vector that is longer than numFrags is still
		// accepted (extra entries are ignored), as before — confirm whether
		// the two libraries are required to have the same size.
		if len(freqs) < numFrags {
			util.Fatalf("BOW for %s has %d fragment frequencies, but "+
				"library %s has %d fragments",
				bow.Id, len(freqs), in.Name(), numFrags)
		}

		totalBows++
		for fragi := range idfs {
			if freqs[fragi] > 0 {
				idfs[fragi]++
			}
		}
	}

	// Compute the IDF using the frequencies against all the BOWs.
	for i := range idfs {
		idfs[i] = float32(math.Log(float64(totalBows / idfs[i])))
	}

	// Finally, wrap the given library as a weighted library and save it.
	wlib, err := fragbag.NewWeightedTfIdf(in, idfs)
	util.Assert(err)
	fragbag.Save(util.CreateFile(outPath), wlib)
}
Beispiel #2
0
// viewLib prints a short summary of the fragment library named by the single
// positional argument: its name, tag, number of fragments, fragment size and
// whether it is reported as a structure and/or sequence library.
func viewLib(c *command) {
	c.assertNArg(1)

	flib := util.Library(c.flags.Arg(0))
	tag := strings.Join(libraryTag(flib), "/")

	// One line per property, in a fixed order.
	fmt.Printf(
		"Name: %s\n"+
			"Tag: %s\n"+
			"Size: %d\n"+
			"Fragment Size: %d\n"+
			"IsStructure: %v\n"+
			"IsSequence: %v\n",
		flib.Name(),
		tag,
		flib.Size(),
		flib.FragmentSize(),
		fragbag.IsStructure(flib),
		fragbag.IsSequence(flib),
	)
}
Beispiel #3
0
// mkPaired builds a "paired" fragment library from an existing one and saves
// it.
//
// Positional arguments (exactly two): the input fragment library and the
// output path. The new library is named "paired-<input name>" and contains
// one fragment for every ordered pair (i, j) with i != j of input fragments,
// formed by joining fragment i with fragment j.
//
// Weighted libraries are rejected. Structure libraries and sequence libraries
// whose tag contains "hmm" are supported; sequence profiles and any other
// library kind terminate with a fatal error.
func mkPaired(c *command) {
	c.assertNArg(2)

	in := util.Library(c.flags.Arg(0))
	outPath := c.flags.Arg(1)
	util.AssertOverwritable(outPath, flagOverwrite)

	if _, ok := in.(fragbag.WeightedLibrary); ok {
		util.Fatalf("%s is a weighted library (not allowed)", in.Name())
	}

	name := fmt.Sprintf("paired-%s", in.Name())
	switch {
	case fragbag.IsStructure(in):
		var pairs [][]structure.Coords
		lib := in.(fragbag.StructureLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Atoms(i), lib.Atoms(j)

				// Build the pair in its own backing array. Appending f2
				// directly onto f1 would write into f1's spare capacity (if
				// it has any), so every pair sharing the same f1 could end
				// up aliasing — and overwriting — the same memory.
				pair := make([]structure.Coords, 0, len(f1)+len(f2))
				pair = append(pair, f1...)
				pair = append(pair, f2...)
				pairs = append(pairs, pair)
			}
		}
		pairLib, err := fragbag.NewStructureAtoms(name, pairs)
		util.Assert(err)
		fragbag.Save(util.CreateFile(outPath), pairLib)
	case strings.Contains(in.Tag(), "hmm"):
		var pairs []*seq.HMM
		lib := in.(fragbag.SequenceLibrary)
		nfrags := lib.Size()
		for i := 0; i < nfrags; i++ {
			for j := 0; j < nfrags; j++ {
				if i == j {
					continue
				}
				f1, f2 := lib.Fragment(i).(*seq.HMM), lib.Fragment(j).(*seq.HMM)
				// seq.HMMCat presumably concatenates the two HMMs — see its
				// documentation for the exact semantics.
				pairs = append(pairs, seq.HMMCat(f1, f2))
			}
		}
		pairLib, err := fragbag.NewSequenceHMM(name, pairs)
		util.Assert(err)
		fragbag.Save(util.CreateFile(outPath), pairLib)
	case strings.Contains(in.Tag(), "profile"):
		util.Fatalf("Sequence profiles not implemented.")
	default:
		util.Fatalf("Unrecognized fragment library: %s", in.Tag())
	}
}
Beispiel #4
0
// mkBowDb creates a BOW database and fills it with the BOWs of the given
// inputs.
//
// Positional arguments (at least three): the database path, the fragment
// library, and one or more bower inputs. Every result produced by
// util.ProcessBowers is added to the database, which is then closed; a failure
// to create or close the database is reported through util.Assert.
func mkBowDb(c *command) {
	c.assertLeastNArg(3)

	target := c.flags.Arg(0)
	lib := util.Library(c.flags.Arg(1))
	inputs := c.flags.Args()[2:]

	util.AssertOverwritable(target, flagOverwrite)

	store, err := bowdb.Create(lib, target)
	util.Assert(err)

	// Drain the results straight into the database.
	for entry := range util.ProcessBowers(
		inputs, lib, false, flagCpu, util.FlagQuiet,
	) {
		store.Add(entry)
	}
	util.Assert(store.Close())
}
Beispiel #5
0
// vectors prints the BOW frequency vector of every input, one per line.
//
// Positional arguments (at least two): the fragment library followed by one
// or more bower inputs. Each output line is the result's ID followed by its
// frequencies, all separated by tabs. Frequencies are formatted as 32-bit
// floats with the minimum number of digits needed to represent them exactly.
func vectors(c *command) {
	c.assertLeastNArg(2)
	lib := util.Library(c.flags.Arg(0))
	inputs := c.flags.Args()[1:]

	for res := range util.ProcessBowers(
		inputs, lib, flagPairdistModels, flagCpu, true,
	) {
		cols := make([]string, 0, len(res.Bow.Freqs))
		for _, freq := range res.Bow.Freqs {
			cols = append(cols,
				strconv.FormatFloat(float64(freq), 'f', -1, 32))
		}
		fmt.Printf("%s\t%s\n", res.Id, strings.Join(cols, "\t"))
	}
}
Beispiel #6
0
// pairdist prints the absolute cosine value between the BOWs of every
// unordered pair of inputs.
//
// Positional arguments (at least two): the fragment library followed by one
// or more bower inputs. All results are collected first; then, for each pair
// (in collection order, first index < second index), one line is written with
// the two IDs and the value to four decimal places, separated by tabs.
func pairdist(c *command) {
	c.assertLeastNArg(2)
	lib := util.Library(c.flags.Arg(0))
	inputs := c.flags.Args()[1:]

	collected := make([]bow.Bowed, 0, 1000)
	for res := range util.ProcessBowers(
		inputs, lib, flagPairdistModels, flagCpu, util.FlagQuiet,
	) {
		collected = append(collected, res)
	}

	for i, first := range collected {
		// Only look at entries after the current one so that each pair is
		// reported once.
		for _, second := range collected[i+1:] {
			fmt.Printf("%s\t%s\t%0.4f\n",
				first.Id, second.Id, math.Abs(first.Bow.Cosine(second.Bow)))
		}
	}
}
Beispiel #7
0
// main computes the BOW of a single query and searches for database entries
// close to it in two stages:
//
//  1. A coarse search over the database opened from fragmentLibraryLoc,
//     keeping entries within clusterRadius + maxRadius of the query
//     (presumably cluster centers, judging by the names — confirm).
//  2. For every coarse hit, a scan of the matching group of entries decoded
//     from gobLoc, printing "<id> <distance> " for each entry whose distance
//     to the query is at most maxRadius.
//
// The distance is selected by the package-level metric value (cosine or
// Euclidean).
func main() {
	rand.Seed(1)

	flagCpu := runtime.NumCPU()
	fragmentLib := util.Library(json)
	pdbQueries := []string{pdbQuery}
	bows := util.ProcessBowers(
		pdbQueries, fragmentLib, false, flagCpu, util.FlagQuiet)

	// Stop with a clear error if the database cannot be opened, rather than
	// dereferencing a nil database below.
	dbCenters, err := bowdb.Open(fragmentLibraryLoc)
	util.Assert(err)
	// NOTE(review): any value returned by ReadAll (possibly an error) is
	// discarded here, exactly as before — confirm its signature and check it.
	dbCenters.ReadAll()

	// Map each entry ID of the opened database to its position, which is
	// also used as the index into the decoded groups.
	dbSlices := dec_gob_ss_db(gobLoc)
	centerIndex := make(map[string]int, len(dbCenters.Entries))
	for i, center := range dbCenters.Entries {
		centerIndex[center.Id] = i
	}

	sortBy := bowdb.SortByEuclid
	if metric == cosineDist {
		sortBy = bowdb.SortByCosine
	}

	coarseSearch := bowdb.SearchOptions{
		Limit:  -1,
		Min:    0.0,
		Max:    float64(clusterRadius) + float64(maxRadius),
		SortBy: sortBy,
		Order:  bowdb.OrderAsc,
	}

	for b := range bows {
		coarseResults := dbCenters.Search(coarseSearch, b)

		// NOTE(review): fineResults is accumulated but never read afterwards;
		// it is kept so that newSearchResult is still called as before.
		var fineResults []bowdb.SearchResult
		for _, center := range coarseResults {
			for _, entry := range dbSlices[centerIndex[center.Id]] {
				var dist float64
				switch metric {
				case cosineDist:
					dist = b.Bow.Cosine(entry.Bow)
				case euclideanDist:
					dist = b.Bow.Euclid(entry.Bow)
				}
				if dist <= float64(maxRadius) {
					result := newSearchResult(b, entry)
					// Use a constant format string: the ID is data and may
					// contain '%' characters.
					fmt.Printf("%s %v ", entry.Id, dist)
					fineResults = append(fineResults, result)
				}
			}
		}
	}
}