示例#1
0
文件: search.go 项目: ndaniels/flib
// search queries a BOW database with one or more inputs and hands every
// result set to the outputter.
//
// The first positional argument is the BOW database to open; all remaining
// positional arguments are passed to util.ProcessBowers to produce the query
// BOWs. At least two arguments are required.
func search(c *command) {
	c.assertLeastNArg(2)

	// The ordering and sort-field flags have no direct representation in
	// flagSearchOpts, so they are translated by hand here.
	if flagSearchDesc {
		flagSearchOpts.Order = bowdb.OrderDesc
	}
	if flagSearchSort == "cosine" {
		flagSearchOpts.SortBy = bowdb.SortByCosine
	} else if flagSearchSort == "euclid" {
		flagSearchOpts.SortBy = bowdb.SortByEuclid
	} else {
		util.Fatalf("Unknown sort field '%s'.", flagSearchSort)
	}

	db := util.OpenBowDB(c.flags.Arg(0))
	queryPaths := c.flags.Args()[1:]

	_, err := db.ReadAll()
	util.Assert(err, "Could not read BOW database entries")

	// The final argument is always true here so that the progress bar stays
	// hidden regardless of the quiet flag.
	queries := util.ProcessBowers(queryPaths, db.Lib, false, flagCpu, true)
	results, resultsDone := outputter()

	// Each worker drains the shared query channel until it is closed, so
	// queries are searched in parallel by flagCpu goroutines.
	var workers sync.WaitGroup
	worker := func() {
		defer workers.Done()
		for query := range queries {
			hits := db.Search(flagSearchOpts, query)
			results <- searchResult{query, hits}
		}
	}
	for n := flagCpu; n > 0; n-- {
		workers.Add(1)
		go worker()
	}

	// Only close the result channel once every worker has finished sending,
	// then wait for the outputter to signal that it is done.
	workers.Wait()
	close(results)
	<-resultsDone
	util.Assert(db.Close())
}
示例#2
0
文件: mk_bowdb.go 项目: ndaniels/flib
// mkBowDb creates a new BOW database and fills it with the BOWs computed
// from the given inputs.
//
// Positional arguments: the database path to create, the fragment library
// to compute BOWs against, and one or more inputs for util.ProcessBowers.
func mkBowDb(c *command) {
	c.assertLeastNArg(3)

	var (
		target = c.flags.Arg(0)
		lib    = util.Library(c.flags.Arg(1))
		inputs = c.flags.Args()[2:]
	)

	// Refuse to clobber an existing database unless overwriting was requested.
	util.AssertOverwritable(target, flagOverwrite)

	db, err := bowdb.Create(lib, target)
	util.Assert(err)

	// Stream every computed BOW into the database as it becomes available.
	for entry := range util.ProcessBowers(inputs, lib, false, flagCpu, util.FlagQuiet) {
		db.Add(entry)
	}
	util.Assert(db.Close())
}
示例#3
0
文件: vectors.go 项目: ndaniels/flib
// vectors prints the BOW frequency vector of every input as one
// tab-separated line: the BOW's id followed by each frequency.
//
// The first positional argument is the fragment library; the rest are the
// inputs handed to util.ProcessBowers.
func vectors(c *command) {
	c.assertLeastNArg(2)
	lib := util.Library(c.flags.Arg(0))
	inputs := c.flags.Args()[1:]

	bowed := util.ProcessBowers(inputs, lib, flagPairdistModels, flagCpu, true)
	for entry := range bowed {
		// Format each float32 frequency with the shortest decimal
		// representation that round-trips at 32-bit precision.
		cols := make([]string, 0, len(entry.Bow.Freqs))
		for _, freq := range entry.Bow.Freqs {
			cols = append(cols, strconv.FormatFloat(float64(freq), 'f', -1, 32))
		}
		fmt.Printf("%s\t%s\n", entry.Id, strings.Join(cols, "\t"))
	}
}
示例#4
0
// mkWeighted wraps a fragment library as a TF-IDF weighted library and saves
// it to disk.
//
// Positional arguments:
//
//	0: the "train" fragment library, against which BOWs are computed
//	1: the "in" fragment library, which is the one that gets wrapped
//	2: the output path for the weighted library
//	3+: inputs handed to util.ProcessBowers
//
// The inverse document frequency of fragment i is log(N / n_i), where N is
// the number of BOWs and n_i is the number of BOWs in which fragment i has a
// positive frequency. Both counts start at 1 (a pseudocount) so the ratio is
// always defined.
func mkWeighted(c *command) {
	c.assertLeastNArg(4)

	train := util.Library(c.flags.Arg(0))
	in := util.Library(c.flags.Arg(1))
	outPath := c.flags.Arg(2)
	bowPaths := c.flags.Args()[3:]

	util.AssertOverwritable(outPath, flagOverwrite)

	// The inverse-document-frequencies of each fragment in the "in" fragment
	// library.
	numFrags := in.Size()
	idfs := make([]float32, numFrags)
	for i := range idfs {
		idfs[i] = 1 // pseudocount
	}

	// Compute the BOWs for each bower against the training fragment lib.
	bows := util.ProcessBowers(bowPaths, train, false, flagCpu, util.FlagQuiet)

	// Now tally the number of bowers that each fragment occurred in.
	totalBows := float32(1) // for pseudocount correction
	for bowed := range bows {
		freqs := bowed.Bow.Freqs

		// The BOWs come from the training library while the tallies are
		// sized by the "in" library. Report a mismatch clearly instead of
		// panicking with an index-out-of-range error below.
		if len(freqs) < numFrags {
			util.Fatalf("BOW for '%s' has %d frequencies, but the input "+
				"library has %d fragments.", bowed.Id, len(freqs), numFrags)
		}

		totalBows++
		for fragi := 0; fragi < numFrags; fragi++ {
			if freqs[fragi] > 0 {
				idfs[fragi]++
			}
		}
	}

	// Compute the IDF using the frequencies against all the BOWs.
	for i := range idfs {
		idfs[i] = float32(math.Log(float64(totalBows / idfs[i])))
	}

	// Finally, wrap the given library as a weighted library and save it.
	wlib, err := fragbag.NewWeightedTfIdf(in, idfs)
	util.Assert(err)

	// A failed write or close must not go unnoticed: either one can leave a
	// truncated library file behind.
	outFile := util.CreateFile(outPath)
	util.Assert(fragbag.Save(outFile, wlib), "Could not save weighted library")
	util.Assert(outFile.Close(), "Could not close weighted library file")
}
示例#5
0
文件: pairdist.go 项目: ndaniels/flib
// pairdist prints the absolute cosine value between every unordered pair of
// BOWs computed from the inputs, one pair per line in the form
// "id1<TAB>id2<TAB>distance" with four decimal places.
//
// The first positional argument is the fragment library; the rest are the
// inputs handed to util.ProcessBowers.
func pairdist(c *command) {
	c.assertLeastNArg(2)
	lib := util.Library(c.flags.Arg(0))
	inputs := c.flags.Args()[1:]

	// Every BOW must be in memory before any pair can be compared, so drain
	// the channel completely first.
	all := make([]bow.Bowed, 0, 1000)
	for entry := range util.ProcessBowers(inputs, lib, flagPairdistModels,
		flagCpu, util.FlagQuiet) {
		all = append(all, entry)
	}

	// Pair each BOW only with those after it, so each unordered pair is
	// printed once and no BOW is compared with itself.
	for i, first := range all {
		for _, second := range all[i+1:] {
			d := math.Abs(first.Bow.Cosine(second.Bow))
			fmt.Printf("%s\t%s\t%0.4f\n", first.Id, second.Id, d)
		}
	}
}
示例#6
0
// main runs a two-stage radius search for the single query pdbQuery and
// prints every match as "id distance " on standard output.
//
// Stage one (coarse) searches the BOW database at fragmentLibraryLoc for
// entries within clusterRadius + maxRadius of the query. Stage two (fine)
// looks up the slice of entries in the gob-decoded database that corresponds
// to each coarse hit and keeps the entries whose distance to the query is at
// most maxRadius, using the metric selected by the package-level metric.
func main() {
	rand.Seed(1)

	flagCpu := runtime.NumCPU()
	fragmentLib := util.Library(json)
	pdbQueries := []string{pdbQuery}
	bows := util.ProcessBowers(pdbQueries, fragmentLib, false, flagCpu, util.FlagQuiet)

	// Fail loudly if the database cannot be opened or read; otherwise the
	// search below would dereference a nil database or run against no
	// entries.
	dbCenters, err := bowdb.Open(fragmentLibraryLoc)
	util.Assert(err, "Could not open BOW database")
	_, err = dbCenters.ReadAll()
	util.Assert(err, "Could not read BOW database entries")

	dbSlices := dec_gob_ss_db(gobLoc)

	// Map each entry's id to its position in the database, which is also
	// the index of its slice in dbSlices.
	centerIndex := make(map[string]int, len(dbCenters.Entries))
	for i, center := range dbCenters.Entries {
		centerIndex[center.Id] = i
	}

	sortBy := bowdb.SortByEuclid
	if metric == cosineDist {
		sortBy = bowdb.SortByCosine
	}

	// The coarse radius is widened by clusterRadius.
	// NOTE(review): presumably so that no slice containing an entry within
	// maxRadius of the query is missed -- confirm.
	coarseSearch := bowdb.SearchOptions{
		Limit:  -1,
		Min:    0.0,
		Max:    float64(clusterRadius) + float64(maxRadius),
		SortBy: sortBy,
		Order:  bowdb.OrderAsc,
	}

	for b := range bows {
		coarseResults := dbCenters.Search(coarseSearch, b)

		var fineResults []bowdb.SearchResult
		for _, center := range coarseResults {
			for _, entry := range dbSlices[centerIndex[center.Id]] {
				// dist stays 0 when metric is neither known value.
				var dist float64
				switch metric {
				case cosineDist:
					dist = b.Bow.Cosine(entry.Bow)
				case euclideanDist:
					dist = b.Bow.Euclid(entry.Bow)
				}
				if dist <= float64(maxRadius) {
					result := newSearchResult(b, entry)
					// Pass the id as an argument, never as the format
					// string: an id containing '%' would otherwise be
					// parsed as formatting verbs.
					fmt.Printf("%s %v ", entry.Id, dist)
					fineResults = append(fineResults, result)
				}
			}
		}
	}
}