func loadConfig(input string, useStore bool, storeSize int, small bool, storeSigs int, useVPTree bool, myNumber uint, totalMachines uint) error { var store simstore.Storage if useStore { switch storeSize { case 3: if small { store = simstore.New3Small(storeSigs) } else { store = simstore.New3(storeSigs) } case 6: store = simstore.New6(storeSigs) default: return fmt.Errorf("unknown storage size: %d", storeSize) } log.Println("using simstore size", storeSize) } var vpt *vptree.VPTree f, err := os.Open(input) if err != nil { return fmt.Errorf("unable to load %q: %v", input, err) } defer f.Close() scanner := bufio.NewScanner(f) var items []vptree.Item var lines int for scanner.Scan() { fields := strings.Fields(scanner.Text()) id, err := strconv.Atoi(fields[0]) if err != nil { log.Printf("%d: error parsing id: %v", lines, err) continue } sig, err := strconv.ParseUint(fields[1], 16, 64) if err != nil { log.Printf("%d: error parsing signature: %v", lines, err) continue } if uint(sig)%totalMachines == myNumber { if useVPTree { items = append(items, vptree.Item{sig, uint64(id)}) } if useStore { store.Add(sig, uint64(id)) } } lines++ if lines%(1<<20) == 0 { log.Println("processed", lines) } } if err := scanner.Err(); err != nil { log.Println("error during scan: ", err) } log.Println("loaded", lines) Metrics.Signatures.Set(int64(lines)) if useStore { store.Finish() log.Println("simstore done") } if useVPTree { vpt = vptree.New(items) log.Println("vptree done") } UpdateConfig(&Config{store: store, vptree: vpt}) return nil }
func loadConfig(input string, useStore bool, storeSize int, small bool, useVPTree bool, myNumber int, totalMachines int) error { var store simstore.Storage totalLines, err := lineCounter(input) if err != nil { return fmt.Errorf("unable to load %q: %v", input, err) } var sigsEstimate = totalLines log.Printf("totalLines=%+v\n", totalLines) if totalMachines != 1 { // estimate how many signatures will land on this machine, plus a fudge sigsEstimate = totalLines/totalMachines + int(float64(totalLines)*0.05) } log.Printf("preallocating for %d estimated signatures\n", sigsEstimate) if useStore { switch storeSize { case 3: if small { store = simstore.New3Small(sigsEstimate) } else { store = simstore.New3(sigsEstimate) } case 6: store = simstore.New6(sigsEstimate) default: return fmt.Errorf("unknown storage size: %d", storeSize) } log.Println("using simstore size", storeSize) } var vpt *vptree.VPTree f, err := os.Open(input) if err != nil { return fmt.Errorf("unable to load %q: %v", input, err) } defer f.Close() scanner := bufio.NewScanner(f) var items []vptree.Item var lines int var signatures int for scanner.Scan() { fields := strings.Fields(scanner.Text()) id, err := strconv.Atoi(fields[0]) if err != nil { log.Printf("%d: error parsing id: %v", lines, err) continue } sig, err := strconv.ParseUint(fields[1], 16, 64) if err != nil { log.Printf("%d: error parsing signature: %v", lines, err) continue } if sig%uint64(totalMachines) == uint64(myNumber) { if useVPTree { items = append(items, vptree.Item{sig, uint64(id)}) } if useStore { store.Add(sig, uint64(id)) } signatures++ } lines++ if lines%(1<<20) == 0 { log.Printf("processed %d of %d", lines, totalLines) } } if err := scanner.Err(); err != nil { log.Println("error during scan: ", err) } log.Printf("loaded %d lines, %d signatues (%f%% of estimated)", lines, signatures, 100*float64(signatures)/float64(sigsEstimate)) Metrics.Signatures.Set(int64(signatures)) if useStore { store.Finish() log.Println("simstore done") } if useVPTree { vpt = vptree.New(items) log.Println("vptree done") } UpdateConfig(&Config{store: store, vptree: vpt}) return nil }