Example #1
0
// loadConfig reads signature records from the file named by input, keeps the
// records that belong to this machine's shard, feeds them into the requested
// indexes and installs the result with UpdateConfig.
//
// Each line is expected to carry at least two whitespace-separated fields: a
// decimal id followed by a hexadecimal 64-bit signature. Lines with fewer than
// two fields, or whose fields fail to parse, are logged and skipped.
//
// Parameters:
//   - input: path of the file to read.
//   - useStore: when true, a simstore is created and populated.
//   - storeSize: selects the simstore constructor (3 or 6); any other value
//     is an error when useStore is set.
//   - small: with storeSize 3, selects New3Small instead of New3.
//   - storeSigs: passed through to the simstore constructor.
//   - useVPTree: when true, a VP-tree is built from the kept records.
//   - myNumber, totalMachines: a record is kept when
//     sig % totalMachines == myNumber.
//
// It returns an error when totalMachines is zero, when storeSize is not
// recognised, or when the input file cannot be opened. A scanner error while
// reading is logged and whatever was read up to that point is still installed.
func loadConfig(input string, useStore bool, storeSize int, small bool, storeSigs int, useVPTree bool, myNumber uint, totalMachines uint) error {
	// A zero machine count would make the shard modulus below panic.
	if totalMachines == 0 {
		return fmt.Errorf("invalid number of machines: %d", totalMachines)
	}

	var store simstore.Storage
	if useStore {
		switch storeSize {
		case 3:
			if small {
				store = simstore.New3Small(storeSigs)
			} else {
				store = simstore.New3(storeSigs)
			}
		case 6:
			store = simstore.New6(storeSigs)
		default:
			return fmt.Errorf("unknown storage size: %d", storeSize)
		}

		log.Println("using simstore size", storeSize)
	}

	var vpt *vptree.VPTree

	f, err := os.Open(input)
	if err != nil {
		return fmt.Errorf("unable to load %q: %v", input, err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	var items []vptree.Item
	var lines int  // successfully parsed records
	var lineNo int // physical line in the input, used for diagnostics
	for scanner.Scan() {
		lineNo++

		fields := strings.Fields(scanner.Text())
		// Guard the indexing below: a blank or truncated line must not panic.
		if len(fields) < 2 {
			log.Printf("%d: expected at least 2 fields, got %d", lineNo, len(fields))
			continue
		}

		id, err := strconv.Atoi(fields[0])
		if err != nil {
			log.Printf("%d: error parsing id: %v", lineNo, err)
			continue
		}

		sig, err := strconv.ParseUint(fields[1], 16, 64)
		if err != nil {
			log.Printf("%d: error parsing signature: %v", lineNo, err)
			continue
		}

		// Compare in 64 bits so the shard assignment does not depend on the
		// platform's uint width.
		if sig%uint64(totalMachines) == uint64(myNumber) {
			if useVPTree {
				items = append(items, vptree.Item{sig, uint64(id)})
			}
			if useStore {
				store.Add(sig, uint64(id))
			}
		}
		lines++

		if lines%(1<<20) == 0 {
			log.Println("processed", lines)
		}
	}

	if err := scanner.Err(); err != nil {
		log.Println("error during scan: ", err)
	}

	log.Println("loaded", lines)
	Metrics.Signatures.Set(int64(lines))
	if useStore {
		store.Finish()
		log.Println("simstore done")
	}

	if useVPTree {
		vpt = vptree.New(items)
		log.Println("vptree done")
	}

	UpdateConfig(&Config{store: store, vptree: vpt})
	return nil
}
Example #2
0
// loadConfig reads signature records from the file named by input, keeps the
// records that belong to this machine's shard, feeds them into the requested
// indexes and installs the result with UpdateConfig.
//
// The file is first passed to lineCounter; the resulting line count is used to
// estimate how many signatures this machine will hold, and that estimate is
// handed to the simstore constructor and used to pre-size the VP-tree items.
//
// Each line is expected to carry at least two whitespace-separated fields: a
// decimal id followed by a hexadecimal 64-bit signature. Lines with fewer than
// two fields, or whose fields fail to parse, are logged and skipped.
//
// Parameters:
//   - input: path of the file to read.
//   - useStore: when true, a simstore is created and populated.
//   - storeSize: selects the simstore constructor (3 or 6); any other value
//     is an error when useStore is set.
//   - small: with storeSize 3, selects New3Small instead of New3.
//   - useVPTree: when true, a VP-tree is built from the kept records.
//   - myNumber, totalMachines: a record is kept when
//     sig % totalMachines == myNumber.
//
// It returns an error when totalMachines is not positive, when lineCounter
// fails, when storeSize is not recognised, or when the input file cannot be
// opened. A scanner error while reading is logged and whatever was read up to
// that point is still installed.
func loadConfig(input string, useStore bool, storeSize int, small bool, useVPTree bool, myNumber int, totalMachines int) error {
	// Zero would panic in the division and modulus below; a negative value
	// would wrap to a huge modulus once converted to uint64.
	if totalMachines <= 0 {
		return fmt.Errorf("invalid number of machines: %d", totalMachines)
	}

	var store simstore.Storage

	totalLines, err := lineCounter(input)
	if err != nil {
		return fmt.Errorf("unable to load %q: %v", input, err)
	}

	var sigsEstimate = totalLines

	log.Printf("totalLines=%+v\n", totalLines)

	if totalMachines != 1 {
		// estimate how many signatures will land on this machine, plus a fudge
		sigsEstimate = totalLines/totalMachines + int(float64(totalLines)*0.05)
	}

	log.Printf("preallocating for %d estimated signatures\n", sigsEstimate)

	if useStore {
		switch storeSize {
		case 3:
			if small {
				store = simstore.New3Small(sigsEstimate)
			} else {
				store = simstore.New3(sigsEstimate)
			}
		case 6:
			store = simstore.New6(sigsEstimate)
		default:
			return fmt.Errorf("unknown storage size: %d", storeSize)
		}

		log.Println("using simstore size", storeSize)
	}

	var vpt *vptree.VPTree

	f, err := os.Open(input)
	if err != nil {
		return fmt.Errorf("unable to load %q: %v", input, err)
	}
	defer f.Close()

	scanner := bufio.NewScanner(f)
	var items []vptree.Item
	if useVPTree && sigsEstimate > 0 {
		// Reuse the estimate so the slice does not regrow repeatedly.
		items = make([]vptree.Item, 0, sigsEstimate)
	}
	var lines int      // successfully parsed records
	var lineNo int     // physical line in the input, used for diagnostics
	var signatures int // records kept for this machine
	for scanner.Scan() {
		lineNo++

		fields := strings.Fields(scanner.Text())
		// Guard the indexing below: a blank or truncated line must not panic.
		if len(fields) < 2 {
			log.Printf("%d: expected at least 2 fields, got %d", lineNo, len(fields))
			continue
		}

		id, err := strconv.Atoi(fields[0])
		if err != nil {
			log.Printf("%d: error parsing id: %v", lineNo, err)
			continue
		}

		sig, err := strconv.ParseUint(fields[1], 16, 64)
		if err != nil {
			log.Printf("%d: error parsing signature: %v", lineNo, err)
			continue
		}

		if sig%uint64(totalMachines) == uint64(myNumber) {
			if useVPTree {
				items = append(items, vptree.Item{sig, uint64(id)})
			}
			if useStore {
				store.Add(sig, uint64(id))
			}
			signatures++
		}
		lines++

		if lines%(1<<20) == 0 {
			log.Printf("processed %d of %d", lines, totalLines)
		}
	}

	if err := scanner.Err(); err != nil {
		log.Println("error during scan: ", err)
	}

	// Avoid printing NaN/Inf when the estimate is zero (e.g. an empty input).
	var pctOfEstimate float64
	if sigsEstimate > 0 {
		pctOfEstimate = 100 * float64(signatures) / float64(sigsEstimate)
	}
	log.Printf("loaded %d lines, %d signatures (%f%% of estimated)", lines, signatures, pctOfEstimate)
	Metrics.Signatures.Set(int64(signatures))
	if useStore {
		store.Finish()
		log.Println("simstore done")
	}

	if useVPTree {
		vpt = vptree.New(items)
		log.Println("vptree done")
	}

	UpdateConfig(&Config{store: store, vptree: vpt})
	return nil
}