Esempio n. 1
0
// processImporter reads entries from importer and hands them to a pool of
// insertCensysWorker goroutines over a buffered channel, sending an
// OperationStatus to the progress display once every 128 entries.
//
// The starting position comes from config.Offset (a line offset) or
// config.OffsetByte (a byte offset); setting both is an error. At most
// config.Limit entries are read when the limit is greater than zero.
//
// It returns nil once the limit is reached or NextEntry yields a nil entry
// without an error; otherwise it returns the first error from seeking,
// NextEntry or Size. Both channels are closed when the function returns.
func processImporter(importer censysdata.Importer, db *sqldb.EntriesDatabase, wg *sync.WaitGroup) error {
	// Validate the configuration and fast-forward before anything is
	// started, so a bad offset does not spin up the progress display and
	// the whole worker pool only to tear them down again.
	if *config.Offset > 0 && *config.OffsetByte > 0 {
		return fmt.Errorf("You may not set both offset and offsetByte")
	}

	if *config.Offset > 0 {
		if err := importer.SeekLine(*config.Offset); err != nil {
			return err
		}
	}

	if *config.OffsetByte > 0 {
		if err := importer.SeekByte(*config.OffsetByte); err != nil {
			return err
		}
	}

	// NOTE(review): when the position was reached via SeekLine,
	// config.OffsetByte is necessarily 0 here, so the reported start offset
	// is 0 rather than the importer's real byte position — confirm that
	// this is what displayProgress expects.
	startOffset := *config.OffsetByte

	entryChan := make(chan censysdata.CensysEntry, 100)
	defer close(entryChan)
	statusChan := make(chan OperationStatus, 1)
	defer close(statusChan)
	wg.Add(1)
	defer wg.Done()

	// displayProgress consumes statusChan; presumably it starts its own
	// goroutine and returns immediately — verify against its definition.
	displayProgress(statusChan, wg)
	numWorkers := *config.NumThreads * runtime.NumCPU()
	for i := 0; i < numWorkers; i++ {
		go insertCensysWorker(entryChan, db, wg)
	}

	log.Printf("Starting import from offset=%d, line_limit=%d", importer.ByteOffset(), *config.Limit)

	var maxOffset uint64

	// We've already fast-forwarded, so start counting at 0. The loop only
	// exits through one of the returns below.
	for count := uint64(0); ; count++ {
		if *config.Limit > uint64(0) && count >= *config.Limit {
			return nil
		}
		ent, err := importer.NextEntry()
		if err != nil || ent == nil {
			// A nil entry with a nil error ends the import successfully.
			return err
		}
		if count%128 == 0 {
			// Lazily obtain the max offset; retried while it is still 0.
			if maxOffset < 1 {
				maxOffset, err = importer.Size()
				if err != nil {
					return err
				}
			}

			statusChan <- OperationStatus{int64(startOffset), int64(ent.Offset), int64(maxOffset)}
		}
		entryChan <- *ent
	}
}