Esempio n. 1
0
// sendReadRequests folds req, plus any requests that can be received from queue without blocking, into one batch and passes that batch to getter on a new goroutine. Draining stops once queue has nothing ready or the hash set holds readBufferSize entries. A token is sent on bhcs.rateLimit before the goroutine starts, so this call blocks while that channel cannot accept it; the token is received back after getter returns.
func (bhcs *httpBatchStore) sendReadRequests(req chunks.ReadRequest, queue <-chan chunks.ReadRequest, getter batchGetter) {
	pending := chunks.ReadBatch{}
	wanted := hash.HashSet{}
	numReqs := 0

	// enqueue records one request in the batch and counts it so the request wait group can be adjusted by the right amount later.
	enqueue := func(r chunks.ReadRequest) {
		h := r.Hash()
		pending[h] = append(pending[h], r.Outstanding())
		wanted.Insert(h)
		numReqs++
	}

	enqueue(req)

drain:
	for len(wanted) < readBufferSize {
		select {
		case next := <-queue:
			enqueue(next)
		default:
			// Nothing is immediately available; send what we have.
			break drain
		}
	}

	bhcs.rateLimit <- struct{}{}
	go func() {
		defer func() {
			bhcs.requestWg.Add(-numReqs)
			pending.Close()
		}()

		getter(wanted, pending)
		<-bhcs.rateLimit
	}()
}
Esempio n. 2
0
// Unique replaces *h with a new RefByHeight that keeps, in their original relative order, only the first ref encountered for each target hash.
func (h *RefByHeight) Unique() {
	visited := hash.HashSet{}
	deduped := make(RefByHeight, 0, cap(*h))
	for _, ref := range *h {
		key := ref.TargetHash()
		alreadySeen := visited.Has(key)
		visited.Insert(key)
		if alreadySeen {
			continue
		}
		deduped = append(deduped, ref)
	}
	*h = deduped
}
Esempio n. 3
0
// ExtractChunks can be called from any goroutine to send the Chunks referenced by the given hashes to chunkChan. The chunks are ordered by ref-height. Chunks of the same height are sent in an unspecified order, relative to one another.
// Each send blocks until chunkChan can accept the chunk, and chunkChan is not closed here. A failure to decompress a stored value is reported through d.Chk.NoError. The returned error is whatever the underlying iterator accumulated, or nil if iteration finished cleanly.
func (p *orderedChunkCache) ExtractChunks(hashes hash.HashSet, chunkChan chan *chunks.Chunk) error {
	iter := p.orderedChunks.NewIterator(nil, nil)
	defer iter.Release()
	for iter.Next() {
		_, h := fromDbKey(iter.Key())
		if !hashes.Has(h) {
			continue
		}
		data, err := snappy.Decode(nil, iter.Value())
		d.Chk.NoError(err)
		c := chunks.NewChunkWithHash(h, data)
		chunkChan <- &c
	}
	// Next returns false both when the keys are exhausted and when the iterator fails, so surface the iterator's error instead of reporting a truncated extraction as success.
	// NOTE(review): assumes p.orderedChunks is a goleveldb DB whose iterator exposes Error() -- confirm.
	return iter.Error()
}
Esempio n. 4
0
// batchPutRequests starts a worker goroutine, tracked by bhcs.workerWg, that accumulates write requests from bhcs.writeQueue. A signal on bhcs.flushChan makes it drain whatever is still queued and pass the accumulated hashes and hints to bhcs.sendWriteRequests. A signal on bhcs.finishedChan does the same and then ends the goroutine.
func (bhcs *httpBatchStore) batchPutRequests() {
	bhcs.workerWg.Add(1)
	go func() {
		defer bhcs.workerWg.Done()

		pendingHints := types.Hints{}
		pendingHashes := hash.HashSet{}

		// absorb merges one write request into the pending batch.
		absorb := func(wr writeRequest) {
			switch {
			case wr.justHints:
				// Carries no chunk; only its hints are merged below.
			case pendingHashes.Has(wr.hash):
				bhcs.requestWg.Done() // Already have a put enqueued for wr.hash.
			default:
				pendingHashes.Insert(wr.hash)
			}
			for hint := range wr.hints {
				pendingHints[hint] = struct{}{}
			}
		}

		// drainAndSend empties writeQueue without blocking, ships the batch, and starts a fresh one.
		drainAndSend := func() {
			for {
				select {
				case wr := <-bhcs.writeQueue:
					absorb(wr)
				default:
					bhcs.sendWriteRequests(pendingHashes, pendingHints) // Takes ownership of pendingHashes, pendingHints
					pendingHints = types.Hints{}
					pendingHashes = hash.HashSet{}
					return
				}
			}
		}

		for {
			select {
			case wr := <-bhcs.writeQueue:
				absorb(wr)
			case <-bhcs.flushChan:
				drainAndSend()
			case <-bhcs.finishedChan:
				drainAndSend()
				return
			}
		}
	}()
}
Esempio n. 5
0
// TestExtractChunksOrder inserts every chunk into the cache at its own randomly assigned height and verifies that extractChunks yields the chunks in ascending height order, with none left over.
func (suite *LevelDBPutCacheSuite) TestExtractChunksOrder() {
	total := len(suite.chnx)
	expected := make(hash.HashSlice, total)
	wanted := hash.HashSet{}

	// rand.Perm hands each chunk a distinct height in [0, total).
	perm := rand.Perm(total)
	next := 0
	for h, chunk := range suite.chnx {
		height := perm[next]
		next++
		wanted.Insert(h)
		expected[height] = h
		suite.cache.Insert(chunk, uint64(height))
	}

	seen := 0
	for c := range suite.extractChunks(wanted) {
		suite.Equal(expected[seen], c.Hash())
		seen++
	}
	suite.Len(expected[seen:], 0)
}
Esempio n. 6
0
// TestReaderSubset inserts every chunk into the cache but requests at most two of them, then checks that the number of extracted chunks belonging to the requested set equals the size of that set.
func (suite *LevelDBPutCacheSuite) TestReaderSubset() {
	const subsetSize = 2

	wanted := hash.HashSet{}
	for h, chunk := range suite.chnx {
		if len(wanted) < subsetSize {
			wanted.Insert(h)
		}
		suite.cache.Insert(chunk, 1)
	}

	// Ask only for the (at most two) hashes selected above.
	matched := 0
	for c := range suite.extractChunks(wanted) {
		if suite.Contains(wanted, c.Hash()) {
			matched++
		}
	}
	suite.Equal(len(wanted), matched)
}
Esempio n. 7
0
// Pull objects that descend from sourceRef from srcDB to sinkDB. sinkHeadRef should point to a Commit (in sinkDB) that's an ancestor of sourceRef. This allows the algorithm to figure out which portions of data are already present in sinkDB and skip copying them.
// concurrency is the number of traverse worker goroutines to run. Values below 1 are treated as 1, because with no workers nothing would ever receive the queued refs and Pull would block forever.
// If progressCh is non-nil, a PullProgress is sent on it whenever progress is updated; those sends block until the value is received.
func Pull(srcDB, sinkDB Database, sourceRef, sinkHeadRef types.Ref, concurrency int, progressCh chan PullProgress) {
	if concurrency < 1 {
		concurrency = 1
	}

	srcQ, sinkQ := &types.RefByHeight{sourceRef}, &types.RefByHeight{sinkHeadRef}

	// We generally expect that sourceRef descends from sinkHeadRef, so that walking down from sinkHeadRef yields useful hints. If it's not even in the srcDB, then just clear out sinkQ right now and don't bother.
	if !srcDB.has(sinkHeadRef.TargetHash()) {
		sinkQ.PopBack()
	}

	// Since we expect sourceRef to descend from sinkHeadRef, we assume srcDB has a superset of the data in sinkDB. There are some cases where, logically, the code wants to read data it knows to be in sinkDB. In this case, it doesn't actually matter which Database the data comes from, so as an optimization we use whichever is a LocalDatabase -- if either is.
	mostLocalDB := srcDB
	if _, ok := sinkDB.(*LocalDatabase); ok {
		mostLocalDB = sinkDB
	}
	// traverseWorker below takes refs off of {src,sink,com}Chan, processes them to figure out what reachable refs should be traversed, and then sends the results to {srcRes,sinkRes,comRes}Chan.
	// Sending to (or closing) the 'done' channel causes traverseWorkers to exit.
	srcChan := make(chan types.Ref)
	sinkChan := make(chan types.Ref)
	comChan := make(chan types.Ref)
	srcResChan := make(chan traverseResult)
	sinkResChan := make(chan traverseResult)
	comResChan := make(chan traverseResult)
	done := make(chan struct{})

	workerWg := &sync.WaitGroup{}
	defer func() {
		// Stop the workers and wait for them to exit before closing the channels they select on.
		close(done)
		workerWg.Wait()

		close(srcChan)
		close(sinkChan)
		close(comChan)
		close(srcResChan)
		close(sinkResChan)
		close(comResChan)
	}()
	traverseWorker := func() {
		workerWg.Add(1)
		go func() {
			defer workerWg.Done()
			for {
				select {
				case srcRef := <-srcChan:
					srcResChan <- traverseSource(srcRef, srcDB, sinkDB)
				case sinkRef := <-sinkChan:
					sinkResChan <- traverseSink(sinkRef, mostLocalDB)
				case comRef := <-comChan:
					comResChan <- traverseCommon(comRef, sinkHeadRef, mostLocalDB)
				case <-done:
					return
				}
			}
		}()
	}
	for i := 0; i < concurrency; i++ {
		traverseWorker()
	}

	var doneCount, knownCount, doneBytes uint64
	updateProgress := func(moreDone, moreKnown, moreBytes uint64) {
		if progressCh == nil {
			return
		}
		doneCount, knownCount, doneBytes = doneCount+moreDone, knownCount+moreKnown, doneBytes+moreBytes
		progressCh <- PullProgress{doneCount, knownCount + uint64(srcQ.Len()), doneBytes}
	}

	// hc and reachableChunks aren't goroutine-safe, so only write them here.
	hc := hintCache{}
	reachableChunks := hash.HashSet{}
	for !srcQ.Empty() {
		srcRefs, sinkRefs, comRefs := planWork(srcQ, sinkQ)
		srcWork, sinkWork, comWork := len(srcRefs), len(sinkRefs), len(comRefs)
		if srcWork+comWork > 0 {
			updateProgress(0, uint64(srcWork+comWork), 0)
		}

		// These goroutines send work to traverseWorkers, blocking when all are busy. They self-terminate when they've sent all they have.
		go sendWork(srcChan, srcRefs)
		go sendWork(sinkChan, sinkRefs)
		go sendWork(comChan, comRefs)
		// Don't use srcRefs, sinkRefs, or comRefs after this point. The goroutines above own them.

		// Collect exactly one result per ref handed out above.
		for srcWork+sinkWork+comWork > 0 {
			select {
			case res := <-srcResChan:
				for _, reachable := range res.reachables {
					srcQ.PushBack(reachable)
					reachableChunks.Insert(reachable.TargetHash())
				}
				if !res.readHash.IsEmpty() {
					reachableChunks.Remove(res.readHash)
				}
				srcWork--
				updateProgress(1, 0, uint64(res.readBytes))
			case res := <-sinkResChan:
				for _, reachable := range res.reachables {
					sinkQ.PushBack(reachable)
					hc[reachable.TargetHash()] = res.readHash
				}
				sinkWork--
			case res := <-comResChan:
				isHeadOfSink := res.readHash == sinkHeadRef.TargetHash()
				for _, reachable := range res.reachables {
					sinkQ.PushBack(reachable)
					if !isHeadOfSink {
						srcQ.PushBack(reachable)
					}
					hc[reachable.TargetHash()] = res.readHash
				}
				comWork--
				updateProgress(1, 0, uint64(res.readBytes))
			}
		}
		sort.Sort(sinkQ)
		sort.Sort(srcQ)
		sinkQ.Unique()
		srcQ.Unique()
	}

	// Pass along the recorded hint for every hash still in reachableChunks that has one.
	hints := types.Hints{}
	for h := range reachableChunks {
		if hint, present := hc[h]; present {
			hints[hint] = struct{}{}
		}
	}
	sinkDB.validatingBatchStore().AddHints(hints)
}