Пример #1
0
// CopyContainer copies every file listed in container.Files from inPool to
// outPool, in slice order. Combined with fspool and blockpool, it can be used
// to split a container into blocks or join it back into regular files.
//
// While copying, consumer's progress label is set to the path of the current
// file and its progress to the fraction of container.Size written so far.
//
// The first error encountered (obtaining a reader or a writer, copying, or
// closing a writer) aborts the whole operation and is returned unchanged.
func CopyContainer(container *tlc.Container, outPool wsync.WritablePool, inPool wsync.Pool, consumer *state.Consumer) error {
	// copyFile copies the file at fileIndex; byteOffset is the number of bytes
	// of the container that precede it, used only for progress reporting.
	copyFile := func(byteOffset int64, fileIndex int64) error {
		r, err := inPool.GetReader(fileIndex)
		if err != nil {
			return err
		}

		w, err := outPool.GetWriter(fileIndex)
		if err != nil {
			return err
		}

		// report overall (container-wide) progress as bytes go through
		cw := counter.NewWriterCallback(func(count int64) {
			alpha := float64(byteOffset+count) / float64(container.Size)
			consumer.Progress(alpha)
		}, w)

		if _, err = io.Copy(cw, r); err != nil {
			// Don't leak the writer when the copy fails. The copy error is
			// the root cause, so a secondary Close error is deliberately
			// dropped in its favor.
			_ = w.Close()
			return err
		}

		// on the success path, a Close failure is reported to the caller
		return w.Close()
	}

	byteOffset := int64(0)

	for fileIndex, f := range container.Files {
		consumer.ProgressLabel(f.Path)

		err := copyFile(byteOffset, int64(fileIndex))
		if err != nil {
			return err
		}

		byteOffset += f.Size
	}

	return nil
}
Пример #2
0
// ComputeSignatureToWriter is a variant of ComputeSignature that writes hashes
// to a callback.
//
// For each entry of container.Files, in order, it gets a reader from pool and
// feeds it to CreateSignature along with sigWriter. consumer's progress label
// is set to the current file's path, and its progress to the fraction of
// container.Size read so far (based on each file's Offset).
//
// pool is always closed before returning. Any failure is returned wrapped; an
// error from closing pool is only reported if nothing else failed first.
func ComputeSignatureToWriter(container *tlc.Container, pool wsync.Pool, consumer *state.Consumer, sigWriter wsync.SignatureWriter) (err error) {
	// err is a named result on purpose: it is the only way for the deferred
	// function below to make the Close error visible to the caller.
	defer func() {
		if pErr := pool.Close(); pErr != nil && err == nil {
			err = errors.Wrap(pErr, 1)
		}
	}()

	sctx := mksync()

	totalBytes := container.Size
	fileOffset := int64(0)

	// count is relative to the file currently being read, hence the offset
	onRead := func(count int64) {
		consumer.Progress(float64(fileOffset+count) / float64(totalBytes))
	}

	for fileIndex, f := range container.Files {
		consumer.ProgressLabel(f.Path)
		fileOffset = f.Offset

		var reader io.Reader
		reader, err = pool.GetReader(int64(fileIndex))
		if err != nil {
			return errors.Wrap(err, 1)
		}

		cr := counter.NewReaderCallback(onRead, reader)
		err = sctx.CreateSignature(int64(fileIndex), cr, sigWriter)
		if err != nil {
			return errors.Wrap(err, 1)
		}
	}

	return nil
}
Пример #3
0
// Faster Suffix Sorting, see: http://www.larsson.dogma.net/ssrev-tr.pdf
// Output `I` is a sorted suffix array.
// TODO: implement parallel sorting as a faster alternative for high-RAM environments
// see http://www.zbh.uni-hamburg.de/pubs/pdf/FutAluKur2001.pdf
//
// The returned slice has len(obuf)+1 entries.
//
// ctx.SuffixSortConcurrency selects the mode: 0 sorts sequentially, any other
// value sorts in parallel. A value below 1 is added to runtime.NumCPU() to get
// the number of workers (so -1 asks for one worker less than the CPU count);
// the result is clamped to at least one worker. Parallel mode keeps a second
// copy of the group array (V2) for the duration of the sort.
//
// consumer receives debug messages, a progress label for each pass, and
// periodic progress updates.
func qsufsort(obuf []byte, ctx *DiffContext, consumer *state.Consumer) []int32 {
	parallel := ctx.SuffixSortConcurrency != 0
	numWorkers := ctx.SuffixSortConcurrency
	if numWorkers < 1 {
		numWorkers += runtime.NumCPU()
	}
	if numWorkers < 1 {
		// a negative concurrency can exceed the number of CPUs (e.g. -1 on a
		// single-CPU machine): with zero workers nobody would ever drain the
		// tasks channel, and a negative count would make the channel
		// allocation below panic. Always keep one worker.
		numWorkers = 1
	}

	var buckets [256]int32
	var i, h int32
	var obuflen = int32(len(obuf))

	I := make([]int32, obuflen+1)
	V := make([]int32, obuflen+1)

	// count occurrences of each byte value...
	for _, c := range obuf {
		buckets[c]++
	}
	// ...turn them into cumulative counts...
	for i = 1; i < 256; i++ {
		buckets[i] += buckets[i-1]
	}
	// ...and shift by one, so that buckets[c] is the number of bytes
	// strictly smaller than c
	copy(buckets[1:], buckets[:])
	buckets[0] = 0

	// place each suffix in its first-byte bucket. Index 0 of I is left out
	// here, it is assigned separately below. Once this loop is done,
	// buckets[c] is the index of the last slot of c's bucket.
	for i, c := range obuf {
		buckets[c]++
		I[buckets[c]] = int32(i)
	}

	I[0] = obuflen
	// the initial group number of a suffix is the last index of its bucket
	for i, c := range obuf {
		V[i] = buckets[c]
	}

	V[obuflen] = 0
	// buckets holding a single suffix are already sorted: mark them as
	// sorted groups of length 1 (sorted groups are stored as negative lengths)
	for i = 1; i < 256; i++ {
		if buckets[i] == buckets[i-1]+1 {
			I[buckets[i]] = -1
		}
	}
	I[0] = -1

	const progressInterval = 64 * 1024

	var V2 []int32
	var marks []mark

	if parallel {
		consumer.Debugf("parallel suffix sorting (%d workers)", numWorkers)
		V2 = append([]int32{}, V...)
		marks = make([]mark, 0)
	} else {
		consumer.Debugf("single-core suffix sorting")
	}

	// we buffer the tasks channel so that we can queue workloads (and
	// combine sorted groups) faster than workers can handle them: this helps throughput.
	// picking a value too small would lower core utilization.
	// picking a value too large would add overhead, negating the benefits.
	taskBufferSize := numWorkers * 4

	done := make(chan bool)
	var copyStart time.Time
	var copyDuration time.Duration

	// each pass doubles h; we're done once I[0] marks a single sorted group
	// spanning all len(obuf)+1 suffixes
	for h = 1; I[0] != -(obuflen + 1); h += h {
		// in practice, h < 32, so this is a calculated waste of memory
		tasks := make(chan sortTask, taskBufferSize)

		if parallel {
			// in parallel mode, fan-out sorting tasks to a few workers
			for i := 0; i < numWorkers; i++ {
				go func() {
					for task := range tasks {
						// see split's definition for why V and V2 are necessary
						split(I, V, V2, task.start, task.length, task.h)
					}
					done <- true
				}()
			}

			// keep track of combined groups we found while scanning I
			marks = marks[:0]
		}

		consumer.ProgressLabel(fmt.Sprintf("Suffix sorting (%d-order)", h))

		// used to combine adjacent sorted groups into a single, bigger sorted group
		// eventually we'll be left with a single sorted group of size len(obuf)+1
		var n int32

		// total number of suffixes sorted at the end of this pass
		var nTotal int32

		// last index at which we emitted progress info
		var lastI int32

		for i = 0; i < obuflen+1; {
			if i-lastI > progressInterval {
				// calling Progress on every iteration would slow down diff significantly
				progress := float64(i) / float64(obuflen)
				consumer.Progress(progress)
				lastI = i
			}

			if I[i] < 0 {
				// found a combined-sorted group
				// n accumulates adjacent combined-sorted groups
				n -= I[i]

				// count towards total number of suffixes sorted
				nTotal -= I[i]

				// skip over it, since it's already sorted
				i -= I[i]
			} else {
				if n != 0 {
					// before we encountered this group, we had "-n" sorted suffixes
					// (potentially from different groups), merge them into a single group
					if parallel {
						// if working in parallel, only mark after tasks are done, otherwise
						// it messes with indices the quicksort is relying on
						marks = append(marks, mark{index: i - n, value: -n})
					} else {
						// if working sequentially, we can mark them immediately.
						I[i-n] = -n
					}
				}

				// retrieve size of group to sort (g - f + 1), where g is the group number
				// and f is the index of the start of the group (i, here)
				n = V[I[i]] + 1 - i

				// only hand out sorts to other cores if:
				//   - we're doing a parallel suffix sort,
				//   - the array to sort is big enough
				// otherwise, the overhead cancels the performance gains.
				// this means not all cores will always be maxed out
				// (especially in later passes), but we'll still complete sooner
				if parallel && n > 128 {
					tasks <- sortTask{
						start:  i,
						length: n,
						h:      h,
					}
				} else {
					if parallel {
						// other groups might be sorted in parallel, still need to use V and V2
						split(I, V, V2, i, n, h)
					} else {
						// no need for V2 in sequential mode, only one core ever reads/write to V
						split(I, V, V, i, n, h)
					}
				}

				// advance over entire group
				i += n
				// reset "combined sorted group" length accumulator
				n = 0
			}
		}

		if parallel {
			// this will break out of the "for-range" of the workers when
			// the channel's buffer is empty
			close(tasks)
			for i := 0; i < numWorkers; i++ {
				// workers cannot err, only panic, we're just looking for completion here
				<-done
			}

			// we can now safely mark groups as sorted
			for _, mark := range marks {
				// consumer.Debugf("Setting I[%d] to %d", I[i-n], -n)
				I[mark.index] = mark.value
			}
		}

		if n != 0 {
			// eventually, this will write I[0] = -(len(obuf) + 1), when
			// all suffixes are sorted. until then, it'll catch the last combined
			// sorted group
			I[i-n] = -n
		}

		// consumer.Debugf("%d/%d was already done (%.2f%%)", doneI, (obuflen + 1),
		// 	100.0*float64(doneI)/float64(obuflen+1))

		if parallel {
			// bring V up to date with the group numbers written to V2 during this pass
			if ctx.MeasureParallelOverhead {
				copyStart = time.Now()
				copy(V, V2)
				copyDuration += time.Since(copyStart)
			} else {
				copy(V, V2)
			}
		}
	}

	if parallel && ctx.MeasureParallelOverhead {
		consumer.Debugf("Parallel copy overhead: %s", copyDuration)
	}

	// at this point, V[i] contains the group number of the ith suffix:
	// all groups are now of size 1, so V[i] is the final position of the
	// suffix in the list of indices of sorted suffixes. Commit it to I,
	// our result.
	for i = 0; i < obuflen+1; i++ {
		I[V[i]] = i
	}
	return I
}
Пример #4
0
// Do computes the difference between old and new, according to the bsdiff
// algorithm, and emits the result as a sequence of Control messages passed to
// writeMessage. The last message emitted has its Eof field set.
//
// Both old and new are read entirely into memory, and an error is returned if
// either is larger than MaxFileSize. Errors from reading the inputs or from
// writeMessage are returned as-is.
//
// consumer receives debug output (timings, plus memory statistics when
// ctx.MeasureMem is set), progress labels and progress updates.
//
// The same Control value is reused for every message, and its Add and Copy
// slices point into buffers that are overwritten on the next iteration, so
// writeMessage presumably has to be done with them before it returns.
func (ctx *DiffContext) Do(old, new io.Reader, writeMessage WriteMessageFunc, consumer *state.Consumer) error {
	var memstats *runtime.MemStats

	if ctx.MeasureMem {
		memstats = &runtime.MemStats{}
		runtime.ReadMemStats(memstats)
		consumer.Debugf("Allocated bytes at start of bsdiff: %s (%s total)", humanize.IBytes(uint64(memstats.Alloc)), humanize.IBytes(uint64(memstats.TotalAlloc)))
	}

	// NOTE(review): ctx.db and ctx.eb are lazily allocated here but not used
	// anywhere else in this function (the local db and eb below are used
	// instead) - presumably other code relies on them, to be confirmed.
	if ctx.db == nil {
		ctx.db = make([]byte, MaxMessageSize)
	}
	if ctx.eb == nil {
		ctx.eb = make([]byte, MaxMessageSize)
	}

	obuf, err := ioutil.ReadAll(old)
	if err != nil {
		return err
	}
	if int64(len(obuf)) > MaxFileSize {
		return fmt.Errorf("bsdiff: old file too large (%s > %s)", humanize.IBytes(uint64(len(obuf))), humanize.IBytes(uint64(MaxFileSize)))
	}
	// all offsets below are int32, hence the size checks
	obuflen := int32(len(obuf))

	nbuf, err := ioutil.ReadAll(new)
	if err != nil {
		return err
	}
	if int64(len(nbuf)) > MaxFileSize {
		// TODO: provide a different (int64) codepath for >=2GB files
		return fmt.Errorf("bsdiff: new file too large (%s > %s)", humanize.IBytes(uint64(len(nbuf))), humanize.IBytes(uint64(MaxFileSize)))
	}
	nbuflen := int32(len(nbuf))

	if ctx.MeasureMem {
		runtime.ReadMemStats(memstats)
		consumer.Debugf("Allocated bytes after ReadAll: %s (%s total)", humanize.IBytes(uint64(memstats.Alloc)), humanize.IBytes(uint64(memstats.TotalAlloc)))
	}

	var lenf int32
	startTime := time.Now()

	// I is the suffix array of the old file, used by search to look up matches
	I := qsufsort(obuf, ctx, consumer)

	duration := time.Since(startTime)
	consumer.Debugf("Suffix sorting done in %s", duration)

	if ctx.MeasureMem {
		runtime.ReadMemStats(memstats)
		consumer.Debugf("Allocated bytes after qsufsort: %s (%s total)", humanize.IBytes(uint64(memstats.Alloc)), humanize.IBytes(uint64(memstats.TotalAlloc)))
	}

	// db receives the bytewise differences (new - old), eb the bytes copied
	// verbatim from new. Both are reused for every message.
	// FIXME: the streaming format allows us to allocate less than that
	db := make([]byte, len(nbuf))
	eb := make([]byte, len(nbuf))

	bsdc := &Control{}

	consumer.ProgressLabel("Scanning...")

	// Compute the differences, writing ctrl as we go
	// scan is the current position in new; pos and length describe the match
	// search returned for it in old. lastscan, lastpos and lastoffset describe
	// where the previous message left off: positions in new and old, and the
	// (old - new) offset of the previous match.
	var scan, pos, length int32
	var lastscan, lastpos, lastoffset int32
	for scan < nbuflen {
		// oldscore counts the bytes, in the region being scanned, that
		// already match old when using the previous match's offset
		var oldscore int32
		scan += length

		progress := float64(scan) / float64(nbuflen)
		consumer.Progress(progress)

		for scsc := scan; scan < nbuflen; scan++ {
			pos, length = search(I, obuf, nbuf[scan:], 0, obuflen)

			for ; scsc < scan+length; scsc++ {
				if scsc+lastoffset < obuflen &&
					obuf[scsc+lastoffset] == nbuf[scsc] {
					oldscore++
				}
			}

			// stop scanning when the match is exactly as good as the previous
			// offset would be (and non-empty), or better by more than 8 bytes
			if (length == oldscore && length != 0) || length > oldscore+8 {
				break
			}

			// scan is about to move forward by one: if the byte it leaves
			// behind was counted in oldscore, take it back out
			if scan+lastoffset < obuflen && obuf[scan+lastoffset] == nbuf[scan] {
				oldscore--
			}
		}

		if length != oldscore || scan == nbuflen {
			// extend the previous match forward: lenf is the length, starting
			// at (lastscan, lastpos), that maximizes 2*matches - length
			var s, Sf int32
			lenf = 0
			for i := int32(0); lastscan+i < scan && lastpos+i < obuflen; {
				if obuf[lastpos+i] == nbuf[lastscan+i] {
					s++
				}
				i++
				if s*2-i > Sf*2-lenf {
					Sf = s
					lenf = i
				}
			}

			// extend the new match backward: lenb is the length, ending at
			// (scan, pos), that maximizes the same score. Not done once the
			// end of new has been reached.
			lenb := int32(0)
			if scan < nbuflen {
				var s, Sb int32
				for i := int32(1); (scan >= lastscan+i) && (pos >= i); i++ {
					if obuf[pos-i] == nbuf[scan-i] {
						s++
					}
					if s*2-i > Sb*2-lenb {
						Sb = s
						lenb = i
					}
				}
			}

			// if the forward and backward extensions overlap in new, find the
			// split point within the overlap that favors whichever extension
			// matches best, then shrink both so that they no longer overlap
			if lastscan+lenf > scan-lenb {
				overlap := (lastscan + lenf) - (scan - lenb)
				s := int32(0)
				Ss := int32(0)
				lens := int32(0)
				for i := int32(0); i < overlap; i++ {
					if nbuf[lastscan+lenf-overlap+i] == obuf[lastpos+lenf-overlap+i] {
						s++
					}
					if nbuf[scan-lenb+i] == obuf[pos-lenb+i] {
						s--
					}
					if s > Ss {
						Ss = s
						lens = i + 1
					}
				}

				lenf += lens - overlap
				lenb -= lens
			}

			// bytewise difference between new and old over the forward extension
			for i := int32(0); i < lenf; i++ {
				db[i] = nbuf[lastscan+i] - obuf[lastpos+i]
			}
			// bytes of new between the forward and backward extensions, taken verbatim
			for i := int32(0); i < (scan-lenb)-(lastscan+lenf); i++ {
				eb[i] = nbuf[lastscan+lenf+i]
			}

			bsdc.Add = db[:lenf]
			bsdc.Copy = eb[:(scan-lenb)-(lastscan+lenf)]
			// distance, in old, from the end of the forward extension to the
			// start of the backward extension of the new match
			bsdc.Seek = int64((pos - lenb) - (lastpos + lenf))

			err := writeMessage(bsdc)
			if err != nil {
				return err
			}

			lastscan = scan - lenb
			lastpos = pos - lenb
			lastoffset = pos - scan
		}
	}

	if ctx.MeasureMem {
		runtime.ReadMemStats(memstats)
		consumer.Debugf("Allocated bytes after scan: %s (%s total)", humanize.IBytes(uint64(memstats.Alloc)), humanize.IBytes(uint64(memstats.TotalAlloc)))
	}

	// signal the end of the patch with a final message that only has Eof set
	bsdc.Reset()
	bsdc.Eof = true
	err = writeMessage(bsdc)
	if err != nil {
		return err
	}

	return nil
}