Esempio n. 1
0
// identifyRdr identifies the content supplied by r, reports the outcome through
// w and, when archive scanning is switched on and the writer classifies the
// content as an archive, recurses into every member of that archive.
//
// path, sz and mod are handed to the writer unchanged; path and mime are also
// given to the identifier. If the package-level checksum hash is set, a digest
// of the buffered content is computed and reported with the file.
func identifyRdr(w writer, s *siegfried.Siegfried, r io.Reader, sz int64, path, mime, mod string) {
	lg.set(path)
	ids, err := s.Identify(r, path, mime)
	lg.err(err)
	if ids == nil {
		// no identification came back: report the file with the error and stop
		w.writeFile(path, sz, mod, nil, err, nil)
		lg.reset()
		return
	}

	var (
		content *siegreader.Buffer
		digest  []byte
	)
	if checksum != nil {
		content = s.Buffer()
		step := checksum.BlockSize()
		// feed the hash one block at a time until the buffer yields nothing more;
		// the Slice error is ignored on purpose, a nil slice is the stop signal
		for off := int64(0); ; off += int64(step) {
			chunk, _ := content.Slice(off, step)
			if chunk == nil {
				break
			}
			checksum.Write(chunk)
		}
		digest = checksum.Sum(nil)
		checksum.Reset()
	}

	arc := w.writeFile(path, sz, mod, digest, err, idChan(ids))
	lg.reset()
	if arc == config.None || !*archive {
		return
	}

	// the buffer was only fetched above when a checksum was requested
	if content == nil {
		content = s.Buffer()
	}
	var dec decompressor
	switch arc {
	case config.Zip:
		dec, err = newZip(siegreader.ReaderFrom(content), path, sz)
	case config.Gzip:
		dec, err = newGzip(content, path)
	case config.Tar:
		dec, err = newTar(siegreader.ReaderFrom(content), path)
	case config.ARC:
		dec, err = newARC(siegreader.ReaderFrom(content), path)
	case config.WARC:
		dec, err = newWARC(siegreader.ReaderFrom(content), path)
	}
	if err != nil {
		writeError(w, path, sz, mod, fmt.Errorf("failed to decompress, got: %v", err))
		return
	}

	// walk the archive members until next reports an error
	// (presumably io.EOF at the end of the archive - confirm against the decompressors)
	for {
		if err = dec.next(); err != nil {
			return
		}
		if *droido {
			// in droido mode, emit a bare entry for each path returned by dirs
			for _, dir := range dec.dirs() {
				w.writeFile(dir, -1, "", nil, nil, nil)
			}
		}
		identifyRdr(w, s, dec.reader(), dec.size(), dec.path(), dec.mime(), dec.mod())
	}
}
Esempio n. 2
0
// index tests each frame in the set against the start of buf (or, when rev is
// true, against its end) and streams every match on the returned channel.
//
// The work runs in its own goroutine. The returned channel is closed when all
// frames have been tested, when quit is found closed (or readable) at the top
// of an iteration, or when buf cannot supply the slice a frame needs. Sends are
// unbuffered and are not interrupted by quit, so the receiver must keep
// receiving until the channel is closed.
func (fs *frameSet) index(buf *siegreader.Buffer, rev bool, quit chan struct{}) chan fsmatch {
	ret := make(chan fsmatch)
	go func() {
		// one deferred close covers every exit path of the goroutine
		defer close(ret)
		for i, f := range fs.set {
			// non-blocking check: stop before starting another frame if asked to quit
			select {
			case <-quit:
				return
			default:
			}
			var match bool
			var matches []int
			if rev {
				slc, err := buf.EofSlice(0, frames.TotalLength(f))
				if err != nil {
					return
				}
				match, matches = f.MatchR(slc)
			} else {
				slc, err := buf.Slice(0, frames.TotalLength(f))
				if err != nil {
					return
				}
				match, matches = f.Match(slc)
			}
			if !match {
				continue
			}
			// for forward matches the frame's minimum length is subtracted from
			// each reported offset and sent along as the length; reverse matches
			// are reported as-is with a zero length
			var minLen int
			if !rev {
				minLen, _ = f.Length()
			}
			for _, off := range matches {
				ret <- fsmatch{i, int64(off - minLen), minLen}
			}
		}
	}()
	return ret
}
Esempio n. 3
0
// Identify runs the first container matcher in m whose trigger accepts the
// first eight bytes of b, and returns the channel on which that matcher will
// deliver its results.
//
// If the leading bytes cannot be read, or no matcher is triggered, the returned
// channel is already closed and the error is nil. If the triggered matcher
// cannot open a reader over b, the channel is closed and that error is returned.
func (m Matcher) Identify(n string, b siegreader.Buffer) (chan core.Result, error) {
	res := make(chan core.Result)
	head, sliceErr := b.Slice(0, 8)
	if sliceErr == nil {
		for _, cm := range m {
			if !cm.trigger(head) {
				continue
			}
			rdr, err := cm.rdr(b)
			if err != nil {
				close(res)
				return res, err
			}
			// the matcher's identify goroutine now owns res
			go cm.identify(n, rdr, res)
			return res, nil
		}
	}
	// unreadable header or no trigger fired: nothing to report
	close(res)
	return res, nil
}
Esempio n. 4
0
// scorer starts a goroutine that evaluates the strikes sent on the returned
// channel against this Matcher's test trees and key frames.
//
// Each signature that becomes satisfied (and is still accepted by the wait set)
// is sent on r. q is closed, via the quit closure, either when waitSet.Put
// returns true for a result or when a progress strike shows there is nothing
// left worth waiting for; strikes received after that point are discarded. r is
// closed once the sender closes the returned channel.
func (b *Matcher) scorer(buf *siegreader.Buffer, q chan struct{}, r chan<- core.Result) chan<- strike {
	incoming := make(chan strike)
	waitSet := b.priorities.WaitSet()
	// hits is keyed by signature index; strikes caches unsatisfied strikes keyed by idxa+idxb
	hits := make(map[int]*hitItem)
	strikes := make(map[int]*strikeItem)

	// latest offsets reported by progress strikes (see the idxa == -1 branch below)
	var bof int64
	var eof int64

	// quit closes q and flags the scorer so that any remaining strikes are skipped
	var quitting bool
	quit := func() {
		close(q)
		quitting = true
	}

	// newHit registers an empty hitItem for signature i, with one slot per key frame of that signature
	newHit := func(i int) *hitItem {
		l := len(b.keyFrames[i])
		hit := &hitItem{
			potentialIdxs: make([]int, l),
			partials:      make([][][2]int64, l),
		}
		hits[i] = hit
		return hit
	}

	// given the current bof and eof, is there anything worth waiting for?
	continueWaiting := func(w []int) bool {
		var keepScanning bool
		// now for each of the possible signatures we are either waiting on or have partial/potential matches for, check whether there are live contenders
		for _, v := range w {
			kf := b.keyFrames[v]
			for i, f := range kf {
				// frame types ordered after frames.PREV are measured against the eof offset
				// (presumably these are the EOF-anchored types - confirm)
				off := bof
				if f.typ > frames.PREV {
					off = eof
				}
				var waitfor, excludable bool
				// a pMax of -1 always counts as worth waiting for (presumably "no maximum position" - confirm);
				// otherwise we wait only while the scan has not yet passed pMax+lMax
				if f.key.pMax == -1 || f.key.pMax+int64(f.key.lMax) > off {
					waitfor = true
				} else if hit, ok := hits[v]; ok {
					if hit.partials[i] != nil {
						waitfor = true
						// potentialIdxs holds the strike index plus one, so zero means no cached strike
					} else if hit.potentialIdxs[i] > 0 && strikes[hit.potentialIdxs[i]-1].hasPotential() {
						waitfor, excludable = true, true
					}
				}
				// if we've got to the end of the signature, and have determined this is a live one - return immediately & continue scan
				if waitfor {
					if i == len(kf)-1 {
						if !config.Slow() || !config.Checkpoint(bof) {
							return true
						}
						// slow mode at a checkpoint: report this signature and keep checking the rest
						keepScanning = true
						fmt.Fprintf(config.Out(), "waiting on: %d, potentially excludable: %t\n", v, excludable)
					}
					continue
				}
				// nothing to wait for on this key frame: stop checking this signature
				break
			}
		}
		return keepScanning
	}

	// testStrike runs the test tree for a strike and returns the key frame hits it produces
	testStrike := func(st strike) []kfHit {
		// the offsets we *record* are always BOF offsets - these can be interpreted as EOF offsets when necessary
		off := st.offset
		if st.reverse {
			off = buf.Size() - st.offset - int64(st.length)
		}
		// grab the relevant testTree
		t := b.tests[st.idxa+st.idxb]
		res := make([]kfHit, 0, 10)
		// immediately apply key frames for the completes
		for _, kf := range t.complete {
			if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
				res = append(res, kfHit{kf, off, st.length})
			}
		}
		// if there are no incompletes, we are done
		if len(t.incomplete) < 1 {
			return res
		}
		// see what incompletes are worth pursuing
		var checkl, checkr bool
		for _, v := range t.incomplete {
			// once both sides are known to be needed there is nothing more to learn
			if checkl && checkr {
				break
			}
			if b.keyFrames[v.kf[0]][v.kf[1]].check(st.offset) && waitSet.Check(v.kf[0]) {
				if v.l {
					checkl = true
				}
				if v.r {
					checkr = true
				}
			}
		}
		if !checkl && !checkr {
			return res
		}
		// calculate the offset and lengths for the left and right test slices
		var lslc, rslc []byte
		var lpos, rpos int64
		var llen, rlen int
		if st.reverse {
			lpos, llen = st.offset+int64(st.length), t.maxLeftDistance
			rpos, rlen = st.offset-int64(t.maxRightDistance), t.maxRightDistance
			// rpos is negative here, so adding it shortens rlen by the overshoot before clamping to zero
			if rpos < 0 {
				rlen = rlen + int(rpos)
				rpos = 0
			}
		} else {
			lpos, llen = st.offset-int64(t.maxLeftDistance), t.maxLeftDistance
			rpos, rlen = st.offset+int64(st.length), t.maxRightDistance
			// lpos is negative here, so adding it shortens llen by the overshoot before clamping to zero
			if lpos < 0 {
				llen = llen + int(lpos)
				lpos = 0
			}
		}
		//  the partials slice has a mirror entry for each of the testTree incompletes
		partials := make([]partial, len(t.incomplete))
		// test left (if there are valid left tests to try)
		if checkl {
			// slice errors are ignored: whatever bytes come back are handed to the tests
			if st.reverse {
				lslc, _ = buf.EofSlice(lpos, llen)
			} else {
				lslc, _ = buf.Slice(lpos, llen)
			}
			left := matchTestNodes(t.left, lslc, true)
			for _, lp := range left {
				if partials[lp.followUp].l {
					partials[lp.followUp].ldistances = append(partials[lp.followUp].ldistances, lp.distances...)
				} else {
					partials[lp.followUp].l = true
					partials[lp.followUp].ldistances = lp.distances
				}
			}
		}
		// test right (if there are valid right tests to try)
		if checkr {
			// slice errors are ignored: whatever bytes come back are handed to the tests
			if st.reverse {
				rslc, _ = buf.EofSlice(rpos, rlen)
			} else {
				rslc, _ = buf.Slice(rpos, rlen)
			}
			right := matchTestNodes(t.right, rslc, false)
			for _, rp := range right {
				if partials[rp.followUp].r {
					partials[rp.followUp].rdistances = append(partials[rp.followUp].rdistances, rp.distances...)
				} else {
					partials[rp.followUp].r = true
					partials[rp.followUp].rdistances = rp.distances
				}
			}
		}
		// now iterate through the partials, checking whether they fulfil any of the incompletes
		// (p is a copy of the element, so the defaults assigned below do not alter the partials slice)
		for i, p := range partials {
			if p.l == t.incomplete[i].l && p.r == t.incomplete[i].r {
				kf := t.incomplete[i].kf
				if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
					// a side that was not required gets a single zero distance so a hit is still produced
					if !p.l {
						p.ldistances = []int{0}
					}
					if !p.r {
						p.rdistances = []int{0}
					}
					// when oneEnough reports true, only the first left/right distance pair is recorded
					if oneEnough(kf[1], b.keyFrames[kf[0]]) {
						res = append(res, kfHit{kf, off - int64(p.ldistances[0]), p.ldistances[0] + st.length + p.rdistances[0]})
						continue
					}
					// otherwise record a hit for every combination of left and right distances
					for _, ldistance := range p.ldistances {
						for _, rdistance := range p.rdistances {
							res = append(res, kfHit{kf, off - int64(ldistance), ldistance + st.length + rdistance})
						}
					}
				}
			}
		}
		return res
	}

	// applyKeyFrame records a key frame hit against its signature and reports whether the
	// signature is now matched, together with a description of the basis for the match
	applyKeyFrame := func(hit kfHit) (bool, string) {
		kfs := b.keyFrames[hit.id[0]]
		// a signature with a single key frame is matched by any hit on it
		if len(kfs) == 1 {
			return true, fmt.Sprintf("byte match at %d, %d", hit.offset, hit.length)
		}
		h, ok := hits[hit.id[0]]
		if !ok {
			h = newHit(hit.id[0])
		}
		// store the offset/length pair as a partial for this key frame
		if h.partials[hit.id[1]] == nil {
			h.partials[hit.id[1]] = [][2]int64{[2]int64{hit.offset, int64(hit.length)}}
		} else {
			h.partials[hit.id[1]] = append(h.partials[hit.id[1]], [2]int64{hit.offset, int64(hit.length)})
		}
		// no match is possible until every key frame has at least one partial
		for _, p := range h.partials {
			if p == nil {
				return false, ""
			}
		}
		// walk the key frames in order, passing each one's partials and the previous result to checkRelated
		// NOTE(review): checkRelated appears to return the offsets that remain valid and whether any do - confirm
		prevOff := h.partials[0]
		basis := make([][][2]int64, len(kfs))
		basis[0] = prevOff
		prevKf := kfs[0]
		ok = false
		for i, kf := range kfs[1:] {
			// i indexes kfs[1:], so the current key frame is kfs[i+1] and the following one, if any, is kfs[i+2]
			var nextKf keyFrame
			if i+2 < len(kfs) {
				nextKf = kfs[i+2]
			}
			thisOff := h.partials[i+1]
			prevOff, ok = kf.checkRelated(prevKf, nextKf, thisOff, prevOff)
			if !ok {
				return false, ""
			}
			basis[i+1] = prevOff
			prevKf = kf
		}
		return true, fmt.Sprintf("byte match at %v", basis)
	}

	go func() {
		for in := range incoming {
			// if we've got a positive result, drain any remaining strikes from the matchers
			if quitting {
				continue
			}
			// if the strike reports progress, check if we should be continuing to wait
			if in.idxa == -1 {
				// update with the latest offset
				if in.reverse {
					eof = in.offset
				} else {
					bof = in.offset
				}
				w := waitSet.WaitingOn()
				// if any of the waitlists are nil, we will continue - unless we are past the known bof and known eof (points at which we *should* have got at least partial matches), in which case we will check if any partial/potential matches are live
				if w == nil {
					// keep going if we don't have a maximum known bof, or if our current bof/eof are less than the maximum known bof/eof
					if b.knownBOF < 0 || int64(b.knownBOF) > bof || int64(b.knownEOF) > eof {
						continue
					}
					// if we don't have a waitlist, and we are past the known bof and known eof, grab all the partials and potentials to check if any are live
					w = all(hits)
				}
				// exhausted all contenders, we can stop scanning
				if !continueWaiting(w) {
					quit()
				}
				continue
			}
			// now cache or satisfy the strike
			var hasPotential bool
			potentials := filterKF(b.tests[in.idxa+in.idxb].keyFrames(), waitSet)
			for _, pot := range potentials {
				// if any of the signatures are single keyframe we can satisfy immediately and skip cache
				if len(b.keyFrames[pot[0]]) == 1 {
					hasPotential = true
					break
				}
				if hit, ok := hits[pot[0]]; ok && hit.potentiallyComplete(pot[1], strikes) {
					hasPotential = true
					break
				}
			}
			if !hasPotential {
				// cache the strike
				// (the first strike for a test index is stored whole; later ones are appended as offset/length pairs)
				s, ok := strikes[in.idxa+in.idxb]
				if !ok {
					s = &strikeItem{in, -1, nil}
					strikes[in.idxa+in.idxb] = s
				} else {
					if s.successive == nil {
						s.successive = make([][2]int64, 0, 10)
					}
					s.successive = append(s.successive, [2]int64{in.offset, int64(in.length)})
				}
				// range over the potentials, linking to the strike
				for _, pot := range potentials {
					if b.keyFrames[pot[0]][pot[1]].check(in.offset) {
						hit, ok := hits[pot[0]]
						if !ok {
							hit = newHit(pot[0])
						}
						// stored as index+1 so that the zero value means "no cached strike"
						hit.potentialIdxs[pot[1]] = in.idxa + in.idxb + 1
					}
				}
				goto end
			}
			// satisfy the strike
			for {
				ks := testStrike(in)
				for _, k := range ks {
					if match, basis := applyKeyFrame(k); match {
						if waitSet.Check(k.id[0]) {
							r <- result{k.id[0], basis}
							// Put returning true ends the scan: signal quit and stop processing this strike
							if waitSet.Put(k.id[0]) {
								quit()
								goto end
							}
						}
						if h, ok := hits[k.id[0]]; ok {
							h.matched = true
						}
					}
				}
				// re-filter against the updated wait set, then replace in with the next cached
				// strike offered by any remaining potential; stop when none is offered
				potentials = filterKF(potentials, waitSet)
				var ok bool
				for _, pot := range potentials {
					in, ok = hits[pot[0]].nextPotential(strikes)
					if ok {
						break
					}
				}
				if !ok {
					break
				}
			}
		end: // keep looping until incoming is closed
		}
		// the sender has closed incoming: no further results will be produced
		close(r)
	}()
	return incoming
}