func newGzip(b *siegreader.Buffer, path string) (decompressor, error) { b.Quit = make(chan struct{}) // in case a stream with a closed quit channel, make a new one _ = b.SizeNow() // in case a stream, force full read buf, err := b.EofSlice(0, 4) // gzip stores uncompressed size in last 4 bytes of the stream if err != nil { return nil, err } sz := int64(uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24) g, err := gzip.NewReader(siegreader.ReaderFrom(b)) return &gzipD{sz: sz, p: path, rdr: g}, err }
func (fs *frameSet) index(buf *siegreader.Buffer, rev bool, quit chan struct{}) chan fsmatch { ret := make(chan fsmatch) go func() { for i, f := range fs.set { select { case <-quit: close(ret) return default: } var match bool var matches []int if rev { slc, err := buf.EofSlice(0, frames.TotalLength(f)) if err != nil { close(ret) return } match, matches = f.MatchR(slc) } else { slc, err := buf.Slice(0, frames.TotalLength(f)) if err != nil { close(ret) return } match, matches = f.Match(slc) } if match { var min int if !rev { min, _ = f.Length() } for _, off := range matches { ret <- fsmatch{i, int64(off - min), min} } } } close(ret) }() return ret }
// scorer starts the goroutine that evaluates strikes reported by the matchers
// and returns the send-only channel on which those strikes must be delivered.
//
// Each strike received is handled in one of three ways:
//   - a strike with idxa == -1 is a progress report: it updates the current
//     bof or eof offset and triggers a check of whether anything is still
//     worth waiting for;
//   - a strike that cannot yet complete any signature is cached in strikes
//     and linked from the hits of the signatures it might later satisfy;
//   - otherwise the strike is tested against its test tree and the resulting
//     key frame hits are applied; confirmed matches are sent on r.
//
// q is closed (via quit) when continueWaiting finds no live contenders or when
// waitSet.PutAt returns true; after that, further strikes are read and
// discarded. r is closed once incoming has been closed by the sender.
//
// The closures declared below are only invoked from the goroutine started at
// the end of this function, so hits, strikes, bof, eof and quitting are only
// touched from that goroutine.
func (b *Matcher) scorer(buf *siegreader.Buffer, waitSet *priority.WaitSet, q chan struct{}, r chan<- core.Result) chan<- strike {
	incoming := make(chan strike)
	// hits is keyed by signature index (the first element of a key frame ID).
	hits := make(map[int]*hitItem)
	// strikes caches strikes that could not be satisfied yet, keyed by idxa+idxb.
	strikes := make(map[int]*strikeItem)

	// latest progress offsets reported by the matchers
	var bof int64
	var eof int64

	var quitting bool
	// quit closes q and flags the receive loop to discard whatever else arrives.
	// Every call site leaves the current iteration immediately afterwards, and
	// the loop skips all work once quitting is set, so q is closed only once.
	quit := func() {
		close(q)
		quitting = true
	}

	// newHit allocates a hitItem for signature i with one slot per key frame of
	// that signature, registers it in hits and returns it.
	newHit := func(i int) *hitItem {
		l := len(b.keyFrames[i])
		hit := &hitItem{
			potentialIdxs: make([]int, l),
			partials:      make([][][2]int64, l),
		}
		hits[i] = hit
		return hit
	}

	// given the current bof and eof, is there anything worth waiting for?
	// w lists the signature indexes to consider.
	continueWaiting := func(w []int) bool {
		var keepScanning bool
		// now for each of the possible signatures we are either waiting on or have partial/potential matches for, check whether there are live contenders
		for _, v := range w {
			kf := b.keyFrames[v]
			for i, f := range kf {
				// key frames typed above frames.PREV are measured against the eof
				// offset, all others against the bof offset
				// (presumably those are the EOF-anchored types - confirm)
				off := bof
				if f.typ > frames.PREV {
					off = eof
				}
				var waitfor, excludable bool
				if f.key.pMax == -1 || f.key.pMax+int64(f.key.lMax) > off {
					// NOTE(review): pMax == -1 presumably means "no maximum position" - confirm.
					// Either way, the scan has not yet passed the furthest point this key frame could occupy.
					waitfor = true
				} else if hit, ok := hits[v]; ok {
					if hit.partials[i] != nil {
						// a partial match has already been recorded for this key frame
						waitfor = true
					} else if hit.potentialIdxs[i] > 0 && strikes[hit.potentialIdxs[i]-1].hasPotential() {
						// potentialIdxs stores the strike index plus one, so zero means "no cached strike"
						waitfor, excludable = true, true
					}
				}
				// if we've got to the end of the signature, and have determined this is a live one - return immediately & continue scan
				if waitfor {
					if i == len(kf)-1 {
						// unless running slow and at a checkpoint, one live signature is
						// enough to answer; otherwise log it and keep examining the rest
						if !config.Slow() || !config.Checkpoint(bof) {
							return true
						}
						keepScanning = true
						fmt.Fprintf(config.Out(), "waiting on: %d, potentially excludable: %t\n", v, excludable)
					}
					continue
				}
				// this key frame is not live, so stop examining this signature
				break
			}
		}
		return keepScanning
	}

	// testStrike runs the test tree selected by a strike and returns the key
	// frame hits it produces: one per eligible complete entry, plus one per
	// incomplete entry whose left and/or right tests are fulfilled.
	testStrike := func(st strike) []kfHit {
		// the offsets we *record* are always BOF offsets - these can be interpreted as EOF offsets when necessary
		off := st.offset
		if st.reverse {
			off = buf.Size() - st.offset - int64(st.length)
		}

		// grab the relevant testTree
		t := b.tests[st.idxa+st.idxb]
		res := make([]kfHit, 0, 10)
		// immediately apply key frames for the completes
		// (check is given the strike's own offset, not the BOF-normalised off)
		for _, kf := range t.complete {
			if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
				res = append(res, kfHit{kf, off, st.length})
			}
		}
		// if there are no incompletes, we are done
		if len(t.incomplete) < 1 {
			return res
		}
		// see what incompletes are worth pursuing
		var checkl, checkr bool
		for _, v := range t.incomplete {
			// both directions already required: nothing more to learn
			if checkl && checkr {
				break
			}
			if b.keyFrames[v.kf[0]][v.kf[1]].check(st.offset) && waitSet.Check(v.kf[0]) {
				if v.l {
					checkl = true
				}
				if v.r {
					checkr = true
				}
			}
		}
		if !checkl && !checkr {
			return res
		}
		// calculate the offset and lengths for the left and right test slices
		var lslc, rslc []byte
		var lpos, rpos int64
		var llen, rlen int
		if st.reverse {
			// offsets count back from the end here, so the left slice sits at the
			// larger offset and the right slice at the smaller one
			lpos, llen = st.offset+int64(st.length), t.maxLeftDistance
			rpos, rlen = st.offset-int64(t.maxRightDistance), t.maxRightDistance
			if rpos < 0 {
				// clamp to offset zero, shortening the slice by the overshoot
				rlen = rlen + int(rpos)
				rpos = 0
			}
		} else {
			lpos, llen = st.offset-int64(t.maxLeftDistance), t.maxLeftDistance
			rpos, rlen = st.offset+int64(st.length), t.maxRightDistance
			if lpos < 0 {
				// clamp to offset zero, shortening the slice by the overshoot
				llen = llen + int(lpos)
				lpos = 0
			}
		}
		// the partials slice has a mirror entry for each of the testTree incompletes
		partials := make([]partial, len(t.incomplete))
		// test left (if there are valid left tests to try)
		if checkl {
			// the slice error is discarded
			// NOTE(review): presumably a short or empty slice is still worth testing - confirm
			if st.reverse {
				lslc, _ = buf.EofSlice(lpos, llen)
			} else {
				lslc, _ = buf.Slice(lpos, llen)
			}
			left := matchTestNodes(t.left, lslc, true)
			for _, lp := range left {
				// accumulate distances when several results share a followUp index
				if partials[lp.followUp].l {
					partials[lp.followUp].ldistances = append(partials[lp.followUp].ldistances, lp.distances...)
				} else {
					partials[lp.followUp].l = true
					partials[lp.followUp].ldistances = lp.distances
				}
			}
		}
		// test right (if there are valid right tests to try)
		if checkr {
			// the slice error is discarded, as for the left slice above
			if st.reverse {
				rslc, _ = buf.EofSlice(rpos, rlen)
			} else {
				rslc, _ = buf.Slice(rpos, rlen)
			}
			right := matchTestNodes(t.right, rslc, false)
			for _, rp := range right {
				// accumulate distances when several results share a followUp index
				if partials[rp.followUp].r {
					partials[rp.followUp].rdistances = append(partials[rp.followUp].rdistances, rp.distances...)
				} else {
					partials[rp.followUp].r = true
					partials[rp.followUp].rdistances = rp.distances
				}
			}
		}
		// now iterate through the partials, checking whether they fulfil any of the incompletes
		// (p is a copy of the slice element, so the defaults assigned below stay local)
		for i, p := range partials {
			if p.l == t.incomplete[i].l && p.r == t.incomplete[i].r {
				kf := t.incomplete[i].kf
				if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
					// a side that needed no test contributes a single zero distance
					if !p.l {
						p.ldistances = []int{0}
					}
					if !p.r {
						p.rdistances = []int{0}
					}
					// when oneEnough reports true, only the first left/right distances are recorded
					if oneEnough(kf[1], b.keyFrames[kf[0]]) {
						res = append(res, kfHit{kf, off - int64(p.ldistances[0]), p.ldistances[0] + st.length + p.rdistances[0]})
						continue
					}
					// otherwise record every combination of left and right distance
					for _, ldistance := range p.ldistances {
						for _, rdistance := range p.rdistances {
							res = append(res, kfHit{kf, off - int64(ldistance), ldistance + st.length + rdistance})
						}
					}
				}
			}
		}
		return res
	}

	// applyKeyFrame records a key frame hit against its signature and reports
	// whether the signature is now fully matched, together with a description
	// of the match (empty when there is no match).
	applyKeyFrame := func(hit kfHit) (bool, string) {
		kfs := b.keyFrames[hit.id[0]]
		// a signature with a single key frame is matched by any one hit
		if len(kfs) == 1 {
			return true, fmt.Sprintf("byte match at %d, %d", hit.offset, hit.length)
		}
		h, ok := hits[hit.id[0]]
		if !ok {
			h = newHit(hit.id[0])
		}
		// store the hit as an {offset, length} pair under its key frame
		if h.partials[hit.id[1]] == nil {
			h.partials[hit.id[1]] = [][2]int64{{hit.offset, int64(hit.length)}}
		} else {
			h.partials[hit.id[1]] = append(h.partials[hit.id[1]], [2]int64{hit.offset, int64(hit.length)})
		}
		// no match is possible until every key frame has at least one partial
		for _, p := range h.partials {
			if p == nil {
				return false, ""
			}
		}
		// walk the key frames in order, checking each against its predecessor;
		// basis collects the offsets that checkRelated returns at each step
		prevOff := h.partials[0]
		basis := make([][][2]int64, len(kfs))
		basis[0] = prevOff
		prevKf := kfs[0]
		ok = false
		for i, kf := range kfs[1:] {
			// i indexes kfs[1:], so kf is kfs[i+1] and its successor, if any, is kfs[i+2]
			var nextKf keyFrame
			if i+2 < len(kfs) {
				nextKf = kfs[i+2]
			}
			thisOff := h.partials[i+1]
			prevOff, ok = kf.checkRelated(prevKf, nextKf, thisOff, prevOff)
			if !ok {
				return false, ""
			}
			basis[i+1] = prevOff
			prevKf = kf
		}
		return true, fmt.Sprintf("byte match at %v", basis)
	}

	go func() {
		for in := range incoming {
			// if we've got a positive result, drain any remaining strikes from the matchers
			if quitting {
				continue
			}
			// if the strike reports progress, check if we should be continuing to wait
			if in.idxa == -1 {
				// update with the latest offset
				if in.reverse {
					eof = in.offset
				} else {
					bof = in.offset
				}
				w := waitSet.WaitingOnAt(bof, eof)
				// if any of the waitlists are nil, we will continue - unless we are past the known bof and known eof (points at which we *should* have got at least partial matches), in which case we will check if any partial/potential matches are live
				if w == nil {
					// keep going if we don't have a maximum known bof, or if our current bof/eof are less than the maximum known bof/eof
					if b.knownBOF < 0 || int64(b.knownBOF) > bof || int64(b.knownEOF) > eof {
						continue
					}
					// if we don't have a waitlist, and we are past the known bof and known eof, grab all the partials and potentials to check if any are live
					w = all(hits)
				}
				// exhausted all contenders, we can stop scanning
				if !continueWaiting(w) {
					quit()
				}
				continue
			}
			// now cache or satisfy the strike
			var hasPotential bool
			// key frames this strike's test tree can produce, filtered through the wait set
			potentials := filterKF(b.tests[in.idxa+in.idxb].keyFrames(), waitSet)
			for _, pot := range potentials {
				// if any of the signatures are single keyframe we can satisfy immediately and skip cache
				if len(b.keyFrames[pot[0]]) == 1 {
					hasPotential = true
					break
				}
				if hit, ok := hits[pot[0]]; ok && hit.potentiallyComplete(pot[1], strikes) {
					hasPotential = true
					break
				}
			}
			if !hasPotential {
				// cache the strike
				s, ok := strikes[in.idxa+in.idxb]
				if !ok {
					s = &strikeItem{in, -1, nil}
					strikes[in.idxa+in.idxb] = s
				} else {
					// a strike is already cached under this index: record this one's
					// offset and length as a successive occurrence
					if s.successive == nil {
						s.successive = make([][2]int64, 0, 10)
					}
					s.successive = append(s.successive, [2]int64{in.offset, int64(in.length)})
				}
				// range over the potentials, linking to the strike
				for _, pot := range potentials {
					if b.keyFrames[pot[0]][pot[1]].check(in.offset) {
						hit, ok := hits[pot[0]]
						if !ok {
							hit = newHit(pot[0])
						}
						// stored as index+1 so that the zero value means "no cached strike"
						hit.potentialIdxs[pot[1]] = in.idxa + in.idxb + 1
					}
				}
				goto end
			}
			// satisfy the strike
			// (each pass may replace in with a previously cached strike, which is tested in turn)
			for {
				ks := testStrike(in)
				for _, k := range ks {
					if match, basis := applyKeyFrame(k); match {
						// re-check the wait set before reporting the match
						if waitSet.Check(k.id[0]) {
							r <- result{k.id[0], basis}
							// PutAt returning true ends the scan
							if waitSet.PutAt(k.id[0], bof, eof) {
								quit()
								goto end
							}
						}
						if h, ok := hits[k.id[0]]; ok {
							h.matched = true
						}
					}
				}
				// re-filter the potentials, as the wait set may have changed above
				potentials = filterKF(potentials, waitSet)
				var ok bool
				for _, pot := range potentials {
					// NOTE(review): hits[pot[0]] may be absent from the map here, making the
					// receiver nil - confirm that nextPotential tolerates a nil receiver
					in, ok = hits[pot[0]].nextPotential(strikes)
					if ok {
						break
					}
				}
				// no cached strike left to follow up
				if !ok {
					break
				}
			}
		end:
			// keep looping until incoming is closed
		}
		close(r)
	}()
	return incoming
}