func newGzip(b *siegreader.Buffer, path string) (decompressor, error) { b.Quit = make(chan struct{}) // in case a stream with a closed quit channel, make a new one _ = b.SizeNow() // in case a stream, force full read buf, err := b.EofSlice(0, 4) // gzip stores uncompressed size in last 4 bytes of the stream if err != nil { return nil, err } sz := int64(uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24) g, err := gzip.NewReader(siegreader.ReaderFrom(b)) return &gzipD{sz: sz, p: path, rdr: g}, err }
func (m *Matcher) Identify(na string, buf *siegreader.Buffer, exclude ...int) (chan core.Result, error) { if *m > 0 { tt := buf.Text() if tt != characterize.DATA { res := make(chan core.Result, *m) for i := 1; i < int(*m)+1; i++ { res <- result{ idx: i, basis: "text match " + tt.String(), } } close(res) return res, nil } } res := make(chan core.Result) close(res) return res, nil }
func (fs *frameSet) index(buf *siegreader.Buffer, rev bool, quit chan struct{}) chan fsmatch { ret := make(chan fsmatch) go func() { for i, f := range fs.set { select { case <-quit: close(ret) return default: } var match bool var matches []int if rev { slc, err := buf.EofSlice(0, frames.TotalLength(f)) if err != nil { close(ret) return } match, matches = f.MatchR(slc) } else { slc, err := buf.Slice(0, frames.TotalLength(f)) if err != nil { close(ret) return } match, matches = f.Match(slc) } if match { var min int if !rev { min, _ = f.Length() } for _, off := range matches { ret <- fsmatch{i, int64(off - min), min} } } } close(ret) }() return ret }
func (m Matcher) Identify(n string, b *siegreader.Buffer, exclude ...int) (chan core.Result, error) { res := make(chan core.Result) // check trigger buf, err := b.Slice(0, 8) if err != nil { close(res) return res, nil } for _, c := range m { if c.trigger(buf) { rdr, err := c.rdr(b) if err != nil { close(res) return res, err } go c.identify(n, rdr, res) return res, nil } } // nothing ... move on close(res) return res, nil }
// identify drives a full byte-match scan of buf and feeds every hit, as a
// strike, to the scorer started for this run.
//
// Parameters:
//   - buf: the buffer being scanned; its Quit field is set to quit.
//   - quit: channel that is checked between phases to see whether scanning
//     should be abandoned (the scorer started here is given the same channel).
//   - r: result channel handed to the scorer.
//   - exclude: signature indexes passed to the priority wait set.
//
// The scan proceeds in phases:
//  1. BOF frames are tested via bofFrames.index.
//  2. BOF sequences are scanned through a reader limited to maxBOF; this phase
//     may stop early (see the break below) and be resumed later.
//  3. EOF frames and EOF sequences are scanned. When maxEOF >= 0 this is done
//     sequentially, followed by the remainder of the BOF scan; otherwise the
//     BOF, EOF-frame and EOF-sequence channels are consumed in parallel.
//
// In every exit path the incoming channel returned by the scorer is closed
// exactly once, after the producer channels in use have been drained.
func (b *Matcher) identify(buf *siegreader.Buffer, quit chan struct{}, r chan core.Result, exclude ...int) {
	buf.Quit = quit
	waitSet := b.priorities.WaitSet(exclude...)
	// With exclusions the scan limits come from the reduced wait set; without
	// them the matcher's own precomputed limits are used.
	var maxBOF, maxEOF int
	if len(exclude) > 0 {
		maxBOF, maxEOF = waitSet.MaxOffsets()
	} else {
		maxBOF, maxEOF = b.maxBOF, b.maxEOF
	}
	incoming := b.scorer(buf, waitSet, quit, r)
	rdr := siegreader.LimitReaderFrom(buf, maxBOF)
	// First test BOF frameset
	bfchan := b.bofFrames.index(buf, false, quit)
	for bf := range bfchan {
		// NOTE(review): the strike literal is positional; the final field is
		// true for frame matches and false for sequence matches throughout this
		// function - confirm its name against the strike type.
		if config.Debug() {
			fmt.Fprintln(config.Out(), strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true})
		}
		incoming <- strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true}
	}
	// Non-blocking check: stop here if quit has already been closed.
	select {
	case <-quit: // the matcher has called quit
		for range bfchan {
		} // drain first
		close(incoming)
		return
	default:
	}
	// Do an initial check of BOF sequences
	b.start(true) // start bof matcher if not yet started
	var bchan chan wac.Result
	bchan = b.bAho.Index(rdr)
	for br := range bchan {
		// An Index[0] of -1 is forwarded as a progress strike rather than a match.
		if br.Index[0] == -1 {
			incoming <- progressStrike(br.Offset, false)
			// Pause the BOF scan once past 131072 bytes when the BOF limit is
			// negative or more than five times the EOF limit; the remaining
			// results on bchan are consumed further down.
			if br.Offset > 131072 && (maxBOF < 0 || maxBOF > maxEOF*5) { // del buf.Stream 2^16 65536 2^17 131072
				break
			}
		} else {
			if config.Debug() {
				fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
			}
			incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
		}
	}
	// Second non-blocking quit check, this time draining the BOF sequence channel.
	select {
	case <-quit: // the matcher has called quit
		for range bchan {
		} // drain first
		close(incoming)
		return
	default:
	}
	// Setup EOF tests
	efchan := b.eofFrames.index(buf, true, quit)
	b.start(false)
	rrdr := siegreader.LimitReverseReaderFrom(buf, maxEOF)
	echan := b.eAho.Index(rrdr)
	// if we have a maximum value on EOF do a sequential search
	if maxEOF >= 0 {
		if maxEOF != 0 {
			_, _ = buf.CanSeek(0, true) // force a full read to enable EOF scan to proceed for streams
		}
		for ef := range efchan {
			if config.Debug() {
				fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true})
			}
			incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}
		}
		// Scan complete EOF
		for er := range echan {
			if er.Index[0] == -1 {
				incoming <- progressStrike(er.Offset, true)
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false})
				}
				incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}
			}
		}
		// send a final progress strike with the maximum EOF
		incoming <- progressStrike(int64(maxEOF), true)
		// Finally, finish BOF scan
		for br := range bchan {
			if br.Index[0] == -1 {
				incoming <- progressStrike(br.Offset, false)
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
				}
				incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
			}
		}
		close(incoming)
		return
	}
	// If no maximum on EOF do a parallel search
	for {
		// A channel variable is set to nil once its producer closes it, so its
		// case can no longer be selected; the loop ends when all three are nil.
		select {
		case br, ok := <-bchan:
			if !ok {
				if maxBOF < 0 && maxEOF != 0 {
					_, _ = buf.CanSeek(0, true) // if we've a limit BOF reader, force a full read to enable EOF scan to proceed for streams
				}
				bchan = nil
			} else {
				if br.Index[0] == -1 {
					incoming <- progressStrike(br.Offset, false)
				} else {
					if config.Debug() {
						fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false})
					}
					incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}
				}
			}
		case ef, ok := <-efchan:
			if !ok {
				efchan = nil
			} else {
				if config.Debug() {
					fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true})
				}
				incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}
			}
		case er, ok := <-echan:
			if !ok {
				echan = nil
			} else {
				if er.Index[0] == -1 {
					incoming <- progressStrike(er.Offset, true)
				} else {
					if config.Debug() {
						fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false})
					}
					incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}
				}
			}
		}
		if bchan == nil && efchan == nil && echan == nil {
			close(incoming)
			return
		}
	}
}
// scorer starts a goroutine that consumes strikes and turns them into results.
//
// Parameters:
//   - buf: the buffer being scanned; used to fetch the bytes either side of a
//     strike and to convert reverse offsets using buf.Size().
//   - waitSet: tracks which signatures are still worth reporting or waiting for.
//   - q: closed by the scorer once it decides scanning can stop.
//   - r: receives a result for each satisfied signature; closed by the scorer
//     after the returned channel has been closed and fully consumed.
//
// It returns the send-only channel on which strikes must be delivered. The
// sender must close that channel when scanning is finished. After the scorer
// has closed q it keeps receiving from the channel but discards what arrives.
//
// All of the closures defined below are invoked only from the goroutine started
// at the end of this function, so the state they share (hits, strikes, bof,
// eof, quitting) is touched by that goroutine alone.
func (b *Matcher) scorer(buf *siegreader.Buffer, waitSet *priority.WaitSet, q chan struct{}, r chan<- core.Result) chan<- strike {
	incoming := make(chan strike)
	// hits is keyed by signature index (the first index into b.keyFrames).
	hits := make(map[int]*hitItem)
	// strikes caches unsatisfied strikes, keyed by test index (idxa+idxb).
	strikes := make(map[int]*strikeItem)
	// bof and eof hold the most recent offsets reported by progress strikes.
	var bof int64
	var eof int64
	var quitting bool
	// quit signals that scanning can stop and switches the loop into drain mode.
	quit := func() {
		close(q)
		quitting = true
	}
	// newHit registers an empty hitItem for signature i, with one slot per key frame.
	newHit := func(i int) *hitItem {
		l := len(b.keyFrames[i])
		hit := &hitItem{
			potentialIdxs: make([]int, l),
			partials:      make([][][2]int64, l),
		}
		hits[i] = hit
		return hit
	}
	// given the current bof and eof, is there anything worth waiting for?
	continueWaiting := func(w []int) bool {
		var keepScanning bool
		// now for each of the possible signatures we are either waiting on or have partial/potential matches for, check whether there are live contenders
		for _, v := range w {
			kf := b.keyFrames[v]
			for i, f := range kf {
				// Key frames with a typ above frames.PREV are compared against
				// the eof offset; the rest against the bof offset.
				off := bof
				if f.typ > frames.PREV {
					off = eof
				}
				var waitfor, excludable bool
				if f.key.pMax == -1 || f.key.pMax+int64(f.key.lMax) > off {
					waitfor = true
				} else if hit, ok := hits[v]; ok {
					if hit.partials[i] != nil {
						waitfor = true
					} else if hit.potentialIdxs[i] > 0 && strikes[hit.potentialIdxs[i]-1].hasPotential() {
						// potentialIdxs stores the strike key plus one, so zero means "no strike linked".
						waitfor, excludable = true, true
					}
				}
				// if we've got to the end of the signature, and have determined this is a live one - return immediately & continue scan
				if waitfor {
					if i == len(kf)-1 {
						if !config.Slow() || !config.Checkpoint(bof) {
							return true
						}
						// In slow mode at a checkpoint, log each live signature
						// and keep checking the rest instead of returning at once.
						keepScanning = true
						fmt.Fprintf(config.Out(), "waiting on: %d, potentially excludable: %t\n", v, excludable)
					}
					continue
				}
				// This key frame is not live, so the signature as a whole is dropped.
				break
			}
		}
		return keepScanning
	}
	// testStrike expands a strike into the key frame hits it satisfies, testing
	// the bytes to the left and right of the strike where the test tree has
	// incomplete entries.
	testStrike := func(st strike) []kfHit {
		// the offsets we *record* are always BOF offsets - these can be interpreted as EOF offsets when necessary
		off := st.offset
		if st.reverse {
			off = buf.Size() - st.offset - int64(st.length)
		}
		// grab the relevant testTree
		t := b.tests[st.idxa+st.idxb]
		res := make([]kfHit, 0, 10)
		// immediately apply key frames for the completes
		for _, kf := range t.complete {
			if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
				res = append(res, kfHit{kf, off, st.length})
			}
		}
		// if there are no incompletes, we are done
		if len(t.incomplete) < 1 {
			return res
		}
		// see what incompletes are worth pursuing
		var checkl, checkr bool
		for _, v := range t.incomplete {
			if checkl && checkr {
				break
			}
			if b.keyFrames[v.kf[0]][v.kf[1]].check(st.offset) && waitSet.Check(v.kf[0]) {
				if v.l {
					checkl = true
				}
				if v.r {
					checkr = true
				}
			}
		}
		if !checkl && !checkr {
			return res
		}
		// calculate the offset and lengths for the left and right test slices
		var lslc, rslc []byte
		var lpos, rpos int64
		var llen, rlen int
		if st.reverse {
			lpos, llen = st.offset+int64(st.length), t.maxLeftDistance
			rpos, rlen = st.offset-int64(t.maxRightDistance), t.maxRightDistance
			// Clamp at zero, shrinking the length by the amount that overshot.
			if rpos < 0 {
				rlen = rlen + int(rpos)
				rpos = 0
			}
		} else {
			lpos, llen = st.offset-int64(t.maxLeftDistance), t.maxLeftDistance
			rpos, rlen = st.offset+int64(st.length), t.maxRightDistance
			// Clamp at zero, shrinking the length by the amount that overshot.
			if lpos < 0 {
				llen = llen + int(lpos)
				lpos = 0
			}
		}
		// the partials slice has a mirror entry for each of the testTree incompletes
		partials := make([]partial, len(t.incomplete))
		// test left (if there are valid left tests to try)
		if checkl {
			// Slice errors are discarded here; whatever bytes come back are tested.
			if st.reverse {
				lslc, _ = buf.EofSlice(lpos, llen)
			} else {
				lslc, _ = buf.Slice(lpos, llen)
			}
			left := matchTestNodes(t.left, lslc, true)
			for _, lp := range left {
				if partials[lp.followUp].l {
					partials[lp.followUp].ldistances = append(partials[lp.followUp].ldistances, lp.distances...)
				} else {
					partials[lp.followUp].l = true
					partials[lp.followUp].ldistances = lp.distances
				}
			}
		}
		// test right (if there are valid right tests to try)
		if checkr {
			// Slice errors are discarded here; whatever bytes come back are tested.
			if st.reverse {
				rslc, _ = buf.EofSlice(rpos, rlen)
			} else {
				rslc, _ = buf.Slice(rpos, rlen)
			}
			right := matchTestNodes(t.right, rslc, false)
			for _, rp := range right {
				if partials[rp.followUp].r {
					partials[rp.followUp].rdistances = append(partials[rp.followUp].rdistances, rp.distances...)
				} else {
					partials[rp.followUp].r = true
					partials[rp.followUp].rdistances = rp.distances
				}
			}
		}
		// now iterate through the partials, checking whether they fulfil any of the incompletes
		for i, p := range partials {
			if p.l == t.incomplete[i].l && p.r == t.incomplete[i].r {
				kf := t.incomplete[i].kf
				if b.keyFrames[kf[0]][kf[1]].check(st.offset) && waitSet.Check(kf[0]) {
					// A side with no test contributes a single zero distance.
					if !p.l {
						p.ldistances = []int{0}
					}
					if !p.r {
						p.rdistances = []int{0}
					}
					// When oneEnough reports true only the first left/right
					// distance pair is recorded; otherwise every combination is.
					if oneEnough(kf[1], b.keyFrames[kf[0]]) {
						res = append(res, kfHit{kf, off - int64(p.ldistances[0]), p.ldistances[0] + st.length + p.rdistances[0]})
						continue
					}
					for _, ldistance := range p.ldistances {
						for _, rdistance := range p.rdistances {
							res = append(res, kfHit{kf, off - int64(ldistance), ldistance + st.length + rdistance})
						}
					}
				}
			}
		}
		return res
	}
	// applyKeyFrame records a key frame hit against its signature and reports
	// whether the signature is now fully matched, along with a basis string.
	applyKeyFrame := func(hit kfHit) (bool, string) {
		kfs := b.keyFrames[hit.id[0]]
		// Single key frame signatures match outright.
		if len(kfs) == 1 {
			return true, fmt.Sprintf("byte match at %d, %d", hit.offset, hit.length)
		}
		h, ok := hits[hit.id[0]]
		if !ok {
			h = newHit(hit.id[0])
		}
		if h.partials[hit.id[1]] == nil {
			h.partials[hit.id[1]] = [][2]int64{{hit.offset, int64(hit.length)}}
		} else {
			h.partials[hit.id[1]] = append(h.partials[hit.id[1]], [2]int64{hit.offset, int64(hit.length)})
		}
		// No match is possible until every key frame has at least one partial.
		for _, p := range h.partials {
			if p == nil {
				return false, ""
			}
		}
		// Walk the key frames in order, each time keeping the offsets that
		// checkRelated returns for the current key frame.
		prevOff := h.partials[0]
		basis := make([][][2]int64, len(kfs))
		basis[0] = prevOff
		prevKf := kfs[0]
		ok = false
		for i, kf := range kfs[1:] {
			// i indexes kfs[1:], so the current key frame is kfs[i+1] and the next is kfs[i+2].
			var nextKf keyFrame
			if i+2 < len(kfs) {
				nextKf = kfs[i+2]
			}
			thisOff := h.partials[i+1]
			prevOff, ok = kf.checkRelated(prevKf, nextKf, thisOff, prevOff)
			if !ok {
				return false, ""
			}
			basis[i+1] = prevOff
			prevKf = kf
		}
		return true, fmt.Sprintf("byte match at %v", basis)
	}
	go func() {
		for in := range incoming {
			// if we've got a positive result, drain any remaining strikes from the matchers
			if quitting {
				continue
			}
			// if the strike reports progress, check if we should be continuing to wait
			if in.idxa == -1 {
				// update with the latest offset
				if in.reverse {
					eof = in.offset
				} else {
					bof = in.offset
				}
				w := waitSet.WaitingOnAt(bof, eof)
				// if any of the waitlists are nil, we will continue - unless we are past the known bof and known eof (points at which we *should* have got at least partial matches), in which case we will check if any partial/potential matches are live
				if w == nil {
					// keep going if we don't have a maximum known bof, or if our current bof/eof are less than the maximum known bof/eof
					if b.knownBOF < 0 || int64(b.knownBOF) > bof || int64(b.knownEOF) > eof {
						continue
					}
					// if we don't have a waitlist, and we are past the known bof and known eof, grab all the partials and potentials to check if any are live
					w = all(hits)
				}
				// exhausted all contenders, we can stop scanning
				if !continueWaiting(w) {
					quit()
				}
				continue
			}
			// now cache or satisfy the strike
			var hasPotential bool
			potentials := filterKF(b.tests[in.idxa+in.idxb].keyFrames(), waitSet)
			for _, pot := range potentials {
				// if any of the signatures are single keyframe we can satisfy immediately and skip cache
				if len(b.keyFrames[pot[0]]) == 1 {
					hasPotential = true
					break
				}
				if hit, ok := hits[pot[0]]; ok && hit.potentiallyComplete(pot[1], strikes) {
					hasPotential = true
					break
				}
			}
			if !hasPotential {
				// cache the strike
				s, ok := strikes[in.idxa+in.idxb]
				if !ok {
					s = &strikeItem{in, -1, nil}
					strikes[in.idxa+in.idxb] = s
				} else {
					// Repeat strikes for the same test are appended as offset/length pairs.
					if s.successive == nil {
						s.successive = make([][2]int64, 0, 10)
					}
					s.successive = append(s.successive, [2]int64{in.offset, int64(in.length)})
				}
				// range over the potentials, linking to the strike
				for _, pot := range potentials {
					if b.keyFrames[pot[0]][pot[1]].check(in.offset) {
						hit, ok := hits[pot[0]]
						if !ok {
							hit = newHit(pot[0])
						}
						// Stored as key+1 so that the zero value means "no strike linked".
						hit.potentialIdxs[pot[1]] = in.idxa + in.idxb + 1
					}
				}
				goto end
			}
			// satisfy the strike
			for {
				ks := testStrike(in)
				for _, k := range ks {
					if match, basis := applyKeyFrame(k); match {
						if waitSet.Check(k.id[0]) {
							r <- result{k.id[0], basis}
							// A true return from PutAt ends the scan.
							if waitSet.PutAt(k.id[0], bof, eof) {
								quit()
								goto end
							}
						}
						if h, ok := hits[k.id[0]]; ok {
							h.matched = true
						}
					}
				}
				// Re-filter against the wait set, then replay the next cached
				// strike (if any) that one of the remaining potentials offers.
				potentials = filterKF(potentials, waitSet)
				var ok bool
				for _, pot := range potentials {
					in, ok = hits[pot[0]].nextPotential(strikes)
					if ok {
						break
					}
				}
				if !ok {
					break
				}
			}
		end:
			// keep looping until incoming is closed
		}
		// incoming has been closed by the sender: no more results will follow.
		close(r)
	}()
	return incoming
}
func (m Matcher) Identify(na string, b *siegreader.Buffer, exclude ...int) (chan core.Result, error) { buf, err := b.Slice(0, 8) if err != nil || buf[0] != 'R' || buf[1] != 'I' || buf[2] != 'F' || buf[3] != 'F' { res := make(chan core.Result) close(res) return res, nil } rcc, rrdr, err := riff.NewReader(siegreader.ReaderFrom(b)) if err != nil { res := make(chan core.Result) close(res) return res, nil } // now make structures for testing uniqs := make(map[riff.FourCC]bool) res := make(chan core.Result) waitset := m.priorities.WaitSet(exclude...) // send and report if satisified send := func(cc riff.FourCC) bool { if config.Debug() { fmt.Fprintf(config.Out(), "riff match %s\n", string(cc[:])) } if uniqs[cc] { return false } uniqs[cc] = true for _, hit := range m.riffs[cc] { if waitset.Check(hit) { if config.Debug() { fmt.Fprintf(config.Out(), "sending riff match %s\n", string(cc[:])) } res <- result{hit, cc} if waitset.Put(hit) { return true } } } return false } // riff walk var descend func(*riff.Reader) bool descend = func(r *riff.Reader) bool { for { chunkID, chunkLen, chunkData, err := r.Next() if err != nil || send(chunkID) { return true } if chunkID == riff.LIST { listType, list, err := riff.NewListReader(chunkLen, chunkData) if err != nil || send(listType) { return true } if descend(list) { return true } } } } // go time go func() { if send(rcc) { close(res) return } descend(rrdr) close(res) }() return res, nil }