func newLogger(opts string) error { l := &logger{w: os.Stderr} for _, o := range strings.Split(opts, ",") { switch o { case "stderr": case "stdout", "out", "o": l.w = os.Stdout case "progress", "p": l.progress = true case "time", "t": l.start = time.Now() case "error", "err", "e": l.e = true case "warning", "warn", "w": l.warn = true case "debug", "d": l.debug, l.progress = true, true config.SetDebug() case "slow", "s": l.slow, l.progress = true, true config.SetSlow() case "unknown", "u": l.unknown = true case "known", "k": l.known = true default: return fmt.Errorf("unknown -log input %s; expect be comma-separated list of stdout,out,o,progress,p,error,err,e,warning,warn,w,debug,d,slow,s,unknown,u,known,k", opts) } } if config.Debug() || config.Slow() { config.SetOut(l.w) } lg = l return nil }
func (c *ContainerMatcher) identify(n string, rdr Reader, res chan core.Result) { // safe to call on a nil matcher (i.e. container matching switched off) if c == nil { close(res) return } id := c.newIdentifier(len(c.parts)) var err error for err = rdr.Next(); err == nil; err = rdr.Next() { ct, ok := c.nameCTest[rdr.Name()] if !ok { continue } if config.Debug() { fmt.Printf("{Name match - %s (container %d))}\n", rdr.Name(), c.conType) } // name has matched, let's test the CTests // ct.identify will generate a slice of hits which pass to // processHits which will return true if we can stop if c.processHits(ct.identify(c, id, rdr, rdr.Name()), id, ct, rdr.Name(), res) { break } } // send a default hit if no result and extension matches if c.extension != "" && !id.result && filepath.Ext(n) == "."+c.extension { res <- defaultHit(-1 - int(c.conType)) } close(res) }
// Identify identifies a stream or file object. // It takes the name of the file/stream (if unknown, give an empty string) and an io.Reader // It returns a channel of identifications and an error func (s *Siegfried) Identify(n string, r io.Reader) (chan core.Identification, error) { buffer, err := s.buffers.Get(r) if err != nil && err != io.EOF { return nil, fmt.Errorf("siegfried: error reading file; got %v", err) } res := make(chan core.Identification) recs := make([]core.Recorder, len(s.ids)) for i, v := range s.ids { recs[i] = v.Recorder() } // Extension Matcher if len(n) > 0 { ems, _ := s.em.Identify(n, nil) // we don't care about an error here for v := range ems { for _, rec := range recs { if rec.Record(core.ExtensionMatcher, v) { break } } } } // Container Matcher if s.cm != nil { if config.Debug() { fmt.Println(">>START CONTAINER MATCHER") } cms, cerr := s.cm.Identify(n, buffer) for v := range cms { for _, rec := range recs { if rec.Record(core.ContainerMatcher, v) { break } } } err = cerr } satisfied := true for _, rec := range recs { if !rec.Satisfied(core.ByteMatcher) { satisfied = false } } // Byte Matcher if !satisfied { if config.Debug() { fmt.Println(">>START BYTE MATCHER") } ids, _ := s.bm.Identify("", buffer) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.ByteMatcher, v) { break } } } } satisfied = true for _, rec := range recs { if !rec.Satisfied(core.TextMatcher) { satisfied = false break } } // Text Matcher if !satisfied { ids, _ := s.tm.Identify("", buffer) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.TextMatcher, v) { break } } } } s.buffers.Put(buffer) go func() { for _, rec := range recs { rec.Report(res) } close(res) }() return res, err }
func main() { flag.Parse() /*//UNCOMMENT TO RUN PROFILER go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()*/ if *home != config.Home() { config.SetHome(*home) } if *sig != config.SignatureBase() { config.SetSignature(*sig) } if *version { version := config.Version() fmt.Printf("siegfried version: %d.%d.%d\n", version[0], version[1], version[2]) return } if *update { msg, err := updateSigs() if err != nil { log.Fatalf("Error: failed to update signature file, %v", err) } fmt.Println(msg) return } // during parallel scanning or in server mode, unsafe to access the last read buffer - so can't unzip or hash if *multi > 1 || *serve != "" { if *archive { log.Fatalln("Error: cannot scan archive formats when running in parallel mode") } if *hashf != "" { log.Fatalln("Error: cannot calculate file checksum when running in parallel mode") } } if err := setHash(); err != nil { log.Fatal(err) } if *serve != "" || *fprflag { s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("Error: error loading signature file, got: %v", err) } if *serve != "" { log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve) listen(*serve, s) return } log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr()) serveFpr(config.Fpr(), s) return } if flag.NArg() != 1 { log.Fatalln("Error: expecting a single file or directory argument") } s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("Error: error loading signature file, got: %v", err) } var w writer switch { case *debug: config.SetDebug() w = debugWriter{} case *slow: config.SetSlow() w = &slowWriter{os.Stdout} case *csvo: w = newCSV(os.Stdout) case *jsono: w = newJSON(os.Stdout) case *droido: w = newDroid(os.Stdout) case *knowno: w = &knownWriter{true, os.Stdout} case *unknowno: w = &knownWriter{false, os.Stdout} default: w = newYAML(os.Stdout) } // support reading list files from stdin if flag.Arg(0) == "-" { w.writeHead(s) scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { info, err := os.Stat(scanner.Text()) if err != nil || info.IsDir() { w.writeFile(scanner.Text(), 0, "", nil, fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err), nil) } else { identifyFile(w, s, scanner.Text(), info.Size(), info.ModTime().Format(time.RFC3339)) } } w.writeTail() os.Exit(0) } info, err := os.Stat(flag.Arg(0)) if err != nil { log.Fatalf("Error: error getting info for %v, got: %v", flag.Arg(0), err) } if info.IsDir() { if config.Debug() { log.Fatalln("Error: when scanning in debug mode, give a file rather than a directory argument") } w.writeHead(s) if *multi > 16 { *multi = 16 } if *multi > 1 { multiIdentifyP(w, s, flag.Arg(0), *nr) } else { multiIdentifyS(w, s, flag.Arg(0), *nr) } w.writeTail() os.Exit(0) } w.writeHead(s) identifyFile(w, s, flag.Arg(0), info.Size(), info.ModTime().Format(time.RFC3339)) w.writeTail() os.Exit(0) }
// Identify function - brings a new matcher into existence func (b *Matcher) identify(buf siegreader.Buffer, quit chan struct{}, r chan core.Result) { buf.SetQuit(quit) incoming := b.newScorer(buf, quit, r) rdr := siegreader.LimitReaderFrom(buf, b.maxBOF) // First test BOF frameset bfchan := b.bofFrames.index(buf, false, quit) for bf := range bfchan { if config.Debug() { fmt.Println(strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true, true}) } incoming <- strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true, true} } select { case <-quit: // the matcher has called quit close(incoming) return default: } // Do an initial check of BOF sequences b.start(true) // start bof matcher if not yet started var bchan chan wac.Result bchan = b.bAho.Index(rdr) for br := range bchan { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) if br.Offset > 2048 { break } } else { if config.Debug() { fmt.Println(strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false, br.Final}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false, br.Final} } } select { case <-quit: // the matcher has called quit for _ = range bchan { } // drain first close(incoming) return default: } // Check EOF frame tests efchan := b.eofFrames.index(buf, true, quit) for ef := range efchan { if config.Debug() { fmt.Println(strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true, true}) } incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true, true} } // Setup EOF sequences test b.start(false) rrdr := siegreader.LimitReverseReaderFrom(buf, b.maxEOF) echan := b.eAho.Index(rrdr) // Now enter main search loop for { select { case br, ok := <-bchan: if !ok { bchan = nil } else { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) } else { if config.Debug() { fmt.Println(strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false, br.Final}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false, br.Final} } } case er, ok := <-echan: if !ok { echan = nil } else { if er.Index[0] == -1 { incoming <- progressStrike(er.Offset, true) } else { if config.Debug() { fmt.Println(strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false, er.Final}) } incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false, er.Final} } } } if bchan == nil && echan == nil { close(incoming) return } } }
// identify function - brings a new matcher into existence func (b *Matcher) identify(buf *siegreader.Buffer, quit chan struct{}, r chan core.Result) { buf.Quit = quit incoming := b.scorer(buf, quit, r) rdr := siegreader.LimitReaderFrom(buf, b.maxBOF) // First test BOF frameset bfchan := b.bofFrames.index(buf, false, quit) for bf := range bfchan { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true}) } incoming <- strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true} } select { case <-quit: // the matcher has called quit for _ = range bfchan { } // drain first close(incoming) return default: } // Do an initial check of BOF sequences b.start(true) // start bof matcher if not yet started var bchan chan wac.Result bchan = b.bAho.Index(rdr) for br := range bchan { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) if br.Offset > 131072 && (b.maxBOF < 0 || b.maxBOF > b.maxEOF*5) { // del buf.Stream break } } else { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false} } } select { case <-quit: // the matcher has called quit for _ = range bchan { } // drain first close(incoming) return default: } // Setup EOF tests efchan := b.eofFrames.index(buf, true, quit) b.start(false) rrdr := siegreader.LimitReverseReaderFrom(buf, b.maxEOF) echan := b.eAho.Index(rrdr) // if we have a maximum value on EOF do a sequential search if b.maxEOF >= 0 { if b.maxEOF != 0 { _, _ = buf.CanSeek(0, true) // force a full read to enable EOF scan to proceed for streams } for ef := range efchan { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}) } incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true} } // Scan complete EOF for er := range echan { if er.Index[0] == -1 { incoming <- progressStrike(er.Offset, true) } else { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}) } incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false} } } // send a final progress strike with the maximum EOF incoming <- progressStrike(int64(b.maxEOF), true) // Finally, finish BOF scan for br := range bchan { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) } else { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false} } } close(incoming) return } // If no maximum on EOF do a parallel search for { select { case br, ok := <-bchan: if !ok { if b.maxBOF < 0 && b.maxEOF != 0 { _, _ = buf.CanSeek(0, true) // if we've a limit BOF reader, force a full read to enable EOF scan to proceed for streams } bchan = nil } else { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) } else { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false} } } case ef, ok := <-efchan: if !ok { efchan = nil } else { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}) } incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true} } case er, ok := <-echan: if !ok { echan = nil } else { if er.Index[0] == -1 { incoming <- progressStrike(er.Offset, true) } else { if config.Debug() { fmt.Fprintln(config.Out(), strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}) } incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false} } } } if bchan == nil && efchan == nil && echan == nil { close(incoming) return } } }
// Identify identifies a stream or file object. // It takes the name of the file/stream (if unknown, give an empty string) and an io.Reader // It returns a channel of identifications and an error. func (s *Siegfried) Identify(r io.Reader, name, mime string) (chan core.Identification, error) { buffer, err := s.buffers.Get(r) if err == io.EOF { err = nil } if err != nil && err != siegreader.ErrEmpty { return nil, fmt.Errorf("siegfried: error reading file; got %v", err) } res := make(chan core.Identification) recs := make([]core.Recorder, len(s.ids)) for i, v := range s.ids { recs[i] = v.Recorder() if name != "" { recs[i].Active(core.ExtensionMatcher) } if mime != "" { recs[i].Active(core.MIMEMatcher) } if err == nil { //recs[i].Active(core.ContainerMatcher) //recs[i].Active(core.ByteMatcher) recs[i].Active(core.TextMatcher) } } // Extension Matcher if len(name) > 0 { ext := stringmatcher.NormaliseExt(name) ems, _ := s.em.Identify(ext, nil) // we don't care about an error here for v := range ems { for _, rec := range recs { if rec.Record(core.ExtensionMatcher, stringmatcher.ExtResult{v}) { break } } } } // MIME Matcher if len(mime) > 0 { mime = stringmatcher.NormaliseMIME(mime) mms, _ := s.mm.Identify(mime, nil) // we don't care about an error here for v := range mms { for _, rec := range recs { if rec.Record(core.MIMEMatcher, stringmatcher.MIMEResult{v}) { break } } } } // Container Matcher if s.cm != nil { if config.Debug() { fmt.Fprintln(config.Out(), ">>START CONTAINER MATCHER") } cms, cerr := s.cm.Identify(name, buffer) for v := range cms { for _, rec := range recs { if rec.Record(core.ContainerMatcher, v) { break } } } if err == nil { err = cerr } } satisfied := true for _, rec := range recs { if !rec.Satisfied(core.ByteMatcher) { satisfied = false } } // Byte Matcher if !satisfied { if config.Debug() { fmt.Fprintln(config.Out(), ">>START BYTE MATCHER") } ids, _ := s.bm.Identify("", buffer) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.ByteMatcher, v) { break } } } } satisfied = true for _, rec := range recs { if !rec.Satisfied(core.TextMatcher) { satisfied = false break } } // Text Matcher if !satisfied { ids, _ := s.tm.Identify("", buffer) // we don't care about an error here for v := range ids { for _, rec := range recs { if rec.Record(core.TextMatcher, v) { break } } } } s.buffers.Put(buffer) go func() { for _, rec := range recs { rec.Report(res) } close(res) }() return res, err }
// Identify function - brings a new matcher into existence func (b *Matcher) identify(buf siegreader.Buffer, quit chan struct{}, r chan core.Result) { buf.SetQuit(quit) incoming := b.scorer(buf, quit, r) rdr := siegreader.LimitReaderFrom(buf, b.maxBOF) // First test BOF frameset bfchan := b.bofFrames.index(buf, false, quit) for bf := range bfchan { if config.Debug() { fmt.Println(strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true}) } incoming <- strike{b.bofFrames.testTreeIndex[bf.idx], 0, bf.off, bf.length, false, true} } select { case <-quit: // the matcher has called quit for _ = range bfchan { } // drain first close(incoming) return default: } // Do an initial check of BOF sequences b.start(true) // start bof matcher if not yet started var bchan chan wac.Result bchan = b.bAho.Index(rdr) for br := range bchan { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) if !buf.Stream() && br.Offset > 131072 && (b.maxBOF < 0 || b.maxBOF > b.maxEOF*5) { break } } else { if config.Debug() { fmt.Println(strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false} } } select { case <-quit: // the matcher has called quit for _ = range bchan { } // drain first close(incoming) return default: } // Setup EOF tests efchan := b.eofFrames.index(buf, true, quit) b.start(false) rrdr := siegreader.LimitReverseReaderFrom(buf, b.maxEOF) echan := b.eAho.Index(rrdr) // if we have a maximum value on EOF do a sequential search if b.maxEOF >= 0 { for ef := range efchan { if config.Debug() { fmt.Println(strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}) } incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true} } // Scan complete EOF for er := range echan { if er.Index[0] == -1 { incoming <- progressStrike(er.Offset, true) } else { if config.Debug() { fmt.Println(strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}) } incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false} } } // let the scorer known we have reached the end of the EOF scan incoming <- progressStrike(-1, true) // Finally, finish BOF scan for br := range bchan { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) } else { if config.Debug() { fmt.Println(strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false} } } close(incoming) return } // If no maximum on EOF do a parallel search for { select { case br, ok := <-bchan: if !ok { bchan = nil } else { if br.Index[0] == -1 { incoming <- progressStrike(br.Offset, false) } else { if config.Debug() { fmt.Println(strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false}) } incoming <- strike{b.bofSeq.testTreeIndex[br.Index[0]], br.Index[1], br.Offset, br.Length, false, false} } } case ef, ok := <-efchan: if !ok { efchan = nil } else { if config.Debug() { fmt.Println(strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true}) } incoming <- strike{b.eofFrames.testTreeIndex[ef.idx], 0, ef.off, ef.length, true, true} } case er, ok := <-echan: if !ok { echan = nil } else { if er.Index[0] == -1 { incoming <- progressStrike(er.Offset, true) } else { if config.Debug() { fmt.Println(strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false}) } incoming <- strike{b.eofSeq.testTreeIndex[er.Index[0]], er.Index[1], er.Offset, er.Length, true, false} } } } if bchan == nil && efchan == nil && echan == nil { close(incoming) return } } }