func updateSigs() (string, error) { url, _, _ := config.UpdateOptions() if url == "" { return "Update is not available for this distribution of siegfried", nil } response, err := getHttp(url) if err != nil { return "", err } var u Update if err := json.Unmarshal(response, &u); err != nil { return "", err } version := config.Version() if version[0] < u.Version[0] || (version[0] == u.Version[0] && version[1] < u.Version[1]) || // if the version is out of date u.Version == [3]int{0, 0, 0} || u.Created == "" || u.Size == 0 || u.Path == "" { // or if the unmarshalling hasn't worked and we have blank values return "Your version of siegfried is out of date; please install latest from http://www.itforarchivists.com/siegfried before continuing.", nil } s, err := siegfried.Load(config.Signature()) if err == nil { if !s.Update(u.Created) { return "You are already up to date!", nil } } else { // this hairy bit of golang exception handling is thanks to Ross! :) if _, err = os.Stat(config.Home()); err != nil { if os.IsNotExist(err) { err = os.MkdirAll(config.Home(), os.ModePerm) if err != nil { return "", fmt.Errorf("Siegfried: cannot create home directory %s, %v", config.Home(), err) } } else { return "", fmt.Errorf("Siegfried: error opening directory %s, %v", config.Home(), err) } } } fmt.Println("... downloading latest signature file ...") response, err = getHttp(u.Path) if err != nil { return "", fmt.Errorf("Siegfried: error retrieving %s.\nThis may be a network or firewall issue. See https://github.com/richardlehane/siegfried/wiki/Getting-started for manual instructions.\nSystem error: %v", config.SignatureBase(), err) } if len(response) != u.Size { return "", fmt.Errorf("Siegfried: error retrieving %s; expecting %d bytes, got %d bytes", config.SignatureBase(), u.Size, len(response)) } err = ioutil.WriteFile(config.Signature(), response, os.ModePerm) if err != nil { return "", fmt.Errorf("Siegfried: error writing to directory, %v", err) } fmt.Printf("... writing %s ...\n", config.Signature()) return "Your signature file has been updated", nil }
func blameSig(i int) error { if *inspectHome != config.Home() { config.SetHome(*inspectHome) } s, err := siegfried.Load(config.Signature()) if err != nil { return err } fmt.Println(s.Blame(i, *inspectCType, *inspectCName)) return nil }
func inspectSig(t core.MatcherType) error { if *inspectHome != config.Home() { config.SetHome(*inspectHome) } s, err := siegfried.Load(config.Signature()) if err != nil { return err } fmt.Print(s.Inspect(t)) return nil }
func setHarvestOptions() { if *harvestHome != config.Home() { config.SetHome(*harvestHome) } if *harvestDroid != config.Droid() { config.SetDroid(*harvestDroid)() } if *timeout != htimeout { config.SetHarvestTimeout(*timeout) } if *throttlef > 0 { config.SetHarvestThrottle(*throttlef) } }
func initSets() error { // load all json files in the sets directory and add them to a single map sets = make(map[string][]string) wf := func(path string, info os.FileInfo, err error) error { if err != nil { return errors.New("error walking sets directory, must have a 'sets' directory in siegfried home: " + err.Error()) } if info.IsDir() { return nil } switch filepath.Ext(path) { default: return nil // ignore non json files case ".json": } set := make(map[string][]string) byts, err := ioutil.ReadFile(path) if err != nil { return errors.New("error loading " + path + " " + err.Error()) } err = json.Unmarshal(byts, &set) if err != nil { return errors.New("error unmarshalling " + path + " " + err.Error()) } for k, v := range set { k = stripComment(k) v = stripComments(v) sort.Strings(v) m, ok := sets[k] if !ok { sets[k] = v } else { // if we already have this key, add any new items in its list to the existing list for _, w := range v { idx := sort.SearchStrings(m, w) if idx == len(m) || m[idx] != w { m = append(m, w) } } sort.Strings(m) sets[k] = m } } return nil } return filepath.Walk(filepath.Join(config.Home(), "sets"), wf) }
func main() { flag.Parse() /*//UNCOMMENT TO RUN PROFILER go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()*/ // configure home and signature if not default if *home != config.Home() { config.SetHome(*home) } if *sig != config.SignatureBase() { config.SetSignature(*sig) } // handle -update if *update { msg, err := updateSigs() if err != nil { log.Fatalf("[FATAL] failed to update signature file, %v", err) } fmt.Println(msg) return } // handle -hash error hashT := getHash(*hashf) if *hashf != "" && hashT < 0 { log.Fatalf("[FATAL] invalid hash type; choose from %s", hashChoices) } // load and handle signature errors s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("[FATAL] error loading signature file, got: %v", err) } // handle -version if *version { version := config.Version() fmt.Printf("siegfried %d.%d.%d\n%s", version[0], version[1], version[2], s) return } // handle -fpr if *fprflag { log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr()) serveFpr(config.Fpr(), s) return } // check -multi if *multi > maxMulti || *multi < 1 || (*archive && *multi > 1) { log.Println("[WARN] -multi must be > 0 and =< 1024. If -z, -multi must be 1. Resetting -multi to 1") *multi = 1 } // start logger lg, err := newLogger(*logf) if err != nil { log.Fatalln(err) } if config.Slow() || config.Debug() { if *serve != "" || *fprflag { log.Fatalln("[FATAL] debug and slow logging cannot be run in server mode") } } // start throttle if *throttlef != 0 { throttle = time.NewTicker(*throttlef) defer throttle.Stop() } // start the printer lenCtxts := *multi if lenCtxts == 1 { lenCtxts = 8 } ctxts := make(chan *context, lenCtxts) go printer(ctxts, lg) // set default writer var w writer switch { case *csvo: w = newCSV(os.Stdout) case *jsono: w = newJSON(os.Stdout) case *droido: w = newDroid(os.Stdout) if len(s.Fields()) != 1 || len(s.Fields()[0]) != 7 { close(ctxts) log.Fatalln("[FATAL] DROID output is limited to signature files with a single PRONOM identifier") } default: w = newYAML(os.Stdout) } // overrite writer with nil writer if logging is to stdout if lg != nil && lg.w == os.Stdout { w = logWriter{} } // setup default waitgroup wg := &sync.WaitGroup{} // setup context pool setCtxPool(s, w, wg, hashT, *archive) // handle -serve if *serve != "" { log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve) listen(*serve, s, ctxts) return } // handle no file/directory argument if flag.NArg() != 1 { close(ctxts) log.Fatalln("[FATAL] expecting a single file or directory argument") } w.writeHead(s, hashT) // support reading list files from stdin if flag.Arg(0) == "-" { scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { info, err := os.Stat(scanner.Text()) if err != nil { info, err = retryStat(scanner.Text(), err) } if err != nil || info.IsDir() { ctx := getCtx(scanner.Text(), "", "", 0) ctx.res <- results{fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err), nil, nil} ctx.wg.Add(1) ctxts <- ctx } else { identifyFile(getCtx(scanner.Text(), "", info.ModTime().Format(time.RFC3339), info.Size()), ctxts, getCtx) } } } else { err = identify(ctxts, flag.Arg(0), "", *nr, getCtx) } wg.Wait() close(ctxts) w.writeTail() // log time elapsed if !lg.start.IsZero() { fmt.Fprintf(lg.w, "%s %v\n", timeString, time.Since(lg.start)) } if err != nil { log.Fatal(err) } os.Exit(0) }
errString = "[ERROR]" warnString = "[WARN]" timeString = "[TIME]" ) // flags var ( update = flag.Bool("update", false, "update or install the default signature file") version = flag.Bool("version", false, "display version information") logf = flag.String("log", "error", "log errors, warnings, debug or slow output, knowns or unknowns to stderr or stdout e.g. -log error,warn,unknown,stdout") nr = flag.Bool("nr", false, "prevent automatic directory recursion") csvo = flag.Bool("csv", false, "CSV output format") jsono = flag.Bool("json", false, "JSON output format") droido = flag.Bool("droid", false, "DROID CSV output format") sig = flag.String("sig", config.SignatureBase(), "set the signature file") home = flag.String("home", config.Home(), "override the default home directory") serve = flag.String("serve", "", "start siegfried server e.g. -serve localhost:5138") multi = flag.Int("multi", 1, "set number of parallel file ID processes") archive = flag.Bool("z", false, "scan archive formats (zip, tar, gzip, warc, arc)") hashf = flag.String("hash", "", "calculate file checksum with hash algorithm; options "+hashChoices) throttlef = flag.Duration("throttle", 0, "set a time to wait between scanning files e.g. 50ms") ) var ( throttle *time.Ticker ctxPool *sync.Pool ) type WalkError struct { path string err error
func getOptions() []config.Option { if *home != config.Home() { config.SetHome(*home) } if *inspectHome != config.Home() { config.SetHome(*inspectHome) } opts := []config.Option{} // build options if *droid != config.Droid() { opts = append(opts, config.SetDroid(*droid)) } if *container != config.Container() { opts = append(opts, config.SetContainer(*container)) } if *mi != "" { opts = append(opts, config.SetMIMEInfo(*mi)) } if *fdd != "" { opts = append(opts, config.SetLOC(*fdd)) } if *locfdd { opts = append(opts, config.SetLOC("")) } if *nopronom { opts = append(opts, config.SetNoPRONOM()) } if *name != "" { opts = append(opts, config.SetName(*name)) } if *details != config.Details() { opts = append(opts, config.SetDetails(*details)) } if *extend != "" { opts = append(opts, config.SetExtend(expandSets(*extend))) } if *extendc != "" { if *extend == "" { fmt.Println( `roy: warning! Unless the container extension only extends formats defined in the DROID signature file you should also include a regular signature extension (-extend) that includes a FileFormatCollection element describing the new formats.`) } opts = append(opts, config.SetExtendC(expandSets(*extendc))) } if *include != "" { opts = append(opts, config.SetLimit(expandSets(*include))) } if *exclude != "" { opts = append(opts, config.SetExclude(expandSets(*exclude))) } if *bof != 0 { opts = append(opts, config.SetBOF(*bof)) } if *eof != 0 { opts = append(opts, config.SetEOF(*eof)) } if *noeof { opts = append(opts, config.SetNoEOF()) } if *multi != "" { opts = append(opts, config.SetMulti(strings.ToLower(*multi))) } if *nocontainer { opts = append(opts, config.SetNoContainer()) } if *notext { opts = append(opts, config.SetNoText()) } if *noname { opts = append(opts, config.SetNoName()) } if *nomime { opts = append(opts, config.SetNoMIME()) } if *noxml { opts = append(opts, config.SetNoXML()) } if *noriff { opts = append(opts, config.SetNoRIFF()) } if *noreports { opts = append(opts, config.SetNoReports()) } if *doubleup { opts = append(opts, config.SetDoubleUp()) } if *rng != config.Range() { opts = append(opts, config.SetRange(*rng)) } if *distance != config.Distance() { opts = append(opts, config.SetDistance(*distance)) } if *choices != config.Choices() { opts = append(opts, config.SetChoices(*choices)) } // inspect options if *inspectMI != "" { opts = append(opts, config.SetMIMEInfo(*inspectMI)) } if *inspectFDD != "" { opts = append(opts, config.SetLOC(*fdd)) } if *inspectLOC { opts = append(opts, config.SetLOC("")) } if *inspectInclude != "" { opts = append(opts, config.SetLimit(expandSets(*inspectInclude))) } if *inspectExclude != "" { opts = append(opts, config.SetExclude(expandSets(*inspectExclude))) } if *inspectExtend != "" { opts = append(opts, config.SetExtend(expandSets(*inspectExtend))) } if *inspectExtendc != "" { if *inspectExtend == "" { fmt.Println( `roy: warning! Unless the container extension only extends formats defined in the DROID signature file you should also include a regular signature extension (-extend) that includes a FileFormatCollection element describing the new formats.`) } opts = append(opts, config.SetExtendC(expandSets(*inspectExtendc))) } return opts }
E.g. roy inspect -limit @pdfa priorities -mi, -loc, -fdd Specify particular MIME-info or LOC FDD signature files for inspecting formats or viewing priorities. -reports Build from PRONOM reports files (rather than just using the DROID XML file as input). A bit slower but can be more accurate for a small set of formats like FLAC. -home Use a different siegfried home directory. ` var ( // BUILD, ADD flag sets build = flag.NewFlagSet("build | add", flag.ExitOnError) home = build.String("home", config.Home(), "override the default home directory") droid = build.String("droid", config.Droid(), "set name/path for DROID signature file") mi = build.String("mi", "", "set name/path for MIMEInfo signature file") fdd = build.String("fdd", "", "set name/path for LOC FDD signature file") locfdd = build.Bool("loc", false, "build a LOC FDD signature file") nopronom = build.Bool("nopronom", false, "don't include PRONOM sigs with LOC signature file") container = build.String("container", config.Container(), "set name/path for Droid Container signature file") name = build.String("name", "", "set identifier name") details = build.String("details", config.Details(), "set identifier details") extend = build.String("extend", "", "comma separated list of additional signatures") extendc = build.String("extendc", "", "comma separated list of additional container signatures") include = build.String("limit", "", "comma separated list of PRONOM signatures to include") exclude = build.String("exclude", "", "comma separated list of PRONOM signatures to exclude") bof = build.Int("bof", 0, "define a maximum BOF offset") eof = build.Int("eof", 0, "define a maximum EOF offset") noeof = build.Bool("noeof", false, "ignore EOF segments in signatures")