func updateSigs() (string, error) { url, _, _ := config.UpdateOptions() if url == "" { return "Update is not available for this distribution of Siegfried", nil } response, err := getHttp(url) if err != nil { return "", err } var u Update if err := json.Unmarshal(response, &u); err != nil { return "", err } version := config.Version() if version[0] < u.SfVersion[0] || (u.SfVersion[0] == version[0] && version[1] < u.SfVersion[1]) { return "Your version of Siegfried is out of date; please install latest from http://www.itforarchivists.com/siegfried before continuing.", nil } s, err := siegfried.Load(config.Signature()) if err == nil { if !s.Update(u.SigCreated) { return "You are already up to date!", nil } } else { // this hairy bit of golang exception handling is thanks to Ross! :) if _, err = os.Stat(config.Home()); err != nil { if os.IsNotExist(err) { err = os.MkdirAll(config.Home(), os.ModePerm) if err != nil { return "", fmt.Errorf("Siegfried: cannot create home directory %s, %v", config.Home(), err) } } else { return "", fmt.Errorf("Siegfried: error opening directory %s, %v", config.Home(), err) } } } fmt.Println("... downloading latest signature file ...") response, err = getHttp(u.LatestURL) if err != nil { return "", fmt.Errorf("Siegfried: error retrieving pronom.gob.\nThis may be a network or firewall issue. See https://github.com/richardlehane/siegfried/wiki/Getting-started for manual instructions.\nSystem error: %v", err) } if len(response) != u.GobSize { return "", fmt.Errorf("Siegfried: error retrieving pronom.gob; expecting %d bytes, got %d bytes", u.GobSize, len(response)) } err = ioutil.WriteFile(config.Signature(), response, os.ModePerm) if err != nil { return "", fmt.Errorf("Siegfried: error writing to directory, %v", err) } fmt.Printf("... writing %s ...\n", config.Signature()) return "Your signature file has been updated", nil }
func setInspectOptions() { if *inspectHome != config.Home() { config.SetHome(*inspectHome) } if *inspectReports != config.Reports() { config.SetReports(*inspectReports)() } }
func setHarvestOptions() { if *harvestHome != config.Home() { config.SetHome(*harvestHome) } if *harvestDroid != config.Droid() { config.SetDroid(*harvestDroid)() } if *harvestReports != config.Reports() { config.SetReports(*harvestReports)() } if *timeout != htimeout { config.SetHarvestTimeout(*timeout) } }
func initSets() error { // load all json files in the sets directory and add them to a single map sets = make(map[string][]string) wf := func(path string, info os.FileInfo, err error) error { if err != nil { return errors.New("error walking sets directory, must have a 'sets' directory in siegfried home: " + err.Error()) } if info.IsDir() { return nil } switch filepath.Ext(path) { default: return nil // ignore non json files case ".json": } set := make(map[string][]string) byts, err := ioutil.ReadFile(path) if err != nil { return errors.New("error loading " + path + " " + err.Error()) } err = json.Unmarshal(byts, &set) if err != nil { return errors.New("error unmarshalling " + path + " " + err.Error()) } for k, v := range set { k = stripComment(k) v = stripComments(v) sort.Strings(v) m, ok := sets[k] if !ok { sets[k] = v } else { // if we already have this key, add any new items in its list to the existing list for _, w := range v { idx := sort.SearchStrings(m, w) if idx == len(m) || m[idx] != w { m = append(m, w) } } sort.Strings(m) sets[k] = m } } return nil } return filepath.Walk(filepath.Join(config.Home(), "sets"), wf) }
func buildOptions() []config.Option { if *home != config.Home() { config.SetHome(*home) } opts := []config.Option{} if *droid != config.Droid() { opts = append(opts, config.SetDroid(*droid)) } if *container != config.Container() { opts = append(opts, config.SetContainer(*container)) } if *reports != config.Reports() { opts = append(opts, config.SetReports(*reports)) } if *name != config.Name() { opts = append(opts, config.SetName(*name)) } if *details != config.Details() { opts = append(opts, config.SetDetails(*details)) } if *extend != "" { opts = append(opts, config.SetExtend(expandSets(*extend))) } if *extendc != "" { if *extend == "" { fmt.Println( `roy: warning! Unless the container extension only extends formats defined in the DROID signature file you should also include a regular signature extension (-extend) that includes a FileFormatCollection element defining the new formats.`) } opts = append(opts, config.SetExtendC(expandSets(*extendc))) } if *include != "" { opts = append(opts, config.SetLimit(expandSets(*include))) } if *exclude != "" { opts = append(opts, config.SetExclude(expandSets(*exclude))) } if *bof != 0 { opts = append(opts, config.SetBOF(*bof)) } if *eof != 0 { opts = append(opts, config.SetEOF(*eof)) } if *noeof { opts = append(opts, config.SetNoEOF()) } if *nopriority { opts = append(opts, config.SetNoPriority()) } if *nocontainer { opts = append(opts, config.SetNoContainer()) } if *notext { opts = append(opts, config.SetNoText()) } if *noext { opts = append(opts, config.SetNoExt()) } if *noreports { opts = append(opts, config.SetNoReports()) } if *doubleup { opts = append(opts, config.SetDoubleUp()) } if *rng != config.Range() { opts = append(opts, config.SetRange(*rng)) } if *distance != config.Distance() { opts = append(opts, config.SetDistance(*distance)) } if *choices != config.Choices() { opts = append(opts, config.SetChoices(*choices)) } return opts }
"fmt" "log" "os" "path/filepath" "strconv" "strings" "github.com/richardlehane/siegfried" "github.com/richardlehane/siegfried/config" "github.com/richardlehane/siegfried/pkg/pronom" ) var ( // BUILD, ADD flag sets build = flag.NewFlagSet("build | add", flag.ExitOnError) home = build.String("home", config.Home(), "override the default home directory") droid = build.String("droid", config.Droid(), "set name/path for DROID signature file") container = build.String("container", config.Container(), "set name/path for Droid Container signature file") reports = build.String("reports", config.Reports(), "set path for PRONOM reports directory") name = build.String("name", config.Name(), "set identifier name") details = build.String("details", config.Details(), "set identifier details") extend = build.String("extend", "", "comma separated list of additional signatures") extendc = build.String("extendc", "", "comma separated list of additional container signatures") include = build.String("limit", "", "comma separated list of PRONOM signatures to include") exclude = build.String("exclude", "", "comma separated list of PRONOM signatures to exclude") bof = build.Int("bof", 0, "define a maximum BOF offset") eof = build.Int("eof", 0, "define a maximum EOF offset") noeof = build.Bool("noeof", false, "ignore EOF segments in signatures") nopriority = build.Bool("nopriority", false, "ignore priority rules when recording results") nocontainer = build.Bool("nocontainer", false, "skip container signatures") notext = build.Bool("notext", false, "skip text matcher")
func main() { flag.Parse() /*//UNCOMMENT TO RUN PROFILER go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()*/ if *home != config.Home() { config.SetHome(*home) } if *sig != config.SignatureBase() { config.SetSignature(*sig) } if *version { version := config.Version() fmt.Printf("siegfried version: %d.%d.%d\n", version[0], version[1], version[2]) return } if *update { msg, err := updateSigs() if err != nil { log.Fatalf("Error: failed to update signature file, %v", err) } fmt.Println(msg) return } // during parallel scanning or in server mode, unsafe to access the last read buffer - so can't unzip or hash if *multi > 1 || *serve != "" { if *archive { log.Fatalln("Error: cannot scan archive formats when running in parallel mode") } if *hashf != "" { log.Fatalln("Error: cannot calculate file checksum when running in parallel mode") } } if err := setHash(); err != nil { log.Fatal(err) } if *serve != "" || *fprflag { s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("Error: error loading signature file, got: %v", err) } if *serve != "" { log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve) listen(*serve, s) return } log.Printf("FPR server started at %s. 
Use CTRL-C to quit.\n", config.Fpr()) serveFpr(config.Fpr(), s) return } if flag.NArg() != 1 { log.Fatalln("Error: expecting a single file or directory argument") } s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("Error: error loading signature file, got: %v", err) } var w writer switch { case *debug: config.SetDebug() w = debugWriter{} case *slow: config.SetSlow() w = &slowWriter{os.Stdout} case *csvo: w = newCSV(os.Stdout) case *jsono: w = newJSON(os.Stdout) case *droido: w = newDroid(os.Stdout) case *knowno: w = &knownWriter{true, os.Stdout} case *unknowno: w = &knownWriter{false, os.Stdout} default: w = newYAML(os.Stdout) } // support reading list files from stdin if flag.Arg(0) == "-" { w.writeHead(s) scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { info, err := os.Stat(scanner.Text()) if err != nil || info.IsDir() { w.writeFile(scanner.Text(), 0, "", nil, fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err), nil) } else { identifyFile(w, s, scanner.Text(), info.Size(), info.ModTime().Format(time.RFC3339)) } } w.writeTail() os.Exit(0) } info, err := os.Stat(flag.Arg(0)) if err != nil { log.Fatalf("Error: error getting info for %v, got: %v", flag.Arg(0), err) } if info.IsDir() { if config.Debug() { log.Fatalln("Error: when scanning in debug mode, give a file rather than a directory argument") } w.writeHead(s) if *multi > 16 { *multi = 16 } if *multi > 1 { multiIdentifyP(w, s, flag.Arg(0), *nr) } else { multiIdentifyS(w, s, flag.Arg(0), *nr) } w.writeTail() os.Exit(0) } w.writeHead(s) identifyFile(w, s, flag.Arg(0), info.Size(), info.ModTime().Format(time.RFC3339)) w.writeTail() os.Exit(0) }
// PROCS is fixed at -1.
// NOTE(review): how this value is consumed is not visible in this part of the
// file — confirm its meaning before changing it.
const PROCS = -1

// flags
//
// Command-line flags registered on the default flag set. Each variable is the
// pointer returned by the flag package; its value is only meaningful after
// flag.Parse has been called. The usage strings below describe each flag.
var (
	update   = flag.Bool("update", false, "update or install the default signature file")
	version  = flag.Bool("version", false, "display version information")
	debug    = flag.Bool("debug", false, "scan in debug mode")
	slow     = flag.Bool("slow", false, "scan and report slow signatures")
	nr       = flag.Bool("nr", false, "prevent automatic directory recursion")
	csvo     = flag.Bool("csv", false, "CSV output format")
	jsono    = flag.Bool("json", false, "JSON output format")
	droido   = flag.Bool("droid", false, "DROID CSV output format")
	knowno   = flag.Bool("known", false, "Output list of known files")
	unknowno = flag.Bool("unknown", false, "Output list of unknown files")
	sig      = flag.String("sig", config.SignatureBase(), "set the signature file")
	home     = flag.String("home", config.Home(), "override the default home directory")
	serve    = flag.String("serve", "", "start siegfried server e.g. -serve localhost:5138")
	multi    = flag.Int("multi", 1, "set number of file ID processes")
	archive  = flag.Bool("z", false, "scan archive formats (zip, tar, gzip)")
	hashf    = flag.String("hash", "", "calculate file checksum with hash algorithm; options "+hashChoices)
)

// res bundles what is known about one file together with either its
// identification result or an error.
// NOTE(review): sz and mod are presumably the file size and a formatted
// modification time — confirm against the code that fills this struct.
type res struct {
	path string
	sz   int64
	mod  string
	c    iterableID
	err  error
}

func printer(w writer, resc chan chan res, wg *sync.WaitGroup) {
func buildOptions() []config.Option { if *home != config.Home() { config.SetHome(*home) } opts := []config.Option{} if *droid != config.Droid() { opts = append(opts, config.SetDroid(*droid)) } if *container != config.Container() { opts = append(opts, config.SetContainer(*container)) } if *reports != config.Reports() { opts = append(opts, config.SetReports(*reports)) } if *name != config.Name() { opts = append(opts, config.SetName(*name)) } if *details != config.Details() { opts = append(opts, config.SetDetails(*details)) } if *extend != "" { opts = append(opts, config.SetExtend(expandSets(*extend))) } if *extendc != "" { opts = append(opts, config.SetExtendC(expandSets(*extendc))) } if *include != "" { opts = append(opts, config.SetLimit(expandSets(*include))) } if *exclude != "" { opts = append(opts, config.SetExclude(expandSets(*exclude))) } if *bof != 0 { opts = append(opts, config.SetBOF(*bof)) } if *eof != 0 { opts = append(opts, config.SetEOF(*eof)) } if *noeof { opts = append(opts, config.SetNoEOF()) } if *nopriority { opts = append(opts, config.SetNoPriority()) } if *nocontainer { opts = append(opts, config.SetNoContainer()) } if *notext { opts = append(opts, config.SetNoText()) } if *noreports { opts = append(opts, config.SetNoReports()) } if *rng != config.Range() { opts = append(opts, config.SetRange(*rng)) } if *distance != config.Distance() { opts = append(opts, config.SetDistance(*distance)) } if *choices != config.Choices() { opts = append(opts, config.SetChoices(*choices)) } return opts }
func main() { flag.Parse() /*//UNCOMMENT TO RUN PROFILER go func() { log.Println(http.ListenAndServe("localhost:6060", nil)) }()*/ if *version { version := config.Version() fmt.Printf("siegfried %d.%d.%d\n", version[0], version[1], version[2]) s, err := siegfried.Load(config.Signature()) if err != nil { fmt.Println(err) return } fmt.Print(s) return } if *home != config.Home() { config.SetHome(*home) } if *sig != config.SignatureBase() { config.SetSignature(*sig) } if *update { msg, err := updateSigs() if err != nil { log.Fatalf("[FATAL] failed to update signature file, %v", err) } fmt.Println(msg) return } // during parallel scanning or in server mode, unsafe to access the last read buffer - so can't unzip or hash if *multi > 1 || *serve != "" { if *archive { log.Fatalln("[FATAL] cannot scan archive formats when running in parallel or server mode") } if *hashf != "" { log.Fatalln("[FATAL] cannot calculate file checksum when running in parallel or server mode") } } if *logf != "" { if *multi > 1 && *logf != "error" { log.Fatalln("[FATAL] cannot log in parallel mode") } if err := newLogger(*logf); err != nil { log.Fatalln(err) } } if err := setHash(); err != nil { log.Fatal(err) } if *serve != "" || *fprflag { s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("[FATAL] error loading signature file, got: %v", err) } if *serve != "" { log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve) listen(*serve, s) return } log.Printf("FPR server started at %s. 
Use CTRL-C to quit.\n", config.Fpr()) serveFpr(config.Fpr(), s) return } if flag.NArg() != 1 { log.Fatalln("[FATAL] expecting a single file or directory argument") } s, err := siegfried.Load(config.Signature()) if err != nil { log.Fatalf("[FATAL] error loading signature file, got: %v", err) } var w writer switch { case *csvo: w = newCSV(os.Stdout) case *jsono: w = newJSON(os.Stdout) case *droido: w = newDroid(os.Stdout) default: w = newYAML(os.Stdout) } if lg != nil && lg.w == os.Stdout { w = logWriter{} } // support reading list files from stdin if flag.Arg(0) == "-" { w.writeHead(s) scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { info, err := os.Stat(scanner.Text()) if err != nil { info, err = retryStat(scanner.Text(), err) } if err != nil || info.IsDir() { writeError(w, scanner.Text(), 0, "", fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err)) } else { identifyFile(w, s, scanner.Text(), info.Size(), info.ModTime().Format(time.RFC3339)) } } w.writeTail() lg.printElapsed() os.Exit(0) } info, err := os.Stat(flag.Arg(0)) if err != nil { info, err = retryStat(flag.Arg(0), err) if err != nil { log.Fatalf("[FATAL] cannot get info for %v, got: %v", flag.Arg(0), err) } } if info.IsDir() { w.writeHead(s) if *multi > 16 { *multi = 16 } if *multi > 1 { err = multiIdentifyP(w, s, flag.Arg(0), *nr) } else { if *throttlef != 0 { throttle = time.NewTicker(*throttlef) defer throttle.Stop() } err = multiIdentifyS(w, s, flag.Arg(0), "", *nr) } w.writeTail() if err != nil { log.Fatalf("[FATAL] %v\n", err) } lg.printElapsed() os.Exit(0) } w.writeHead(s) identifyFile(w, s, flag.Arg(0), info.Size(), info.ModTime().Format(time.RFC3339)) w.writeTail() lg.printElapsed() os.Exit(0) }