Esempio n. 1
0
// updateSigs asks the update service (the URL from config.UpdateOptions) for
// the latest signature file details and, when appropriate, downloads that file
// and writes it to config.Signature().
//
// It returns a message for the user together with a nil error whenever the run
// finishes normally, including the outcomes where nothing is downloaded
// (updates unavailable, program out of date, already up to date). It returns an
// empty message and a non-nil error when a request fails, the update JSON
// cannot be decoded, the home directory cannot be inspected or created, the
// download has an unexpected size, or the file cannot be written.
func updateSigs() (string, error) {
	url, _, _ := config.UpdateOptions()
	if url == "" {
		return "Update is not available for this distribution of Siegfried", nil
	}
	response, err := getHttp(url)
	if err != nil {
		return "", err
	}
	var u Update
	if err := json.Unmarshal(response, &u); err != nil {
		return "", err
	}
	// Only the first two version components are compared: if the running
	// program is older than the version named by the update service, stop
	// here without downloading anything.
	version := config.Version()
	if version[0] < u.SfVersion[0] || (u.SfVersion[0] == version[0] && version[1] < u.SfVersion[1]) {
		return "Your version of Siegfried is out of date; please install latest from http://www.itforarchivists.com/siegfried before continuing.", nil
	}
	s, err := siegfried.Load(config.Signature())
	if err == nil {
		// NOTE(review): s.Update presumably reports whether the loaded file is
		// older than u.SigCreated - confirm against the siegfried package.
		if !s.Update(u.SigCreated) {
			return "You are already up to date!", nil
		}
	} else if _, statErr := os.Stat(config.Home()); statErr != nil {
		// The current signature file could not be loaded, so make sure the
		// home directory exists before attempting to write a new one.
		// (Original error handling credited to Ross.)
		if !os.IsNotExist(statErr) {
			return "", fmt.Errorf("Siegfried: error opening directory %s, %v", config.Home(), statErr)
		}
		if mkErr := os.MkdirAll(config.Home(), os.ModePerm); mkErr != nil {
			return "", fmt.Errorf("Siegfried: cannot create home directory %s, %v", config.Home(), mkErr)
		}
	}
	fmt.Println("... downloading latest signature file ...")
	response, err = getHttp(u.LatestURL)
	if err != nil {
		return "", fmt.Errorf("Siegfried: error retrieving pronom.gob.\nThis may be a network or firewall issue. See https://github.com/richardlehane/siegfried/wiki/Getting-started for manual instructions.\nSystem error: %v", err)
	}
	// Guard against truncated or otherwise unexpected downloads.
	if len(response) != u.GobSize {
		return "", fmt.Errorf("Siegfried: error retrieving pronom.gob; expecting %d bytes, got %d bytes", u.GobSize, len(response))
	}
	// Announce the write before performing it so the progress output matches
	// what is actually happening (previously it was printed after the fact).
	fmt.Printf("... writing %s ...\n", config.Signature())
	err = ioutil.WriteFile(config.Signature(), response, os.ModePerm)
	if err != nil {
		return "", fmt.Errorf("Siegfried: error writing to directory, %v", err)
	}
	return "Your signature file has been updated", nil
}
Esempio n. 2
0
// setInspectOptions copies the inspect flag values into the config package.
// A value is applied only when it differs from what config currently reports:
// the home directory first, then the reports location.
func setInspectOptions() {
	if dir := *inspectHome; dir != config.Home() {
		config.SetHome(dir)
	}
	if rep := *inspectReports; rep != config.Reports() {
		// SetReports returns a function; invoke it to apply the setting.
		apply := config.SetReports(rep)
		apply()
	}
}
Esempio n. 3
0
// setHarvestOptions copies the harvest flag values into the config package.
// Each value is applied only when it differs from the value it is compared
// against, in this order: home directory, DROID file, reports location and
// harvest timeout (compared with htimeout).
func setHarvestOptions() {
	if dir := *harvestHome; dir != config.Home() {
		config.SetHome(dir)
	}
	if droidPath := *harvestDroid; droidPath != config.Droid() {
		// SetDroid returns a function; invoke it to apply the setting.
		applyDroid := config.SetDroid(droidPath)
		applyDroid()
	}
	if rep := *harvestReports; rep != config.Reports() {
		// SetReports returns a function; invoke it to apply the setting.
		applyReports := config.SetReports(rep)
		applyReports()
	}
	if limit := *timeout; limit != htimeout {
		config.SetHarvestTimeout(limit)
	}
}
Esempio n. 4
0
// initSets rebuilds the package-level sets map from every .json file found
// under the "sets" directory of the siegfried home directory.
//
// Each file is decoded into a map from key to a list of strings. Keys are
// passed through stripComment and lists through stripComments, and every list
// is stored sorted. When a key occurs more than once, the lists are merged:
// members already present under that key are not added a second time.
//
// It returns the first error produced by the walk: a failure to traverse the
// directory, to read a file, or to unmarshal its JSON.
func initSets() error {
	sets = make(map[string][]string)
	wf := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return errors.New("error walking sets directory, must have a 'sets' directory in siegfried home: " + err.Error())
		}
		// Skip directories and anything without a .json extension.
		if info.IsDir() || filepath.Ext(path) != ".json" {
			return nil
		}
		byts, err := ioutil.ReadFile(path)
		if err != nil {
			return errors.New("error loading " + path + " " + err.Error())
		}
		set := make(map[string][]string)
		if err = json.Unmarshal(byts, &set); err != nil {
			return errors.New("error unmarshalling " + path + " " + err.Error())
		}
		for k, v := range set {
			k = stripComment(k)
			v = stripComments(v)
			sort.Strings(v)
			m, ok := sets[k]
			if !ok {
				sets[k] = v
				continue
			}
			// Merge v into the existing list. Only the first n elements of m
			// are sorted (new members are appended unsorted until the final
			// sort), so the binary search must be limited to that prefix;
			// searching the whole slice can miss an existing member and add
			// it twice.
			n := len(m)
			for _, w := range v {
				// v is sorted, so a repeat of the member just appended is
				// always adjacent to it.
				if len(m) > n && m[len(m)-1] == w {
					continue
				}
				if idx := sort.SearchStrings(m[:n], w); idx == n || m[idx] != w {
					m = append(m, w)
				}
			}
			sort.Strings(m)
			sets[k] = m
		}
		return nil
	}
	return filepath.Walk(filepath.Join(config.Home(), "sets"), wf)
}
Esempio n. 5
0
// buildOptions turns the build flag values into a list of config options.
//
// The home directory is applied to config immediately. Every other flag
// contributes an option only when it departs from the value it is compared
// against (the current config value, the empty string, zero or false). The
// returned slice is never nil and keeps the order in which the flags are
// examined below.
func buildOptions() []config.Option {
	if dir := *home; dir != config.Home() {
		config.SetHome(dir)
	}
	opts := make([]config.Option, 0)
	add := func(o config.Option) { opts = append(opts, o) }

	// Values compared against the current configuration.
	if v := *droid; v != config.Droid() {
		add(config.SetDroid(v))
	}
	if v := *container; v != config.Container() {
		add(config.SetContainer(v))
	}
	if v := *reports; v != config.Reports() {
		add(config.SetReports(v))
	}
	if v := *name; v != config.Name() {
		add(config.SetName(v))
	}
	if v := *details; v != config.Details() {
		add(config.SetDetails(v))
	}

	// List-valued flags, each passed through expandSets.
	if v := *extend; v != "" {
		add(config.SetExtend(expandSets(v)))
	}
	if v := *extendc; v != "" {
		// Warn when a container extension is given without a regular one.
		if *extend == "" {
			fmt.Println(
				`roy: warning! Unless the container extension only extends formats defined in 
the DROID signature file you should also include a regular signature extension 
(-extend) that includes a FileFormatCollection element defining the new formats.`)
		}
		add(config.SetExtendC(expandSets(v)))
	}
	if v := *include; v != "" {
		add(config.SetLimit(expandSets(v)))
	}
	if v := *exclude; v != "" {
		add(config.SetExclude(expandSets(v)))
	}

	// Offsets: zero means "not set".
	if v := *bof; v != 0 {
		add(config.SetBOF(v))
	}
	if v := *eof; v != 0 {
		add(config.SetEOF(v))
	}

	// Boolean switches.
	if *noeof {
		add(config.SetNoEOF())
	}
	if *nopriority {
		add(config.SetNoPriority())
	}
	if *nocontainer {
		add(config.SetNoContainer())
	}
	if *notext {
		add(config.SetNoText())
	}
	if *noext {
		add(config.SetNoExt())
	}
	if *noreports {
		add(config.SetNoReports())
	}
	if *doubleup {
		add(config.SetDoubleUp())
	}

	// Remaining values compared against the current configuration.
	if v := *rng; v != config.Range() {
		add(config.SetRange(v))
	}
	if v := *distance; v != config.Distance() {
		add(config.SetDistance(v))
	}
	if v := *choices; v != config.Choices() {
		add(config.SetChoices(v))
	}
	return opts
}
Esempio n. 6
0
	"fmt"
	"log"
	"os"
	"path/filepath"
	"strconv"
	"strings"

	"github.com/richardlehane/siegfried"
	"github.com/richardlehane/siegfried/config"
	"github.com/richardlehane/siegfried/pkg/pronom"
)

var (
	// BUILD, ADD flag sets
	build       = flag.NewFlagSet("build | add", flag.ExitOnError)
	home        = build.String("home", config.Home(), "override the default home directory")
	droid       = build.String("droid", config.Droid(), "set name/path for DROID signature file")
	container   = build.String("container", config.Container(), "set name/path for Droid Container signature file")
	reports     = build.String("reports", config.Reports(), "set path for PRONOM reports directory")
	name        = build.String("name", config.Name(), "set identifier name")
	details     = build.String("details", config.Details(), "set identifier details")
	extend      = build.String("extend", "", "comma separated list of additional signatures")
	extendc     = build.String("extendc", "", "comma separated list of additional container signatures")
	include     = build.String("limit", "", "comma separated list of PRONOM signatures to include")
	exclude     = build.String("exclude", "", "comma separated list of PRONOM signatures to exclude")
	bof         = build.Int("bof", 0, "define a maximum BOF offset")
	eof         = build.Int("eof", 0, "define a maximum EOF offset")
	noeof       = build.Bool("noeof", false, "ignore EOF segments in signatures")
	nopriority  = build.Bool("nopriority", false, "ignore priority rules when recording results")
	nocontainer = build.Bool("nocontainer", false, "skip container signatures")
	notext      = build.Bool("notext", false, "skip text matcher")
Esempio n. 7
0
// main is the command-line entry point. After parsing flags it, in order:
// applies the home and signature overrides; prints the version and returns
// when *version is set; runs updateSigs and returns when *update is set;
// rejects *archive and *hashf when *multi > 1 or *serve is set; calls setHash;
// starts one of the two servers when *serve or *fprflag is set; and otherwise
// loads the signature file, chooses an output writer and identifies the single
// positional argument, which may be a file, a directory, or "-" to read a list
// of file paths from standard input.
func main() {

	flag.Parse()

	/*//UNCOMMENT TO RUN PROFILER
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()*/

	// Apply overrides before anything reads config.Signature().
	if *home != config.Home() {
		config.SetHome(*home)
	}

	if *sig != config.SignatureBase() {
		config.SetSignature(*sig)
	}

	if *version {
		version := config.Version()
		fmt.Printf("siegfried version: %d.%d.%d\n", version[0], version[1], version[2])
		return
	}

	if *update {
		msg, err := updateSigs()
		if err != nil {
			log.Fatalf("Error: failed to update signature file, %v", err)
		}
		fmt.Println(msg)
		return
	}

	// during parallel scanning or in server mode, unsafe to access the last read buffer - so can't unzip or hash
	if *multi > 1 || *serve != "" {
		if *archive {
			log.Fatalln("Error: cannot scan archive formats when running in parallel mode")
		}
		if *hashf != "" {
			log.Fatalln("Error: cannot calculate file checksum when running in parallel mode")
		}
	}

	if err := setHash(); err != nil {
		log.Fatal(err)
	}

	// Server modes: *serve takes precedence over *fprflag when both are set.
	if *serve != "" || *fprflag {
		s, err := siegfried.Load(config.Signature())
		if err != nil {
			log.Fatalf("Error: error loading signature file, got: %v", err)

		}
		if *serve != "" {
			log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve)
			listen(*serve, s)
			return
		}
		log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr())
		serveFpr(config.Fpr(), s)
		return
	}

	// Everything below is scanning mode and needs exactly one argument.
	if flag.NArg() != 1 {
		log.Fatalln("Error: expecting a single file or directory argument")
	}

	s, err := siegfried.Load(config.Signature())
	if err != nil {
		log.Fatalf("Error: error loading signature file, got: %v", err)

	}

	// Choose the output writer. The first case that is true wins; YAML is the
	// default. The debug and slow cases also switch the matching config mode on.
	var w writer
	switch {
	case *debug:
		config.SetDebug()
		w = debugWriter{}
	case *slow:
		config.SetSlow()
		w = &slowWriter{os.Stdout}
	case *csvo:
		w = newCSV(os.Stdout)
	case *jsono:
		w = newJSON(os.Stdout)
	case *droido:
		w = newDroid(os.Stdout)
	case *knowno:
		w = &knownWriter{true, os.Stdout}
	case *unknowno:
		w = &knownWriter{false, os.Stdout}
	default:
		w = newYAML(os.Stdout)
	}

	// support reading list files from stdin
	if flag.Arg(0) == "-" {
		w.writeHead(s)
		scanner := bufio.NewScanner(os.Stdin)
		for scanner.Scan() {
			info, err := os.Stat(scanner.Text())
			// A path that cannot be stat'ed, or that is a directory, is passed to
			// w.writeFile with an error instead of being identified. In the
			// directory case err is nil, so the message ends with "got: <nil>".
			if err != nil || info.IsDir() {
				w.writeFile(scanner.Text(), 0, "", nil, fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err), nil)
			} else {
				identifyFile(w, s, scanner.Text(), info.Size(), info.ModTime().Format(time.RFC3339))
			}
		}
		// NOTE(review): scanner.Err() is not checked after the loop.
		w.writeTail()
		os.Exit(0)
	}

	info, err := os.Stat(flag.Arg(0))
	if err != nil {
		log.Fatalf("Error: error getting info for %v, got: %v", flag.Arg(0), err)
	}

	// Directory argument: walk it, in parallel when *multi > 1.
	if info.IsDir() {
		if config.Debug() {
			log.Fatalln("Error: when scanning in debug mode, give a file rather than a directory argument")
		}
		w.writeHead(s)
		// Clamp *multi to at most 16.
		if *multi > 16 {
			*multi = 16
		}
		if *multi > 1 {
			multiIdentifyP(w, s, flag.Arg(0), *nr)
		} else {
			multiIdentifyS(w, s, flag.Arg(0), *nr)
		}
		w.writeTail()
		os.Exit(0)
	}

	// Single file argument.
	w.writeHead(s)
	identifyFile(w, s, flag.Arg(0), info.Size(), info.ModTime().Format(time.RFC3339))
	w.writeTail()
	os.Exit(0)
}
Esempio n. 8
0
// PROCS is not referenced in the visible part of this file.
// NOTE(review): presumably a process-count setting where -1 means "use the
// default" - confirm against its use site.
const PROCS = -1

// flags
//
// Command-line flags registered on the default flag set. Each variable is a
// pointer whose value is populated once flag.Parse has run.
var (
	update   = flag.Bool("update", false, "update or install the default signature file")
	version  = flag.Bool("version", false, "display version information")
	debug    = flag.Bool("debug", false, "scan in debug mode")
	slow     = flag.Bool("slow", false, "scan and report slow signatures")
	nr       = flag.Bool("nr", false, "prevent automatic directory recursion")
	// output format selectors
	csvo     = flag.Bool("csv", false, "CSV output format")
	jsono    = flag.Bool("json", false, "JSON output format")
	droido   = flag.Bool("droid", false, "DROID CSV output format")
	knowno   = flag.Bool("known", false, "Output list of known files")
	unknowno = flag.Bool("unknown", false, "Output list of unknown files")
	// defaults for sig and home are taken from the config package
	sig      = flag.String("sig", config.SignatureBase(), "set the signature file")
	home     = flag.String("home", config.Home(), "override the default home directory")
	serve    = flag.String("serve", "", "start siegfried server e.g. -serve localhost:5138")
	multi    = flag.Int("multi", 1, "set number of file ID processes")
	archive  = flag.Bool("z", false, "scan archive formats (zip, tar, gzip)")
	hashf    = flag.String("hash", "", "calculate file checksum with hash algorithm; options "+hashChoices)
)

// res bundles one result record; values of this type travel over the
// channels handed to printer.
// NOTE(review): the field meanings below are inferred from their names and
// types - confirm against the code that fills them in.
type res struct {
	path string     // presumably the path of the file
	sz   int64      // presumably the file size in bytes
	mod  string     // presumably the formatted modification time
	c    iterableID // presumably the identification result
	err  error      // presumably the error met while handling the file, if any
}

func printer(w writer, resc chan chan res, wg *sync.WaitGroup) {
Esempio n. 9
0
// buildOptions converts the build flag values into a slice of config options.
//
// A differing home directory is applied to config straight away. Each of the
// remaining flags yields an option only when its value differs from the value
// it is checked against (the current config value, "", 0 or false). The result
// is a non-nil slice ordered by the sequence of checks below.
func buildOptions() []config.Option {
	if dir := *home; dir != config.Home() {
		config.SetHome(dir)
	}
	opts := make([]config.Option, 0)
	push := func(o config.Option) { opts = append(opts, o) }

	// File locations and identifier metadata.
	if path := *droid; path != config.Droid() {
		push(config.SetDroid(path))
	}
	if path := *container; path != config.Container() {
		push(config.SetContainer(path))
	}
	if path := *reports; path != config.Reports() {
		push(config.SetReports(path))
	}
	if id := *name; id != config.Name() {
		push(config.SetName(id))
	}
	if desc := *details; desc != config.Details() {
		push(config.SetDetails(desc))
	}

	// List-valued flags are passed through expandSets before use.
	if list := *extend; list != "" {
		push(config.SetExtend(expandSets(list)))
	}
	if list := *extendc; list != "" {
		push(config.SetExtendC(expandSets(list)))
	}
	if list := *include; list != "" {
		push(config.SetLimit(expandSets(list)))
	}
	if list := *exclude; list != "" {
		push(config.SetExclude(expandSets(list)))
	}

	// Offsets are left alone when zero.
	if off := *bof; off != 0 {
		push(config.SetBOF(off))
	}
	if off := *eof; off != 0 {
		push(config.SetEOF(off))
	}

	// On/off switches.
	if *noeof {
		push(config.SetNoEOF())
	}
	if *nopriority {
		push(config.SetNoPriority())
	}
	if *nocontainer {
		push(config.SetNoContainer())
	}
	if *notext {
		push(config.SetNoText())
	}
	if *noreports {
		push(config.SetNoReports())
	}

	// Remaining values checked against the current configuration.
	if val := *rng; val != config.Range() {
		push(config.SetRange(val))
	}
	if val := *distance; val != config.Distance() {
		push(config.SetDistance(val))
	}
	if val := *choices; val != config.Choices() {
		push(config.SetChoices(val))
	}
	return opts
}
Esempio n. 10
0
// main is the command-line entry point. After parsing flags it, in order:
// prints version information and returns when *version is set; applies the
// home and signature overrides; runs updateSigs and returns when *update is
// set; rejects *archive and *hashf when *multi > 1 or *serve is set; sets up
// logging when *logf is given; calls setHash; starts one of the two servers
// when *serve or *fprflag is set; and otherwise loads the signature file,
// chooses an output writer and identifies the single positional argument,
// which may be a file, a directory, or "-" to read a list of file paths from
// standard input.
func main() {

	flag.Parse()

	/*//UNCOMMENT TO RUN PROFILER
	go func() {
		log.Println(http.ListenAndServe("localhost:6060", nil))
	}()*/

	// This runs before the home and signature overrides below are applied, so
	// the signature file loaded here is the one config reports at this point.
	if *version {
		version := config.Version()
		fmt.Printf("siegfried %d.%d.%d\n", version[0], version[1], version[2])
		s, err := siegfried.Load(config.Signature())
		if err != nil {
			fmt.Println(err)
			return
		}
		fmt.Print(s)
		return
	}

	if *home != config.Home() {
		config.SetHome(*home)
	}

	if *sig != config.SignatureBase() {
		config.SetSignature(*sig)
	}

	if *update {
		msg, err := updateSigs()
		if err != nil {
			log.Fatalf("[FATAL] failed to update signature file, %v", err)
		}
		fmt.Println(msg)
		return
	}

	// during parallel scanning or in server mode, unsafe to access the last read buffer - so can't unzip or hash
	if *multi > 1 || *serve != "" {
		if *archive {
			log.Fatalln("[FATAL] cannot scan archive formats when running in parallel or server mode")
		}
		if *hashf != "" {
			log.Fatalln("[FATAL] cannot calculate file checksum when running in parallel or server mode")
		}
	}

	// In parallel mode the only accepted *logf value is "error".
	if *logf != "" {
		if *multi > 1 && *logf != "error" {
			log.Fatalln("[FATAL] cannot log in parallel mode")
		}
		if err := newLogger(*logf); err != nil {
			log.Fatalln(err)
		}
	}

	if err := setHash(); err != nil {
		log.Fatal(err)
	}

	// Server modes: *serve takes precedence over *fprflag when both are set.
	if *serve != "" || *fprflag {
		s, err := siegfried.Load(config.Signature())
		if err != nil {
			log.Fatalf("[FATAL] error loading signature file, got: %v", err)
		}
		if *serve != "" {
			log.Printf("Starting server at %s. Use CTRL-C to quit.\n", *serve)
			listen(*serve, s)
			return
		}
		log.Printf("FPR server started at %s. Use CTRL-C to quit.\n", config.Fpr())
		serveFpr(config.Fpr(), s)
		return
	}

	// Everything below is scanning mode and needs exactly one argument.
	if flag.NArg() != 1 {
		log.Fatalln("[FATAL] expecting a single file or directory argument")
	}

	s, err := siegfried.Load(config.Signature())
	if err != nil {
		log.Fatalf("[FATAL] error loading signature file, got: %v", err)
	}

	// Choose the output writer. The first case that is true wins; YAML is the
	// default.
	var w writer
	switch {
	case *csvo:
		w = newCSV(os.Stdout)
	case *jsono:
		w = newJSON(os.Stdout)
	case *droido:
		w = newDroid(os.Stdout)
	default:
		w = newYAML(os.Stdout)
	}

	// When the logger itself writes to stdout, replace the chosen writer with
	// a logWriter.
	if lg != nil && lg.w == os.Stdout {
		w = logWriter{}
	}

	// support reading list files from stdin
	if flag.Arg(0) == "-" {
		w.writeHead(s)
		scanner := bufio.NewScanner(os.Stdin)
		for scanner.Scan() {
			info, err := os.Stat(scanner.Text())
			// Give retryStat a chance to recover from the failure.
			if err != nil {
				info, err = retryStat(scanner.Text(), err)
			}
			// A path that still cannot be stat'ed, or that is a directory, is
			// passed to writeError instead of being identified. In the
			// directory case err is nil, so the message ends with "got: <nil>".
			if err != nil || info.IsDir() {
				writeError(w, scanner.Text(), 0, "", fmt.Errorf("failed to identify %s (in scanning mode, inputs must all be files and not directories), got: %v", scanner.Text(), err))
			} else {
				identifyFile(w, s, scanner.Text(), info.Size(), info.ModTime().Format(time.RFC3339))
			}
		}
		// NOTE(review): scanner.Err() is not checked after the loop.
		w.writeTail()
		// NOTE(review): lg may be nil here (see the nil check above), so
		// printElapsed presumably tolerates a nil receiver - confirm.
		lg.printElapsed()
		os.Exit(0)
	}

	info, err := os.Stat(flag.Arg(0))
	if err != nil {
		info, err = retryStat(flag.Arg(0), err)
		if err != nil {
			log.Fatalf("[FATAL] cannot get info for %v, got: %v", flag.Arg(0), err)
		}
	}

	// Directory argument: walk it, in parallel when *multi > 1.
	if info.IsDir() {
		w.writeHead(s)
		// Clamp *multi to at most 16.
		if *multi > 16 {
			*multi = 16
		}
		if *multi > 1 {
			err = multiIdentifyP(w, s, flag.Arg(0), *nr)
		} else {
			// The throttle ticker is only set up for the sequential walk.
			// Every exit path below ends in os.Exit or log.Fatalf, neither of
			// which runs deferred calls, so the deferred Stop never executes.
			if *throttlef != 0 {
				throttle = time.NewTicker(*throttlef)
				defer throttle.Stop()
			}
			err = multiIdentifyS(w, s, flag.Arg(0), "", *nr)
		}
		// The tail is written before a walk error is reported.
		w.writeTail()
		if err != nil {
			log.Fatalf("[FATAL] %v\n", err)
		}
		lg.printElapsed()
		os.Exit(0)
	}
	// Single file argument.
	w.writeHead(s)
	identifyFile(w, s, flag.Arg(0), info.Size(), info.ModTime().Format(time.RFC3339))
	w.writeTail()
	lg.printElapsed()
	os.Exit(0)
}