Ejemplo n.º 1
0
// applyAll calls apply once for every PUID in reps, running at most max calls
// concurrently, and returns every non-nil error the calls produced. The result
// is nil when all calls succeed; the order of the returned errors is
// unspecified because the calls complete concurrently.
//
// When the throttle interval reported by config.HarvestOptions is positive,
// the start of each call is additionally paced by a ticker of that interval.
// A max below 1 is treated as 1, since a zero-capacity semaphore would block
// every worker forever and a negative capacity would panic.
func applyAll(max int, reps []string, apply func(puid string) error) []error {
	if max < 1 {
		max = 1
	}
	// One buffer slot per rep, so a worker never blocks when reporting an error.
	errc := make(chan error, len(reps))
	// Counting semaphore limiting in-flight calls, to avoid hammering TNA.
	slots := make(chan struct{}, max)
	_, _, tf, _ := config.HarvestOptions()
	var throttle *time.Ticker
	if tf > 0 {
		throttle = time.NewTicker(tf)
		defer throttle.Stop()
	}
	var wg sync.WaitGroup
	for _, puid := range reps {
		if throttle != nil {
			<-throttle.C
		}
		// Acquire the slot before spawning so that no more than max goroutines
		// exist at once, rather than one parked goroutine per rep.
		slots <- struct{}{}
		wg.Add(1)
		go func(puid string) {
			defer wg.Done()
			defer func() { <-slots }() // always hand the slot back
			if err := apply(puid); err != nil {
				errc <- err
			}
		}(puid)
	}
	wg.Wait()
	close(errc)
	var errs []error
	for err := range errc {
		errs = append(errs, err)
	}
	return errs
}
Ejemplo n.º 2
0
// UTILS

// Harvest fetches the PRONOM reports listed in the DROID file.
//
// The DROID file named by config.Droid() is loaded first; if that fails, the
// load error is returned as the only element of the result. Otherwise save is
// invoked for each ID the DROID file yields, through applyAll with a
// concurrency limit of 5, and the errors collected by applyAll are returned.
func Harvest() []error {
	droid, err := newDroid(config.Droid())
	if err != nil {
		return []error{err}
	}
	return applyAll(5, droid.IDs(), func(puid string) error {
		// Only the first value from HarvestOptions (passed to save as the URL)
		// is needed here.
		baseURL, _, _, _ := config.HarvestOptions()
		return save(puid, baseURL, config.Reports())
	})
}
Ejemplo n.º 3
0
// httpStatusError reports an HTTP response whose status code was outside the
// 2xx range.
type httpStatusError struct {
	url    string // URL that was requested
	status string // status line of the response, e.g. "404 Not Found"
}

// Error implements the error interface.
func (e *httpStatusError) Error() string {
	return "GET " + e.url + ": unexpected status " + e.status
}

// getHttp issues a GET request for url and returns the response body.
//
// The request carries the siegfried/roybot User-Agent and is sent through the
// transport supplied by config.HarvestOptions. The timeout from the same call
// bounds the whole exchange (connecting, any redirects and reading the body)
// via http.Client.Timeout; a zero timeout means no limit. This replaces a
// timer calling the deprecated Transport.CancelRequest, which cannot cancel
// HTTP/2 requests.
//
// An error is returned if the request cannot be built or sent, if the server
// answers with a non-2xx status (so an error page is never mistaken for a
// report), or if reading the body fails.
func getHttp(url string) ([]byte, error) {
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Add("User-Agent", "siegfried/roybot (+https://github.com/richardlehane/siegfried)")
	_, timeout, _, transport := config.HarvestOptions()
	client := http.Client{
		Transport: transport,
		Timeout:   timeout,
	}
	resp, err := client.Do(req)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		return nil, &httpStatusError{url: url, status: resp.Status}
	}
	return ioutil.ReadAll(resp.Body)
}
Ejemplo n.º 4
0
	// Matcher opt-outs: each flag below skips one matcher.
	// NOTE(review): the `build` flag set these register on is declared above
	// this excerpt.
	notext      = build.Bool("notext", false, "skip text matcher")
	noname      = build.Bool("noname", false, "skip filename matcher")
	nomime      = build.Bool("nomime", false, "skip MIME matcher")
	noxml       = build.Bool("noxml", false, "skip XML matcher")
	noriff      = build.Bool("noriff", false, "skip RIFF matcher")
	// Signature source selection.
	noreports   = build.Bool("noreports", false, "build directly from DROID file rather than PRONOM reports")
	doubleup    = build.Bool("doubleup", false, "include byte signatures for formats that also have container signatures")
	// Segmentation limits; the defaults are read from the config package.
	rng         = build.Int("range", config.Range(), "define a maximum range for segmentation")
	distance    = build.Int("distance", config.Distance(), "define a maximum distance for segmentation")
	choices     = build.Int("choices", config.Choices(), "define a maximum number of choices for segmentation")

	// HARVEST
	// Flags registered on the "harvest" flag set. The set uses
	// flag.ExitOnError, so a parse failure terminates the program.
	harvest           = flag.NewFlagSet("harvest", flag.ExitOnError)
	harvestHome       = harvest.String("home", config.Home(), "override the default home directory")
	harvestDroid      = harvest.String("droid", config.Droid(), "set name/path for DROID signature file")
	// htimeout is the second value returned by config.HarvestOptions(); it is
	// captured here only to serve as the default for the -timeout flag.
	_, htimeout, _, _ = config.HarvestOptions()
	timeout           = harvest.Duration("timeout", htimeout, "set duration before timing-out harvesting requests e.g. 120s")
	// The -throttle flag defaults to 0.
	throttlef         = harvest.Duration("throttle", 0, "set a time to wait HTTP requests e.g. 50ms")

	// INSPECT (roy inspect | roy inspect fmt/121 | roy inspect usr/local/mysig.sig | roy inspect 10)
	// Flags registered on the "inspect" flag set. The set uses
	// flag.ExitOnError, so a parse failure terminates the program.
	inspect        = flag.NewFlagSet("inspect", flag.ExitOnError)
	inspectHome    = inspect.String("home", config.Home(), "override the default home directory")
	inspectReports = inspect.Bool("reports", false, "build signatures from PRONOM reports (rather than DROID xml)")
	inspectExtend  = inspect.String("extend", "", "comma separated list of additional signatures")
	inspectExtendc = inspect.String("extendc", "", "comma separated list of additional container signatures")
	// Note: the variable is named inspectInclude but the command-line flag is "limit".
	inspectInclude = inspect.String("limit", "", "when inspecting priorities, comma separated list of PRONOM signatures to include")
	inspectExclude = inspect.String("exclude", "", "when inspecting priorities, comma separated list of PRONOM signatures to exclude")
	// Alternative signature sources to inspect (MIMEInfo, LOC FDD).
	inspectMI      = inspect.String("mi", "", "set name/path for MIMEInfo signature file to inspect")
	inspectFDD     = inspect.String("fdd", "", "set name/path for LOC FDD signature file to inspect")
	inspectLOC     = inspect.Bool("loc", false, "inspect a LOC FDD signature file")
	inspectCType   = inspect.Int("ct", 0, "provide container type to inspect container hits")