func applyAll(max int, reps []string, apply func(puid string) error) []error { ch := make(chan error, len(reps)) wg := sync.WaitGroup{} queue := make(chan struct{}, max) // to avoid hammering TNA _, _, tf, _ := config.HarvestOptions() var throttle *time.Ticker if tf > 0 { throttle = time.NewTicker(tf) defer throttle.Stop() } for _, puid := range reps { if tf > 0 { <-throttle.C } wg.Add(1) go func(puid string) { queue <- struct{}{} defer wg.Done() if err := apply(puid); err != nil { ch <- err } <-queue }(puid) } wg.Wait() close(ch) var errors []error for err := range ch { errors = append(errors, err) } return errors }
// UTILS // Harvest fetches PRONOM reports listed in the DROID file func Harvest() []error { d, err := newDroid(config.Droid()) if err != nil { return []error{err} } apply := func(puid string) error { url, _, _, _ := config.HarvestOptions() return save(puid, url, config.Reports()) } return applyAll(5, d.IDs(), apply) }
func getHttp(url string) ([]byte, error) { req, err := http.NewRequest("GET", url, nil) if err != nil { return nil, err } _, timeout, _, transport := config.HarvestOptions() req.Header.Add("User-Agent", "siegfried/roybot (+https://github.com/richardlehane/siegfried)") timer := time.AfterFunc(timeout, func() { transport.CancelRequest(req) }) defer timer.Stop() client := http.Client{ Transport: transport, } resp, err := client.Do(req) if err != nil { return nil, err } defer resp.Body.Close() return ioutil.ReadAll(resp.Body) }
notext = build.Bool("notext", false, "skip text matcher") noname = build.Bool("noname", false, "skip filename matcher") nomime = build.Bool("nomime", false, "skip MIME matcher") noxml = build.Bool("noxml", false, "skip XML matcher") noriff = build.Bool("noriff", false, "skip RIFF matcher") noreports = build.Bool("noreports", false, "build directly from DROID file rather than PRONOM reports") doubleup = build.Bool("doubleup", false, "include byte signatures for formats that also have container signatures") rng = build.Int("range", config.Range(), "define a maximum range for segmentation") distance = build.Int("distance", config.Distance(), "define a maximum distance for segmentation") choices = build.Int("choices", config.Choices(), "define a maximum number of choices for segmentation") // HARVEST harvest = flag.NewFlagSet("harvest", flag.ExitOnError) harvestHome = harvest.String("home", config.Home(), "override the default home directory") harvestDroid = harvest.String("droid", config.Droid(), "set name/path for DROID signature file") _, htimeout, _, _ = config.HarvestOptions() timeout = harvest.Duration("timeout", htimeout, "set duration before timing-out harvesting requests e.g. 120s") throttlef = harvest.Duration("throttle", 0, "set a time to wait HTTP requests e.g. 50ms") // INSPECT (roy inspect | roy inspect fmt/121 | roy inspect usr/local/mysig.sig | roy inspect 10) inspect = flag.NewFlagSet("inspect", flag.ExitOnError) inspectHome = inspect.String("home", config.Home(), "override the default home directory") inspectReports = inspect.Bool("reports", false, "build signatures from PRONOM reports (rather than DROID xml)") inspectExtend = inspect.String("extend", "", "comma separated list of additional signatures") inspectExtendc = inspect.String("extendc", "", "comma separated list of additional container signatures") inspectInclude = inspect.String("limit", "", "when inspecting priorities, comma separated list of PRONOM signatures to include") inspectExclude = inspect.String("exclude", "", "when inspecting priorities, comma separated list of PRONOM signatures to exclude") inspectMI = inspect.String("mi", "", "set name/path for MIMEInfo signature file to inspect") inspectFDD = inspect.String("fdd", "", "set name/path for LOC FDD signature file to inspect") inspectLOC = inspect.Bool("loc", false, "inspect a LOC FDD signature file") inspectCType = inspect.Int("ct", 0, "provide container type to inspect container hits")