// DecompressAndLoad will load or fetch a graph from the given path, decompress // it, and then call the given load function to process the decompressed graph. // If no loadFn is provided, db.Load is called. func DecompressAndLoad(qw graph.QuadWriter, cfg *config.Config, path, typ string, loadFn func(graph.QuadWriter, *config.Config, quad.Reader) error) error { var r io.Reader if path == "" { path = cfg.DatabasePath } if path == "" { return nil } u, err := url.Parse(path) if err != nil || u.Scheme == "file" || u.Scheme == "" { // Don't alter relative URL path or non-URL path parameter. if u.Scheme != "" && err == nil { // Recovery heuristic for mistyping "file://path/to/file". path = filepath.Join(u.Host, u.Path) } f, err := os.Open(path) if err != nil { return fmt.Errorf("could not open file %q: %v", path, err) } defer f.Close() r = f } else { res, err := client.Get(path) if err != nil { return fmt.Errorf("could not get resource <%s>: %v", u, err) } defer res.Body.Close() r = res.Body } r, err = Decompressor(r) if err != nil { if err == io.EOF { return nil } return err } var dec quad.ReadCloser switch typ { case "cquad", "cquads": dec = cquads.NewDecoder(r) case "nquad", "nquads": dec = nquads.NewDecoder(r) default: format := quad.FormatByName(typ) if format == nil || format.Reader == nil { return fmt.Errorf("unknown quad format %q", typ) } dec = format.Reader(r) } defer dec.Close() if loadFn != nil { return loadFn(qw, cfg, dec) } return db.Load(qw, cfg, dec) }
func main() { flag.Parse() args := flag.Args() if (!*f_drop && len(args) < 2) || (*f_drop && len(args) < 1) { fmt.Println("usage: qconv <src> <dst>") os.Exit(1) } start := time.Now() var cnt int defer func() { log.Printf("written %d quads in %v", cnt, time.Since(start)) }() var qw quad.WriteCloser if *f_drop { qw = devNull{} } else { name := args[len(args)-1] args = args[:len(args)-1] file, err := os.Create(name) if err != nil { log.Fatal(err) } defer file.Close() var w io.Writer = file ext := filepath.Ext(name) if strings.HasSuffix(ext, ".gz") { ext = filepath.Ext(strings.TrimSuffix(name, ".gz")) gz := gzip.NewWriter(w) defer gz.Close() w = gz } f := quad.FormatByExt(ext) if f == nil || f.Writer == nil { log.Fatal("unknown extension:", ext) } qw = f.Writer(w) } defer qw.Close() var qh []func(q quad.Quad) if *f_types { m := make(map[quad.IRI]struct{}) check := func(v quad.Value) { if t, ok := v.(quad.TypedString); ok { if _, ok = m[t.Type]; !ok { log.Println("new type found:", t.Type) m[t.Type] = struct{}{} } } } defer func() { log.Printf("dataset uses %d basic types:\n%v", len(m), m) }() qh = append(qh, func(q quad.Quad) { check(q.Subject) check(q.Predicate) check(q.Object) }) } if *f_lang { m := make(map[string]struct{}) check := func(v quad.Value) { if t, ok := v.(quad.LangString); ok { if _, ok = m[t.Lang]; !ok { log.Println("new language found:", t.Lang) m[t.Lang] = struct{}{} } } } defer func() { log.Printf("dataset uses %d languages:\n%v", len(m), m) }() qh = append(qh, func(q quad.Quad) { check(q.Subject) check(q.Predicate) check(q.Object) }) } if qh != nil { qw = quadHook{w: qw, f: qh} } var errored bool for _, name := range args { if name == "" || name == "-" { continue } err := func() error { var qr quad.ReadCloser file, err := os.Open(name) if err != nil { return err } defer file.Close() var r io.Reader = file ext := filepath.Ext(name) if strings.HasSuffix(ext, ".gz") { ext = filepath.Ext(strings.TrimSuffix(name, ".gz")) gz, err := gzip.NewReader(r) if err != nil { return err } defer gz.Close() r = gz } f := quad.FormatByExt(ext) if f == nil || f.Reader == nil { return fmt.Errorf("unknown extension: %v", ext) } qr = f.Reader(r) defer qr.Close() _, err = quad.Copy(writeHook{w: qw, f: func() { cnt++ if cnt%(1000*1000) == 0 { log.Printf("written %dM quads in %v", cnt/1000/1000, time.Since(start)) } }}, qr) return err }() if err != nil { log.Println("error:", err) errored = true continue } } if errored { os.Exit(1) } }