Example #1
0
// DecompressAndLoad will load or fetch a graph from the given path, decompress
// it, and then call the given load function to process the decompressed graph.
// If no loadFn is provided, db.Load is called.
//
// An empty path falls back to cfg.DatabasePath; if that is empty as well,
// nothing is loaded and nil is returned. A path that does not parse as a URL,
// has no scheme, or uses the "file" scheme is opened from the local
// filesystem; any other path is fetched with client.Get, and a non-2xx
// response is reported as an error.
//
// typ selects the decoder: "cquad"/"cquads" and "nquad"/"nquads" use the
// dedicated decoders, any other name is looked up with quad.FormatByName and
// rejected with an error when no reader is registered for it. When
// Decompressor reports io.EOF, the call returns nil without invoking the load
// function.
func DecompressAndLoad(qw graph.QuadWriter, cfg *config.Config, path, typ string, loadFn func(graph.QuadWriter, *config.Config, quad.Reader) error) error {
	var r io.Reader

	if path == "" {
		path = cfg.DatabasePath
	}
	if path == "" {
		return nil
	}
	u, err := url.Parse(path)
	if err != nil || u.Scheme == "file" || u.Scheme == "" {
		// Don't alter relative URL path or non-URL path parameter.
		// err must be checked before u is touched: url.Parse returns a nil
		// *URL on failure, so reading u.Scheme first would panic.
		if err == nil && u.Scheme != "" {
			// Recovery heuristic for mistyping "file://path/to/file".
			path = filepath.Join(u.Host, u.Path)
		}
		f, err := os.Open(path)
		if err != nil {
			return fmt.Errorf("could not open file %q: %v", path, err)
		}
		defer f.Close()
		r = f
	} else {
		res, err := client.Get(path)
		if err != nil {
			return fmt.Errorf("could not get resource <%s>: %v", u, err)
		}
		defer res.Body.Close()
		// A completed request with an error status is not a Get error; refuse
		// it here so an error page is never parsed as graph data.
		if res.StatusCode < 200 || res.StatusCode > 299 {
			return fmt.Errorf("could not get resource <%s>: %s", u, res.Status)
		}
		r = res.Body
	}

	r, err = Decompressor(r)
	if err != nil {
		if err == io.EOF {
			return nil
		}
		return err
	}

	var dec quad.ReadCloser
	switch typ {
	case "cquad", "cquads":
		dec = cquads.NewDecoder(r)
	case "nquad", "nquads":
		dec = nquads.NewDecoder(r)
	default:
		format := quad.FormatByName(typ)
		if format == nil || format.Reader == nil {
			return fmt.Errorf("unknown quad format %q", typ)
		}
		dec = format.Reader(r)
	}
	defer dec.Close()

	if loadFn != nil {
		return loadFn(qw, cfg, dec)
	}

	return db.Load(qw, cfg, dec)
}
Example #2
0
// main parses the command line, converts every source file into the
// destination (or discards the quads when the drop flag is set) and exits
// with status 1 if anything failed.
//
// The actual work lives in convert: os.Exit does not run deferred calls, so
// exiting from the function that owns the output writers would skip their
// Close calls and leave the destination truncated.
func main() {
	flag.Parse()
	args := flag.Args()
	if (!*f_drop && len(args) < 2) || (*f_drop && len(args) < 1) {
		fmt.Println("usage: qconv <src> <dst>")
		os.Exit(1)
	}
	if !convert(args) {
		os.Exit(1)
	}
}

// convert copies the quads of every source file named in args into the
// destination. Unless the drop flag is set, the last element of args is the
// destination file; its extension selects the output format and a trailing
// ".gz" additionally enables gzip compression. Sources named "" or "-" are
// skipped.
//
// Failures are logged rather than returned. The result is true only when the
// destination could be set up, every source was copied without error, and the
// whole output chain was closed cleanly. All output layers are closed before
// convert returns, whether or not an error occurred.
func convert(args []string) (ok bool) {
	start := time.Now()
	var cnt int
	defer func() {
		log.Printf("written %d quads in %v", cnt, time.Since(start))
	}()

	// closeOut closes one layer of the output chain. A failed close can mean
	// lost data, so it is logged and turns the overall result into a failure.
	closeOut := func(c io.Closer) {
		if err := c.Close(); err != nil {
			log.Println("error:", err)
			ok = false
		}
	}

	var qw quad.WriteCloser
	if *f_drop {
		qw = devNull{}
	} else {
		name := args[len(args)-1]
		args = args[:len(args)-1]

		// Resolve the output format before creating the file so that an
		// unsupported extension does not truncate an existing destination.
		ext := filepath.Ext(name)
		gzipped := strings.HasSuffix(ext, ".gz")
		if gzipped {
			ext = filepath.Ext(strings.TrimSuffix(name, ".gz"))
		}
		format := quad.FormatByExt(ext)
		if format == nil || format.Writer == nil {
			log.Printf("unknown extension: %v", ext)
			return false
		}

		file, err := os.Create(name)
		if err != nil {
			log.Print(err)
			return false
		}
		defer closeOut(file)

		var w io.Writer = file
		if gzipped {
			gz := gzip.NewWriter(w)
			defer closeOut(gz)
			w = gz
		}
		qw = format.Writer(w)
	}
	// The argument is evaluated now, so this closes the format writer itself
	// even though qw may be wrapped in a quadHook below.
	defer closeOut(qw)

	var qh []func(q quad.Quad)
	if *f_types {
		m := make(map[quad.IRI]struct{})
		check := func(v quad.Value) {
			if t, ok := v.(quad.TypedString); ok {
				if _, ok = m[t.Type]; !ok {
					log.Println("new type found:", t.Type)
					m[t.Type] = struct{}{}
				}
			}
		}
		defer func() {
			log.Printf("dataset uses %d basic types:\n%v", len(m), m)
		}()
		qh = append(qh, func(q quad.Quad) {
			check(q.Subject)
			check(q.Predicate)
			check(q.Object)
		})
	}
	if *f_lang {
		m := make(map[string]struct{})
		check := func(v quad.Value) {
			if t, ok := v.(quad.LangString); ok {
				if _, ok = m[t.Lang]; !ok {
					log.Println("new language found:", t.Lang)
					m[t.Lang] = struct{}{}
				}
			}
		}
		defer func() {
			log.Printf("dataset uses %d languages:\n%v", len(m), m)
		}()
		qh = append(qh, func(q quad.Quad) {
			check(q.Subject)
			check(q.Predicate)
			check(q.Object)
		})
	}
	if qh != nil {
		qw = quadHook{w: qw, f: qh}
	}

	// countWrite is invoked by writeHook and reports progress once per
	// million quads.
	countWrite := func() {
		cnt++
		if cnt%(1000*1000) == 0 {
			log.Printf("written %dM quads in %v", cnt/1000/1000, time.Since(start))
		}
	}

	// copyFile streams one source file into qw. It is a function of its own
	// so that the per-file defers run at the end of each iteration.
	copyFile := func(name string) error {
		file, err := os.Open(name)
		if err != nil {
			return err
		}
		defer file.Close()

		var r io.Reader = file
		ext := filepath.Ext(name)
		if strings.HasSuffix(ext, ".gz") {
			ext = filepath.Ext(strings.TrimSuffix(name, ".gz"))
			gz, err := gzip.NewReader(r)
			if err != nil {
				return err
			}
			defer gz.Close()
			r = gz
		}
		format := quad.FormatByExt(ext)
		if format == nil || format.Reader == nil {
			return fmt.Errorf("unknown extension: %v", ext)
		}
		qr := format.Reader(r)
		defer qr.Close()
		_, err = quad.Copy(writeHook{w: qw, f: countWrite}, qr)
		return err
	}

	ok = true
	for _, name := range args {
		if name == "" || name == "-" {
			continue
		}
		if err := copyFile(name); err != nil {
			// Keep going so the remaining sources are still converted.
			log.Println("error:", err)
			ok = false
		}
	}
	return ok
}