Exemplo n.º 1
0
// BulkLoad fills an empty store with every quad produced by dec and then
// rebuilds the "nodes" collection from the "triples" collection with a
// server-side map/reduce job followed by a fix-up command.
//
// It returns graph.ErrCannotBulkLoad when the store already reports a
// non-zero size, the decoder's error when decoding stops on anything other
// than io.EOF, and any error reported by the map/reduce job or the fix-up
// command.
func (qs *TripleStore) BulkLoad(dec quad.Unmarshaler) error {
	if qs.Size() != 0 {
		return graph.ErrCannotBulkLoad
	}

	// The session runs with safety checks disabled while loading. The deferred
	// call re-enables them on every exit path, including the error returns
	// below, so a failed load cannot leave the session in unsafe mode.
	qs.session.SetSafe(nil)
	defer qs.session.SetSafe(&mgo.Safe{})

	for {
		q, err := dec.Unmarshal()
		if err == io.EOF {
			break
		}
		if err != nil {
			return err
		}
		qs.writeTriple(q)
	}

	outputTo := bson.M{"replace": "nodes", "sharded": true}
	glog.Infoln("Mapreducing")
	// Map splits each document's _id into four equal slices (subject,
	// predicate, object, label keys) and emits one node record per slice; the
	// label record is skipped when Label is empty. Reduce sums the Size
	// fields of all records sharing a key.
	job := mgo.MapReduce{
		Map: `function() {
      var len = this["_id"].length
      var s_key = this["_id"].slice(0, len / 4)
      var p_key = this["_id"].slice(len / 4, 2 * len / 4)
      var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4)
      var c_key = this["_id"].slice(3 * len / 4)
      emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1})
      emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1})
      emit(o_key, {"_id": o_key, "Name" : this.Object, "Size" : 1})
			if (this.Label != "") {
				emit(c_key, {"_id": c_key, "Name" : this.Label, "Size" : 1})
			}
    }
    `,
		Reduce: `
      function(key, value_list) {
        out = {"_id": key, "Name": value_list[0].Name}
        count = 0
        for (var i = 0; i < value_list.length; i++) {
          count = count + value_list[i].Size

        }
        out["Size"] = count
        return out
      }
    `,
		Out: outputTo,
	}
	if _, err := qs.db.C("triples").Find(nil).MapReduce(&job, nil); err != nil {
		return err
	}

	glog.Infoln("Fixing")
	// Replace every map/reduce output document with its "value" field.
	err := qs.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) {
    db.nodes.update({"_id": result._id}, result.value)
  }) }`}, {"args", bson.D{}}}, nil)
	if err != nil {
		return err
	}

	return nil
}
Exemplo n.º 2
0
// BulkLoad drains t_chan into an empty store and then rebuilds the "nodes"
// collection from the "triples" collection with a server-side map/reduce job
// followed by a fix-up command.
//
// It returns false, without reading from t_chan, when the store already
// reports a non-zero size, and true once the load has run. Failures of the
// map/reduce job or the fix-up command are logged rather than silently
// dropped; they do not change the return value.
func (ts *TripleStore) BulkLoad(t_chan chan *graph.Triple) bool {
	if ts.Size() != 0 {
		return false
	}

	// Safety checks are disabled for the duration of the load and re-enabled
	// just before returning.
	ts.session.SetSafe(nil)
	for triple := range t_chan {
		ts.writeTriple(triple)
	}
	outputTo := bson.M{"replace": "nodes", "sharded": true}
	glog.Infoln("Mapreducing")
	// Map splits each document's _id into four equal slices (subject,
	// predicate, object, provenance keys) and emits one node record per
	// slice; the provenance record is skipped when Provenance is empty.
	// Reduce sums the Size fields of all records sharing a key.
	job := mgo.MapReduce{
		Map: `function() {
      var len = this["_id"].length
      var s_key = this["_id"].slice(0, len / 4)
      var p_key = this["_id"].slice(len / 4, 2 * len / 4)
      var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4)
      var c_key = this["_id"].slice(3 * len / 4)
      emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1})
      emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1})
      emit(o_key, {"_id": o_key, "Name" : this.Object, "Size" : 1})
			if (this.Provenance != "") {
				emit(c_key, {"_id": c_key, "Name" : this.Provenance, "Size" : 1})
			}
    }
    `,
		Reduce: `
      function(key, value_list) {
        out = {"_id": key, "Name": value_list[0].Name}
        count = 0
        for (var i = 0; i < value_list.length; i++) {
          count = count + value_list[i].Size

        }
        out["Size"] = count
        return out
      }
    `,
		Out: outputTo,
	}
	if _, err := ts.db.C("triples").Find(nil).MapReduce(&job, nil); err != nil {
		glog.Errorln("Mapreducing failed:", err)
	}
	glog.Infoln("Fixing")
	// Replace every map/reduce output document with its "value" field.
	err := ts.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) {
    db.nodes.update({"_id": result._id}, result.value)
  }) }`}, {"args", bson.D{}}}, nil)
	if err != nil {
		glog.Errorln("Fixing failed:", err)
	}

	ts.session.SetSafe(&mgo.Safe{})
	return true
}
Exemplo n.º 3
0
// ReadNQuadsFromReader reads reader line by line, passes each complete line
// to Parse and sends every non-nil result on c. Lines that arrive in several
// pieces are reassembled before parsing. A read error other than io.EOF is
// reported through glog.Fatalln. When io.EOF is reached the number of sent
// triples is logged and c is closed.
func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) {
	var (
		input   = bufio.NewReader(reader)
		pending string
		sent    int
	)

	for {
		chunk, partial, err := input.ReadLine()
		if err != nil {
			if err == io.EOF {
				break
			}
			glog.Fatalln("Something bad happened while reading file " + err.Error())
		}

		pending += string(chunk)
		if partial {
			// The line did not fit in the reader's buffer; keep accumulating.
			continue
		}

		parsed := Parse(pending)
		pending = ""
		if parsed == nil {
			continue
		}
		sent++
		c <- parsed
	}

	glog.Infoln("Read", sent, "triples")
	close(c)
}
Exemplo n.º 4
0
// newTripleStore opens the LevelDB database at path and returns a TripleStore
// backed by it.
//
// Two integer options are honoured, both multiplied by opt.MiB:
// "cache_size_mb" (default DefaultCacheSize) sizes the LRU block cache and
// "write_buffer_mb" (default DefaultWriteBufferSize) sizes the write buffer.
// ErrorIfMissing is set on the options passed to leveldb.OpenFile.
//
// A failure to open the database is logged and returned to the caller
// instead of panicking, since the signature already carries an error.
func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) {
	var ts TripleStore
	ts.path = path

	cacheSize := DefaultCacheSize
	if val, ok := options.IntKey("cache_size_mb"); ok {
		cacheSize = val
	}
	ts.dbOpts = &opt.Options{
		BlockCache: cache.NewLRUCache(cacheSize * opt.MiB),
	}
	ts.dbOpts.ErrorIfMissing = true

	writeBufferSize := DefaultWriteBufferSize
	if val, ok := options.IntKey("write_buffer_mb"); ok {
		writeBufferSize = val
	}
	ts.dbOpts.WriteBuffer = writeBufferSize * opt.MiB

	ts.hasher = sha1.New()
	ts.writeopts = &opt.WriteOptions{
		Sync: false,
	}
	ts.readopts = &opt.ReadOptions{}

	db, err := leveldb.OpenFile(ts.path, ts.dbOpts)
	if err != nil {
		glog.Errorln("Error, couldn't open! ", err)
		return nil, err
	}
	ts.db = db
	glog.Infoln(ts.GetStats())
	ts.getSize()
	return &ts, nil
}
Exemplo n.º 5
0
// newTripleStore opens the LevelDB database at path and returns a TripleStore
// backed by it, or the error reported by leveldb.OpenFile or getMetadata.
//
// The integer options "cache_size_mb" and "write_buffer_mb" override
// DefaultCacheSize and DefaultWriteBufferSize; both are multiplied by
// opt.MiB before use.
func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) {
	cacheMB := DefaultCacheSize
	if mb, found := options.IntKey("cache_size_mb"); found {
		cacheMB = mb
	}
	bufferMB := DefaultWriteBufferSize
	if mb, found := options.IntKey("write_buffer_mb"); found {
		bufferMB = mb
	}

	qs := &TripleStore{
		path: path,
		dbOpts: &opt.Options{
			BlockCache:     cache.NewLRUCache(cacheMB * opt.MiB),
			ErrorIfMissing: true,
			WriteBuffer:    bufferMB * opt.MiB,
		},
		writeopts: &opt.WriteOptions{Sync: false},
		readopts:  &opt.ReadOptions{},
	}

	handle, openErr := leveldb.OpenFile(qs.path, qs.dbOpts)
	if openErr != nil {
		glog.Errorln("Error, couldn't open! ", openErr)
		return nil, openErr
	}
	qs.db = handle
	glog.Infoln(qs.GetStats())

	if metaErr := qs.getMetadata(); metaErr != nil {
		return nil, metaErr
	}
	return qs, nil
}
Exemplo n.º 6
0
// configFrom resolves which configuration file to use, loads it with
// config.Load and fills an empty DatabasePath or DatabaseType from the
// databasePath and databaseBackend values.
//
// An explicitly named file that does not exist is fatal. With no file named,
// the path in $CAYLEY_CFG and then /etc/cayley.cfg are tried in that order;
// if neither can be stat'ed, config.Load is called with an empty name. A
// config.Load error is fatal.
func configFrom(file string) *config.Config {
	if file == "" {
		fallbacks := []string{os.Getenv("CAYLEY_CFG"), "/etc/cayley.cfg"}
		for _, candidate := range fallbacks {
			if _, statErr := os.Stat(candidate); statErr == nil {
				file = candidate
				break
			}
		}
	} else if _, statErr := os.Stat(file); os.IsNotExist(statErr) {
		glog.Fatalln("Cannot find specified configuration file", file, ", aborting.")
	}

	if file == "" {
		glog.Infoln("Couldn't find a config file in either $CAYLEY_CFG or /etc/cayley.cfg. Going by flag defaults only.")
	}

	loaded, loadErr := config.Load(file)
	if loadErr != nil {
		glog.Fatalln(loadErr)
	}

	if loaded.DatabasePath == "" {
		loaded.DatabasePath = *databasePath
	}
	if loaded.DatabaseType == "" {
		loaded.DatabaseType = *databaseBackend
	}
	return loaded
}
Exemplo n.º 7
0
// ParseConfigFromFlagsAndFile resolves which configuration file to use,
// parses it with ParseConfigFromFile and fills every field the file left at
// its zero value from the corresponding package-level flag value.
//
// A non-empty fileFlag that does not exist is fatal. With an empty fileFlag,
// the path in $CAYLEY_CFG and then /etc/cayley.cfg are tried in that order;
// if neither can be stat'ed, ParseConfigFromFile is called with an empty
// name. ReadOnly is true when either the file or the flag requests it.
func ParseConfigFromFlagsAndFile(fileFlag string) *Config {
	var path string
	if fileFlag == "" {
		for _, candidate := range []string{os.Getenv("CAYLEY_CFG"), "/etc/cayley.cfg"} {
			if _, statErr := os.Stat(candidate); statErr == nil {
				path = candidate
				break
			}
		}
	} else {
		if _, statErr := os.Stat(fileFlag); !os.IsNotExist(statErr) {
			path = fileFlag
		} else {
			glog.Fatalln("Cannot find specified configuration file", fileFlag, ", aborting.")
		}
	}

	if path == "" {
		glog.Infoln("Couldn't find a config file in either $CAYLEY_CFG or /etc/cayley.cfg. Going by flag defaults only.")
	}

	cfg := ParseConfigFromFile(path)

	// String settings: an empty value means "not set in the file".
	if cfg.DatabasePath == "" {
		cfg.DatabasePath = *databasePath
	}
	if cfg.DatabaseType == "" {
		cfg.DatabaseType = *databaseBackend
	}
	if cfg.ReplicationType == "" {
		cfg.ReplicationType = *replicationBackend
	}
	if cfg.ListenHost == "" {
		cfg.ListenHost = *host
	}
	if cfg.ListenPort == "" {
		cfg.ListenPort = *port
	}

	// Numeric settings: zero means "not set in the file".
	if cfg.Timeout == 0 {
		cfg.Timeout = *timeout
	}
	if cfg.LoadSize == 0 {
		cfg.LoadSize = *loadSize
	}

	cfg.ReadOnly = cfg.ReadOnly || *readOnly

	return cfg
}
Exemplo n.º 8
0
// ApplyDeltas applies the given deltas to the store.
//
// It first checks every delta: an Add whose quad already passes checkValid
// yields graph.ErrQuadExists and a Delete whose quad does not yields
// graph.ErrQuadNotExist, in both cases before anything is written. It then
// writes all log entries, updates each quad, and finally adjusts the per-node
// counters by the net number of references added or removed. The first error
// from updateLog, updateQuad or updateNodeBy aborts the call and is returned;
// work done up to that point is not undone here.
//
// NOTE(review): any action other than graph.Add is counted as a removal when
// the node counters are computed.
func (qs *TripleStore) ApplyDeltas(in []graph.Delta) error {
	// Safety checks are disabled while the batch is applied. The deferred
	// call re-enables them on every exit path, so an early error return
	// cannot leave the session in unsafe mode.
	qs.session.SetSafe(nil)
	defer qs.session.SetSafe(&mgo.Safe{})

	ids := make(map[string]int)
	// Pre-check the existence condition.
	for _, d := range in {
		key := qs.getIdForQuad(d.Quad)
		switch d.Action {
		case graph.Add:
			if qs.checkValid(key) {
				return graph.ErrQuadExists
			}
		case graph.Delete:
			if !qs.checkValid(key) {
				return graph.ErrQuadNotExist
			}
		}
	}
	if glog.V(2) {
		glog.Infoln("Existence verified. Proceeding.")
	}
	for _, d := range in {
		err := qs.updateLog(d)
		if err != nil {
			return err
		}
	}
	for _, d := range in {
		err := qs.updateQuad(d.Quad, d.ID, d.Action)
		if err != nil {
			return err
		}
		var countdelta int
		if d.Action == graph.Add {
			countdelta = 1
		} else {
			countdelta = -1
		}
		// Accumulate the net reference change per node name so each node is
		// updated once, after all quads have been processed.
		ids[d.Quad.Subject] += countdelta
		ids[d.Quad.Object] += countdelta
		ids[d.Quad.Predicate] += countdelta
		if d.Quad.Label != "" {
			ids[d.Quad.Label] += countdelta
		}
	}
	for k, v := range ids {
		err := qs.updateNodeBy(k, v)
		if err != nil {
			return err
		}
	}
	return nil
}
Exemplo n.º 9
0
// main dispatches on the first command-line argument ("init", "load", "repl"
// or "http"). The command word is removed from os.Args before the flags are
// parsed. With no arguments it prints the usage text and exits with status 1;
// an unrecognised command is reported together with the flag usage.
func main() {
	// No command? It's time for usage.
	if len(os.Args) == 1 {
		Usage()
		os.Exit(1)
	}

	command := os.Args[1]
	os.Args = append([]string{os.Args[0]}, os.Args[2:]...)
	flag.Parse()

	cfg := config.ParseConfigFromFlagsAndFile(*configFile)

	// Only pick a GOMAXPROCS value when the environment has not set one.
	if procs := os.Getenv("GOMAXPROCS"); procs != "" {
		glog.Infoln("GOMAXPROCS currently", procs, " -- not adjusting")
	} else {
		cpus := runtime.NumCPU()
		runtime.GOMAXPROCS(cpus)
		glog.Infoln("Setting GOMAXPROCS to", cpus)
	}

	switch command {
	case "init":
		db.Init(cfg, *tripleFile)
	case "load", "repl", "http":
		// These three commands share the same open/close bracket around
		// their command-specific work.
		var store graph.TripleStore
		store = db.Open(cfg)
		switch command {
		case "load":
			db.Load(store, cfg, *tripleFile, false)
		case "repl":
			db.Repl(store, *queryLanguage, cfg)
		case "http":
			http.Serve(store, cfg)
		}
		store.Close()
	default:
		fmt.Println("No command", command)
		flag.Usage()
	}
}
Exemplo n.º 10
0
// configFrom resolves which configuration file to use, loads it with
// config.Load and fills every field the file left at its zero value from the
// corresponding package-level default.
//
// A non-empty file that does not exist yields an error naming that file.
// With an empty file, /cayley_appengine.cfg is used when it can be stat'ed;
// otherwise config.Load is called with an empty name. An error from
// config.Load is returned unchanged. ReadOnly is true when either the file
// or the package-level readOnly value requests it.
func configFrom(file string) (*config.Config, error) {
	// Find the file...
	if file != "" {
		if _, err := os.Stat(file); os.IsNotExist(err) {
			// The file name needs a format verb; without one fmt appends
			// "%!(EXTRA string=...)" to the message.
			return nil, fmt.Errorf("Cannot find specified configuration file %q", file)
		}
	} else if _, err := os.Stat("/cayley_appengine.cfg"); err == nil {
		file = "/cayley_appengine.cfg"
	}
	if file == "" {
		glog.Infoln("Couldn't find a config file appengine.cfg. Going by flag defaults only.")
	}
	cfg, err := config.Load(file)
	if err != nil {
		return nil, err
	}

	if cfg.DatabasePath == "" {
		cfg.DatabasePath = databasePath
	}

	if cfg.DatabaseType == "" {
		cfg.DatabaseType = databaseBackend
	}

	if cfg.ReplicationType == "" {
		cfg.ReplicationType = replicationBackend
	}

	if cfg.ListenHost == "" {
		cfg.ListenHost = host
	}

	if cfg.ListenPort == "" {
		cfg.ListenPort = port
	}

	if cfg.Timeout == 0 {
		cfg.Timeout = timeout
	}

	if cfg.LoadSize == 0 {
		cfg.LoadSize = loadSize
	}

	cfg.ReadOnly = cfg.ReadOnly || readOnly

	return cfg, nil
}
Exemplo n.º 11
0
// newQuadStore opens the LevelDB database at path and returns a QuadStore
// backed by it, or the first error reported while reading the options,
// opening the database or loading the metadata.
//
// The integer option "cache_size_mb" overrides DefaultCacheSize and
// "writeBufferSize" overrides DefaultWriteBufferSize; both are multiplied by
// opt.MiB before use.
//
// NOTE(review): the two option keys follow different naming styles
// ("cache_size_mb" vs "writeBufferSize"); confirm the second key is the one
// configuration files are expected to use.
func newQuadStore(path string, options graph.Options) (graph.QuadStore, error) {
	cacheMB := DefaultCacheSize
	val, found, err := options.IntKey("cache_size_mb")
	switch {
	case err != nil:
		return nil, err
	case found:
		cacheMB = val
	}

	bufferMB := DefaultWriteBufferSize
	val, found, err = options.IntKey("writeBufferSize")
	switch {
	case err != nil:
		return nil, err
	case found:
		bufferMB = val
	}

	qs := &QuadStore{
		path: path,
		dbOpts: &opt.Options{
			BlockCacheCapacity: cacheMB * opt.MiB,
			ErrorIfMissing:     true,
			WriteBuffer:        bufferMB * opt.MiB,
		},
		writeopts: &opt.WriteOptions{Sync: false},
		readopts:  &opt.ReadOptions{},
	}

	handle, err := leveldb.OpenFile(qs.path, qs.dbOpts)
	if err != nil {
		glog.Errorln("Error, could not open! ", err)
		return nil, err
	}
	qs.db = handle
	glog.Infoln(qs.GetStats())

	if err = qs.getMetadata(); err != nil {
		return nil, err
	}
	return qs, nil
}
Exemplo n.º 12
0
// ApplyDeltas applies the given deltas to the store.
//
// It first checks every delta. An action other than graph.Add or
// graph.Delete is rejected. An Add whose quad already passes checkValid
// yields graph.ErrQuadExists unless ignoreOpts.IgnoreDup is set, and a Delete
// whose quad does not yields graph.ErrQuadNotExist unless
// ignoreOpts.IgnoreMissing is set. All of these checks happen before anything
// is written. It then writes all log entries, updates each quad, and finally
// adjusts the per-node counters by the net number of references added or
// removed. The first error from updateLog, updateQuad or updateNodeBy aborts
// the call and is returned; work done up to that point is not undone here.
//
// NOTE(review): a delta tolerated through IgnoreDup or IgnoreMissing only
// skips the existence check; it is still logged, applied and counted by the
// later loops. Confirm that this is intended.
func (qs *QuadStore) ApplyDeltas(in []graph.Delta, ignoreOpts graph.IgnoreOpts) error {
	// Safety checks are disabled while the batch is applied. The deferred
	// call re-enables them on every exit path, so an early error return
	// cannot leave the session in unsafe mode.
	qs.session.SetSafe(nil)
	defer qs.session.SetSafe(&mgo.Safe{})

	ids := make(map[string]int)
	// Pre-check the existence condition.
	for _, d := range in {
		if d.Action != graph.Add && d.Action != graph.Delete {
			return errors.New("mongo: invalid action")
		}
		key := qs.getIDForQuad(d.Quad)
		switch d.Action {
		case graph.Add:
			if qs.checkValid(key) {
				if ignoreOpts.IgnoreDup {
					continue
				}
				return graph.ErrQuadExists
			}
		case graph.Delete:
			if !qs.checkValid(key) {
				if ignoreOpts.IgnoreMissing {
					continue
				}
				return graph.ErrQuadNotExist
			}
		}
	}
	if glog.V(2) {
		glog.Infoln("Existence verified. Proceeding.")
	}
	for _, d := range in {
		err := qs.updateLog(d)
		if err != nil {
			return err
		}
	}
	for _, d := range in {
		err := qs.updateQuad(d.Quad, d.ID.Int(), d.Action)
		if err != nil {
			return err
		}
		var countdelta int
		if d.Action == graph.Add {
			countdelta = 1
		} else {
			countdelta = -1
		}
		// Accumulate the net reference change per node name so each node is
		// updated once, after all quads have been processed.
		ids[d.Quad.Subject] += countdelta
		ids[d.Quad.Object] += countdelta
		ids[d.Quad.Predicate] += countdelta
		if d.Quad.Label != "" {
			ids[d.Quad.Label] += countdelta
		}
	}
	for k, v := range ids {
		err := qs.updateNodeBy(k, v)
		if err != nil {
			return err
		}
	}
	return nil
}
Exemplo n.º 13
0
// main dispatches on the first command-line argument ("version", "init",
// "load", "repl" or "http"). The command word is removed from os.Args before
// the flags are parsed. With no arguments it prints a short description and
// the usage text and exits with status 1; an unrecognised command is
// reported together with the usage text.
//
// Any error produced by the selected command is logged once, after the
// switch. A handle that was opened successfully is closed on every path,
// including those where a later step fails.
func main() {
	// No command? It's time for usage.
	if len(os.Args) == 1 {
		fmt.Fprintln(os.Stderr, "Cayley is a graph store and graph query layer.")
		usage()
		os.Exit(1)
	}

	cmd := os.Args[1]
	os.Args = append(os.Args[:1], os.Args[2:]...)
	flag.Parse()

	var buildString string
	if Version != "" {
		buildString = fmt.Sprint("Cayley ", Version, " built ", BuildDate)
		glog.Infoln(buildString)
	}

	cfg := configFrom(*configFile)

	// Only pick a GOMAXPROCS value when the environment has not set one.
	if os.Getenv("GOMAXPROCS") == "" {
		runtime.GOMAXPROCS(runtime.NumCPU())
		glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU())
	} else {
		glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting")
	}

	var (
		handle *graph.Handle
		err    error
	)
	switch cmd {
	case "version":
		if Version != "" {
			fmt.Println(buildString)
		} else {
			fmt.Println("Cayley snapshot")
		}
		os.Exit(0)

	case "init":
		err = db.Init(cfg)
		if err != nil {
			break
		}
		if *quadFile != "" {
			handle, err = db.Open(cfg)
			if err != nil {
				break
			}
			err = internal.Load(handle.QuadWriter, cfg, *quadFile, *quadType)
			// Close whether or not the load succeeded; err is reported below.
			handle.Close()
		}

	case "load":
		handle, err = db.Open(cfg)
		if err != nil {
			break
		}
		err = internal.Load(handle.QuadWriter, cfg, *quadFile, *quadType)
		// Close whether or not the load succeeded; err is reported below.
		handle.Close()

	case "repl":
		handle, err = db.Open(cfg)
		if err != nil {
			break
		}
		if !graph.IsPersistent(cfg.DatabaseType) {
			err = internal.Load(handle.QuadWriter, cfg, "", *quadType)
			if err != nil {
				handle.Close()
				break
			}
		}

		err = db.Repl(handle, *queryLanguage, cfg)

		handle.Close()

	case "http":
		handle, err = db.Open(cfg)
		if err != nil {
			break
		}
		if !graph.IsPersistent(cfg.DatabaseType) {
			err = internal.Load(handle.QuadWriter, cfg, "", *quadType)
			if err != nil {
				handle.Close()
				break
			}
		}

		http.Serve(handle, cfg)

		handle.Close()

	default:
		fmt.Println("No command", cmd)
		usage()
	}
	if err != nil {
		glog.Errorln(err)
	}
}
Exemplo n.º 14
0
// main dispatches on the first command-line argument ("version", "init",
// "load", "repl" or "http"). The command word is removed from os.Args before
// the flags are parsed. With no arguments it prints the usage text and exits
// with status 1; an unrecognised command is reported together with the flag
// usage.
//
// Any error produced by the selected command is passed to glog.Fatalln after
// the switch. A store that was opened successfully is closed before that
// happens, including when the load or REPL step fails.
func main() {
	// No command? It's time for usage.
	if len(os.Args) == 1 {
		Usage()
		os.Exit(1)
	}

	cmd := os.Args[1]
	var newargs []string
	newargs = append(newargs, os.Args[0])
	newargs = append(newargs, os.Args[2:]...)
	os.Args = newargs
	flag.Parse()

	var buildString string
	if VERSION != "" {
		buildString = fmt.Sprint("Cayley ", VERSION, " built ", BUILD_DATE)
		glog.Infoln(buildString)
	}

	cfg := config.ParseConfigFromFlagsAndFile(*configFile)

	// Only pick a GOMAXPROCS value when the environment has not set one.
	if os.Getenv("GOMAXPROCS") == "" {
		runtime.GOMAXPROCS(runtime.NumCPU())
		glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU())
	} else {
		glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting")
	}

	var (
		ts  graph.TripleStore
		err error
	)
	switch cmd {
	case "version":
		if VERSION != "" {
			fmt.Println(buildString)
		} else {
			fmt.Println("Cayley snapshot")
		}
		os.Exit(0)
	case "init":
		err = db.Init(cfg, *tripleFile)
	case "load":
		ts, err = db.Open(cfg)
		if err != nil {
			break
		}
		err = db.Load(ts, cfg, *tripleFile)
		// Close whether or not the load succeeded; err is reported below.
		ts.Close()
	case "repl":
		ts, err = db.Open(cfg)
		if err != nil {
			break
		}
		err = db.Repl(ts, *queryLanguage, cfg)
		// Close whether or not the REPL succeeded; err is reported below.
		ts.Close()
	case "http":
		ts, err = db.Open(cfg)
		if err != nil {
			break
		}
		http.Serve(ts, cfg)
		ts.Close()
	default:
		fmt.Println("No command", cmd)
		flag.Usage()
	}
	if err != nil {
		glog.Fatalln(err)
	}
}