func (qs *TripleStore) BulkLoad(dec quad.Unmarshaler) error { if qs.Size() != 0 { return graph.ErrCannotBulkLoad } qs.session.SetSafe(nil) for { q, err := dec.Unmarshal() if err != nil { if err != io.EOF { return err } break } qs.writeTriple(q) } outputTo := bson.M{"replace": "nodes", "sharded": true} glog.Infoln("Mapreducing") job := mgo.MapReduce{ Map: `function() { var len = this["_id"].length var s_key = this["_id"].slice(0, len / 4) var p_key = this["_id"].slice(len / 4, 2 * len / 4) var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4) var c_key = this["_id"].slice(3 * len / 4) emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1}) emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1}) emit(o_key, {"_id": o_key, "Name" : this.Object, "Size" : 1}) if (this.Label != "") { emit(c_key, {"_id": c_key, "Name" : this.Label, "Size" : 1}) } } `, Reduce: ` function(key, value_list) { out = {"_id": key, "Name": value_list[0].Name} count = 0 for (var i = 0; i < value_list.length; i++) { count = count + value_list[i].Size } out["Size"] = count return out } `, Out: outputTo, } qs.db.C("triples").Find(nil).MapReduce(&job, nil) glog.Infoln("Fixing") qs.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) { db.nodes.update({"_id": result._id}, result.value) }) }`}, {"args", bson.D{}}}, nil) qs.session.SetSafe(&mgo.Safe{}) return nil }
func (ts *TripleStore) BulkLoad(t_chan chan *graph.Triple) bool { if ts.Size() != 0 { return false } ts.session.SetSafe(nil) for triple := range t_chan { ts.writeTriple(triple) } outputTo := bson.M{"replace": "nodes", "sharded": true} glog.Infoln("Mapreducing") job := mgo.MapReduce{ Map: `function() { var len = this["_id"].length var s_key = this["_id"].slice(0, len / 4) var p_key = this["_id"].slice(len / 4, 2 * len / 4) var o_key = this["_id"].slice(2 * len / 4, 3 * len / 4) var c_key = this["_id"].slice(3 * len / 4) emit(s_key, {"_id": s_key, "Name" : this.Subject, "Size" : 1}) emit(p_key, {"_id": p_key, "Name" : this.Predicate, "Size" : 1}) emit(o_key, {"_id": o_key, "Name" : this.Object, "Size" : 1}) if (this.Provenance != "") { emit(c_key, {"_id": c_key, "Name" : this.Provenance, "Size" : 1}) } } `, Reduce: ` function(key, value_list) { out = {"_id": key, "Name": value_list[0].Name} count = 0 for (var i = 0; i < value_list.length; i++) { count = count + value_list[i].Size } out["Size"] = count return out } `, Out: outputTo, } ts.db.C("triples").Find(nil).MapReduce(&job, nil) glog.Infoln("Fixing") ts.db.Run(bson.D{{"eval", `function() { db.nodes.find().forEach(function (result) { db.nodes.update({"_id": result._id}, result.value) }) }`}, {"args", bson.D{}}}, nil) ts.session.SetSafe(&mgo.Safe{}) return true }
func ReadNQuadsFromReader(c chan *graph.Triple, reader io.Reader) { bf := bufio.NewReader(reader) nTriples := 0 line := "" for { l, pre, err := bf.ReadLine() if err == io.EOF { break } if err != nil { glog.Fatalln("Something bad happened while reading file " + err.Error()) } line += string(l) if pre { continue } triple := Parse(line) line = "" if triple != nil { nTriples++ c <- triple } } glog.Infoln("Read", nTriples, "triples") close(c) }
func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) { var ts TripleStore ts.path = path cache_size := DefaultCacheSize if val, ok := options.IntKey("cache_size_mb"); ok { cache_size = val } ts.dbOpts = &opt.Options{ BlockCache: cache.NewLRUCache(cache_size * opt.MiB), } ts.dbOpts.ErrorIfMissing = true write_buffer_mb := DefaultWriteBufferSize if val, ok := options.IntKey("write_buffer_mb"); ok { write_buffer_mb = val } ts.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB ts.hasher = sha1.New() ts.writeopts = &opt.WriteOptions{ Sync: false, } ts.readopts = &opt.ReadOptions{} db, err := leveldb.OpenFile(ts.path, ts.dbOpts) if err != nil { panic("Error, couldn't open! " + err.Error()) } ts.db = db glog.Infoln(ts.GetStats()) ts.getSize() return &ts, nil }
func newTripleStore(path string, options graph.Options) (graph.TripleStore, error) { var qs TripleStore var err error qs.path = path cache_size := DefaultCacheSize if val, ok := options.IntKey("cache_size_mb"); ok { cache_size = val } qs.dbOpts = &opt.Options{ BlockCache: cache.NewLRUCache(cache_size * opt.MiB), } qs.dbOpts.ErrorIfMissing = true write_buffer_mb := DefaultWriteBufferSize if val, ok := options.IntKey("write_buffer_mb"); ok { write_buffer_mb = val } qs.dbOpts.WriteBuffer = write_buffer_mb * opt.MiB qs.writeopts = &opt.WriteOptions{ Sync: false, } qs.readopts = &opt.ReadOptions{} db, err := leveldb.OpenFile(qs.path, qs.dbOpts) if err != nil { glog.Errorln("Error, couldn't open! ", err) return nil, err } qs.db = db glog.Infoln(qs.GetStats()) err = qs.getMetadata() if err != nil { return nil, err } return &qs, nil }
func configFrom(file string) *config.Config { // Find the file... if file != "" { if _, err := os.Stat(file); os.IsNotExist(err) { glog.Fatalln("Cannot find specified configuration file", file, ", aborting.") } } else if _, err := os.Stat(os.Getenv("CAYLEY_CFG")); err == nil { file = os.Getenv("CAYLEY_CFG") } else if _, err := os.Stat("/etc/cayley.cfg"); err == nil { file = "/etc/cayley.cfg" } if file == "" { glog.Infoln("Couldn't find a config file in either $CAYLEY_CFG or /etc/cayley.cfg. Going by flag defaults only.") } cfg, err := config.Load(file) if err != nil { glog.Fatalln(err) } if cfg.DatabasePath == "" { cfg.DatabasePath = *databasePath } if cfg.DatabaseType == "" { cfg.DatabaseType = *databaseBackend } return cfg }
func ParseConfigFromFlagsAndFile(fileFlag string) *Config { // Find the file... var trueFilename string if fileFlag != "" { if _, err := os.Stat(fileFlag); os.IsNotExist(err) { glog.Fatalln("Cannot find specified configuration file", fileFlag, ", aborting.") } else { trueFilename = fileFlag } } else { if _, err := os.Stat(os.Getenv("CAYLEY_CFG")); err == nil { trueFilename = os.Getenv("CAYLEY_CFG") } else { if _, err := os.Stat("/etc/cayley.cfg"); err == nil { trueFilename = "/etc/cayley.cfg" } } } if trueFilename == "" { glog.Infoln("Couldn't find a config file in either $CAYLEY_CFG or /etc/cayley.cfg. Going by flag defaults only.") } config := ParseConfigFromFile(trueFilename) if config.DatabasePath == "" { config.DatabasePath = *databasePath } if config.DatabaseType == "" { config.DatabaseType = *databaseBackend } if config.ReplicationType == "" { config.ReplicationType = *replicationBackend } if config.ListenHost == "" { config.ListenHost = *host } if config.ListenPort == "" { config.ListenPort = *port } if config.Timeout == 0 { config.Timeout = *timeout } if config.LoadSize == 0 { config.LoadSize = *loadSize } config.ReadOnly = config.ReadOnly || *readOnly return config }
func (qs *TripleStore) ApplyDeltas(in []graph.Delta) error { qs.session.SetSafe(nil) ids := make(map[string]int) // Pre-check the existence condition. for _, d := range in { key := qs.getIdForQuad(d.Quad) switch d.Action { case graph.Add: if qs.checkValid(key) { return graph.ErrQuadExists } case graph.Delete: if !qs.checkValid(key) { return graph.ErrQuadNotExist } } } if glog.V(2) { glog.Infoln("Existence verified. Proceeding.") } for _, d := range in { err := qs.updateLog(d) if err != nil { return err } } for _, d := range in { err := qs.updateQuad(d.Quad, d.ID, d.Action) if err != nil { return err } var countdelta int if d.Action == graph.Add { countdelta = 1 } else { countdelta = -1 } ids[d.Quad.Subject] += countdelta ids[d.Quad.Object] += countdelta ids[d.Quad.Predicate] += countdelta if d.Quad.Label != "" { ids[d.Quad.Label] += countdelta } } for k, v := range ids { err := qs.updateNodeBy(k, v) if err != nil { return err } } qs.session.SetSafe(&mgo.Safe{}) return nil }
func main() { // No command? It's time for usage. if len(os.Args) == 1 { Usage() os.Exit(1) } cmd := os.Args[1] newargs := make([]string, 0) newargs = append(newargs, os.Args[0]) newargs = append(newargs, os.Args[2:]...) os.Args = newargs flag.Parse() var ts graph.TripleStore cfg := config.ParseConfigFromFlagsAndFile(*configFile) if os.Getenv("GOMAXPROCS") == "" { runtime.GOMAXPROCS(runtime.NumCPU()) glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU()) } else { glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting") } switch cmd { case "init": db.Init(cfg, *tripleFile) case "load": ts = db.Open(cfg) db.Load(ts, cfg, *tripleFile, false) ts.Close() case "repl": ts = db.Open(cfg) db.Repl(ts, *queryLanguage, cfg) ts.Close() case "http": ts = db.Open(cfg) http.Serve(ts, cfg) ts.Close() default: fmt.Println("No command", cmd) flag.Usage() } }
func configFrom(file string) (*config.Config, error) { // Find the file... if file != "" { if _, err := os.Stat(file); os.IsNotExist(err) { return nil, fmt.Errorf("Cannot find specified configuration file", file) } } else if _, err := os.Stat("/cayley_appengine.cfg"); err == nil { file = "/cayley_appengine.cfg" } if file == "" { glog.Infoln("Couldn't find a config file appengine.cfg. Going by flag defaults only.") } cfg, err := config.Load(file) if err != nil { return nil, err } if cfg.DatabasePath == "" { cfg.DatabasePath = databasePath } if cfg.DatabaseType == "" { cfg.DatabaseType = databaseBackend } if cfg.ReplicationType == "" { cfg.ReplicationType = replicationBackend } if cfg.ListenHost == "" { cfg.ListenHost = host } if cfg.ListenPort == "" { cfg.ListenPort = port } if cfg.Timeout == 0 { cfg.Timeout = timeout } if cfg.LoadSize == 0 { cfg.LoadSize = loadSize } cfg.ReadOnly = cfg.ReadOnly || readOnly return cfg, nil }
func newQuadStore(path string, options graph.Options) (graph.QuadStore, error) { var qs QuadStore var err error qs.path = path cacheSize := DefaultCacheSize val, ok, err := options.IntKey("cache_size_mb") if err != nil { return nil, err } else if ok { cacheSize = val } qs.dbOpts = &opt.Options{ BlockCacheCapacity: cacheSize * opt.MiB, } qs.dbOpts.ErrorIfMissing = true writeBufferSize := DefaultWriteBufferSize val, ok, err = options.IntKey("writeBufferSize") if err != nil { return nil, err } else if ok { writeBufferSize = val } qs.dbOpts.WriteBuffer = writeBufferSize * opt.MiB qs.writeopts = &opt.WriteOptions{ Sync: false, } qs.readopts = &opt.ReadOptions{} db, err := leveldb.OpenFile(qs.path, qs.dbOpts) if err != nil { glog.Errorln("Error, could not open! ", err) return nil, err } qs.db = db glog.Infoln(qs.GetStats()) err = qs.getMetadata() if err != nil { return nil, err } return &qs, nil }
func (qs *QuadStore) ApplyDeltas(in []graph.Delta, ignoreOpts graph.IgnoreOpts) error { qs.session.SetSafe(nil) ids := make(map[string]int) // Pre-check the existence condition. for _, d := range in { if d.Action != graph.Add && d.Action != graph.Delete { return errors.New("mongo: invalid action") } key := qs.getIDForQuad(d.Quad) switch d.Action { case graph.Add: if qs.checkValid(key) { if ignoreOpts.IgnoreDup { continue } else { return graph.ErrQuadExists } } case graph.Delete: if !qs.checkValid(key) { if ignoreOpts.IgnoreMissing { continue } else { return graph.ErrQuadNotExist } } } } if glog.V(2) { glog.Infoln("Existence verified. Proceeding.") } for _, d := range in { err := qs.updateLog(d) if err != nil { return err } } for _, d := range in { err := qs.updateQuad(d.Quad, d.ID.Int(), d.Action) if err != nil { return err } var countdelta int if d.Action == graph.Add { countdelta = 1 } else { countdelta = -1 } ids[d.Quad.Subject] += countdelta ids[d.Quad.Object] += countdelta ids[d.Quad.Predicate] += countdelta if d.Quad.Label != "" { ids[d.Quad.Label] += countdelta } } for k, v := range ids { err := qs.updateNodeBy(k, v) if err != nil { return err } } qs.session.SetSafe(&mgo.Safe{}) return nil }
func main() { // No command? It's time for usage. if len(os.Args) == 1 { fmt.Fprintln(os.Stderr, "Cayley is a graph store and graph query layer.") usage() os.Exit(1) } cmd := os.Args[1] os.Args = append(os.Args[:1], os.Args[2:]...) flag.Parse() var buildString string if Version != "" { buildString = fmt.Sprint("Cayley ", Version, " built ", BuildDate) glog.Infoln(buildString) } cfg := configFrom(*configFile) if os.Getenv("GOMAXPROCS") == "" { runtime.GOMAXPROCS(runtime.NumCPU()) glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU()) } else { glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting") } var ( handle *graph.Handle err error ) switch cmd { case "version": if Version != "" { fmt.Println(buildString) } else { fmt.Println("Cayley snapshot") } os.Exit(0) case "init": err = db.Init(cfg) if err != nil { break } if *quadFile != "" { handle, err = db.Open(cfg) if err != nil { break } err = internal.Load(handle.QuadWriter, cfg, *quadFile, *quadType) if err != nil { break } handle.Close() } case "load": handle, err = db.Open(cfg) if err != nil { break } err = internal.Load(handle.QuadWriter, cfg, *quadFile, *quadType) if err != nil { break } handle.Close() case "repl": handle, err = db.Open(cfg) if err != nil { break } if !graph.IsPersistent(cfg.DatabaseType) { err = internal.Load(handle.QuadWriter, cfg, "", *quadType) if err != nil { break } } err = db.Repl(handle, *queryLanguage, cfg) handle.Close() case "http": handle, err = db.Open(cfg) if err != nil { break } if !graph.IsPersistent(cfg.DatabaseType) { err = internal.Load(handle.QuadWriter, cfg, "", *quadType) if err != nil { break } } http.Serve(handle, cfg) handle.Close() default: fmt.Println("No command", cmd) usage() } if err != nil { glog.Errorln(err) } }
func main() { // No command? It's time for usage. if len(os.Args) == 1 { Usage() os.Exit(1) } cmd := os.Args[1] var newargs []string newargs = append(newargs, os.Args[0]) newargs = append(newargs, os.Args[2:]...) os.Args = newargs flag.Parse() var buildString string if VERSION != "" { buildString = fmt.Sprint("Cayley ", VERSION, " built ", BUILD_DATE) glog.Infoln(buildString) } cfg := config.ParseConfigFromFlagsAndFile(*configFile) if os.Getenv("GOMAXPROCS") == "" { runtime.GOMAXPROCS(runtime.NumCPU()) glog.Infoln("Setting GOMAXPROCS to", runtime.NumCPU()) } else { glog.Infoln("GOMAXPROCS currently", os.Getenv("GOMAXPROCS"), " -- not adjusting") } var ( ts graph.TripleStore err error ) switch cmd { case "version": if VERSION != "" { fmt.Println(buildString) } else { fmt.Println("Cayley snapshot") } os.Exit(0) case "init": err = db.Init(cfg, *tripleFile) case "load": ts, err = db.Open(cfg) if err != nil { break } err = db.Load(ts, cfg, *tripleFile) if err != nil { break } ts.Close() case "repl": ts, err = db.Open(cfg) if err != nil { break } err = db.Repl(ts, *queryLanguage, cfg) if err != nil { break } ts.Close() case "http": ts, err = db.Open(cfg) if err != nil { break } http.Serve(ts, cfg) ts.Close() default: fmt.Println("No command", cmd) flag.Usage() } if err != nil { glog.Fatalln(err) } }