// Import imports triples from an Turtle stream, in batches of given size. // It will ignore triples with blank nodes and errors. // It returns the total number of triples imported. func (db *DB) Import(r io.Reader, batchSize int) (int, error) { dec := rdf.NewDecoder(r) g := rdf.NewGraph() c := 0 // totalt count i := 0 // current batch count for tr, err := dec.Decode(); err != io.EOF; tr, err = dec.Decode() { if err != nil { // log.Println(err.Error()) continue } g.Insert(tr) i++ if i == batchSize { err = db.ImportGraph(g) if err != nil { return c, err } c += i i = 0 g = rdf.NewGraph() } } if len(g.Nodes()) > 0 { err := db.ImportGraph(g) if err != nil { return c, err } c += i } return c, nil }
func (t testdata) Graph() *rdf.Graph { g := rdf.NewGraph() for _, item := range t { g.Insert(item.Triple) } return g }
// Verify that Describe returns the same graph as rdf rdf.Graph reference implementation. func TestDescribe_Quick(t *testing.T) { f := func(items testdata) bool { db := newTestDB() defer db.Close() // test against in-memory reference implementation ref := rdf.NewGraph() for _, item := range items { if err := db.Insert(item.Triple); err != nil { t.Logf("DB.Insert(%v) failed: %v", item.Triple, err) t.FailNow() } ref.Insert(item.Triple) } for _, item := range items { want := ref.Describe(item.Triple.Subj, false) got, err := db.Describe(item.Triple.Subj, false) if err != nil { t.Logf("DB.Describe(%v, false) failed: %v", item.Triple.Subj, err) t.FailNow() } if !got.Eq(want) { t.Logf("DB.Describe(%v, false) =>\n%s\nwant:\n%s", item.Triple.Subj, got.Serialize(rdf.Turtle, ""), want.Serialize(rdf.Turtle, "")) t.FailNow() } want = ref.Describe(item.Triple.Subj, true) got, err = db.Describe(item.Triple.Subj, true) if err != nil { t.Logf("DB.Describe(%v, true) failed: %v", item.Triple.Subj, err) t.FailNow() } if !got.Eq(want) { t.Logf("DB.Describe(%v, true) =>\n%s\nwant:\n%s", item.Triple.Subj, got.Serialize(rdf.Turtle, ""), want.Serialize(rdf.Turtle, "")) t.FailNow() } } print(".") return true } if err := quick.Check(f, qconfig()); err != nil { t.Error(err) } }
// Describe returns a graph with all the triples where the given node // is subject. If asObject is true, it also includes the triples where // the node is object. func (db *DB) Describe(node rdf.URI, asObject bool) (*rdf.Graph, error) { g := rdf.NewGraph() err := db.kv.View(func(tx *bolt.Tx) error { bkt := tx.Bucket(bucketIdxTerms) bt := db.encode(node) bs := bkt.Get(bt) if bs == nil { return nil } // seek in SPO index: // WHERE { <node> ?p ?o } sid := btou32(bs) cur := tx.Bucket(bucketSPO).Cursor() outerSPO: for k, v := cur.Seek(u32tob(sid - 1)); k != nil; k, v = cur.Next() { switch bytes.Compare(k[:4], bs) { case 0: bkt = tx.Bucket(bucketTerms) b := bkt.Get(k[4:]) if b == nil { return errors.New("bug: term ID in index, but not stored") } // TODO get pred from cache pred, err := db.decode(b) if err != nil { return err } bitmap := roaring.NewBitmap() _, err = bitmap.ReadFrom(bytes.NewReader(v)) if err != nil { return err } it := bitmap.Iterator() for it.HasNext() { o := it.Next() b = bkt.Get(u32tob(o)) if b == nil { return errors.New("bug: term ID in index, but not stored") } obj, err := db.decode(b) if err != nil { return err } g.Insert(rdf.Triple{Subj: node, Pred: pred.(rdf.URI), Obj: obj}) } case 1: break outerSPO } } if !asObject { return nil } // seek in OSP index: // WHERE { ?s ?p <node> } cur = tx.Bucket(bucketOSP).Cursor() outerOSP: for k, v := cur.Seek(u32tob(sid - 1)); k != nil; k, v = cur.Next() { switch bytes.Compare(k[:4], bs) { case 0: bkt = tx.Bucket(bucketTerms) b := bkt.Get(k[4:]) if b == nil { return errors.New("bug: term ID in index, but not stored") } subj, err := db.decode(b) if err != nil { return err } bitmap := roaring.NewBitmap() _, err = bitmap.ReadFrom(bytes.NewReader(v)) if err != nil { return err } it := bitmap.Iterator() for it.HasNext() { o := it.Next() b = bkt.Get(u32tob(o)) if b == nil { return errors.New("bug: term ID in index, but not stored") } // TODO get pred from cache pred, err := db.decode(b) if err != nil { return err } g.Insert(rdf.Triple{Subj: subj.(rdf.URI), Pred: pred.(rdf.URI), Obj: node}) } case 1: break outerOSP } } return nil }) return g, err }