Esempio n. 1
0
File: db.go Progetto: boutros/sopp
// Import reads triples from a Turtle stream and stores them in batches of
// batchSize triples. Input that fails to decode is skipped silently (the
// original note states this covers triples with blank nodes as well as errors).
// It returns the total number of triples imported; if storing a batch fails,
// the count accumulated before that batch is returned along with the error.
func (db *DB) Import(r io.Reader, batchSize int) (int, error) {
	var (
		total   int // triples stored in completed batches
		pending int // triples collected in the current batch
	)
	dec := rdf.NewDecoder(r)
	batch := rdf.NewGraph()
	for {
		tr, err := dec.Decode()
		if err == io.EOF {
			break
		}
		if err != nil {
			// Undecodable input is skipped rather than aborting the import.
			continue
		}
		batch.Insert(tr)
		pending++
		if pending != batchSize {
			continue
		}
		// The batch is full: store it and start a fresh one.
		if err := db.ImportGraph(batch); err != nil {
			return total, err
		}
		total += pending
		pending = 0
		batch = rdf.NewGraph()
	}
	// Store whatever is left in the last, partially filled batch.
	if len(batch.Nodes()) > 0 {
		if err := db.ImportGraph(batch); err != nil {
			return total, err
		}
		total += pending
	}
	return total, nil
}
Esempio n. 2
0
// Graph builds a new rdf.Graph holding the triple of every item in t.
func (t testdata) Graph() *rdf.Graph {
	graph := rdf.NewGraph()
	for i := range t {
		graph.Insert(t[i].Triple)
	}
	return graph
}
Esempio n. 3
0
// TestDescribe_Quick verifies that DB.Describe returns the same graph as the
// in-memory rdf.Graph reference implementation, using inputs generated by
// testing/quick.
func TestDescribe_Quick(t *testing.T) {
	matchesReference := func(items testdata) bool {
		db := newTestDB()
		defer db.Close()

		// Fill the database and the in-memory reference graph with the same triples.
		ref := rdf.NewGraph()
		for _, entry := range items {
			if err := db.Insert(entry.Triple); err != nil {
				t.Fatalf("DB.Insert(%v) failed: %v", entry.Triple, err)
			}
			ref.Insert(entry.Triple)
		}

		for _, entry := range items {
			subj := entry.Triple.Subj

			// Triples where the node is subject only.
			expected := ref.Describe(subj, false)
			actual, err := db.Describe(subj, false)
			if err != nil {
				t.Fatalf("DB.Describe(%v, false) failed: %v", subj, err)
			}
			if !actual.Eq(expected) {
				t.Fatalf("DB.Describe(%v, false) =>\n%s\nwant:\n%s",
					subj, actual.Serialize(rdf.Turtle, ""), expected.Serialize(rdf.Turtle, ""))
			}

			// Also including triples where the node is object.
			expected = ref.Describe(subj, true)
			actual, err = db.Describe(subj, true)
			if err != nil {
				t.Fatalf("DB.Describe(%v, true) failed: %v", subj, err)
			}
			if !actual.Eq(expected) {
				t.Fatalf("DB.Describe(%v, true) =>\n%s\nwant:\n%s",
					subj, actual.Serialize(rdf.Turtle, ""), expected.Serialize(rdf.Turtle, ""))
			}
		}

		// Progress marker: one dot per generated input that passed.
		print(".")
		return true
	}

	if err := quick.Check(matchesReference, qconfig()); err != nil {
		t.Error(err)
	}
}
Esempio n. 4
0
File: db.go Progetto: boutros/sopp
// Describe returns a graph with all the triples where the given node
// is subject. If asObject is true, it also includes the triples where
// the node is object.
//
// If the node has no entry in the term index, an empty graph and a nil error
// are returned. An error raised inside the read transaction is returned
// together with the graph built up to that point.
func (db *DB) Describe(node rdf.URI, asObject bool) (*rdf.Graph, error) {
	g := rdf.NewGraph()
	err := db.kv.View(func(tx *bolt.Tx) error {
		// Resolve the node to its stored ID; an unknown node has no triples.
		bs := tx.Bucket(bucketIdxTerms).Get(db.encode(node))
		if bs == nil {
			return nil
		}
		// Every ID found in the indexes is resolved through this bucket, so
		// look it up once instead of once per index entry.
		terms := tx.Bucket(bucketTerms)

		// seek in SPO index:
		// WHERE { <node> ?p ?o }
		//
		// Index keys begin with the node ID, so this is a plain prefix scan:
		// seek straight to the ID and stop at the first key without that
		// prefix. Starting at the ID itself (rather than at ID-1) avoids
		// walking the preceding term's entries and cannot wrap around for ID 0.
		cur := tx.Bucket(bucketSPO).Cursor()
		for k, v := cur.Seek(bs); k != nil && bytes.HasPrefix(k, bs); k, v = cur.Next() {
			// The rest of the key is the predicate ID.
			b := terms.Get(k[len(bs):])
			if b == nil {
				return errors.New("bug: term ID in index, but not stored")
			}

			// TODO get pred from cache
			term, err := db.decode(b)
			if err != nil {
				return err
			}
			pred, ok := term.(rdf.URI)
			if !ok {
				// Report unexpected data as an error rather than panicking
				// inside the transaction.
				return errors.New("bug: predicate in index is not a URI")
			}

			// The value is a serialized bitmap of object IDs.
			bitmap := roaring.NewBitmap()
			if _, err = bitmap.ReadFrom(bytes.NewReader(v)); err != nil {
				return err
			}
			for it := bitmap.Iterator(); it.HasNext(); {
				b = terms.Get(u32tob(it.Next()))
				if b == nil {
					return errors.New("bug: term ID in index, but not stored")
				}

				obj, err := db.decode(b)
				if err != nil {
					return err
				}
				g.Insert(rdf.Triple{Subj: node, Pred: pred, Obj: obj})
			}
		}

		if !asObject {
			return nil
		}

		// seek in OSP index:
		// WHERE { ?s ?p <node> }
		cur = tx.Bucket(bucketOSP).Cursor()
		for k, v := cur.Seek(bs); k != nil && bytes.HasPrefix(k, bs); k, v = cur.Next() {
			// The rest of the key is the subject ID.
			b := terms.Get(k[len(bs):])
			if b == nil {
				return errors.New("bug: term ID in index, but not stored")
			}
			term, err := db.decode(b)
			if err != nil {
				return err
			}
			subj, ok := term.(rdf.URI)
			if !ok {
				return errors.New("bug: subject in index is not a URI")
			}

			// The value is a serialized bitmap of predicate IDs.
			bitmap := roaring.NewBitmap()
			if _, err = bitmap.ReadFrom(bytes.NewReader(v)); err != nil {
				return err
			}
			for it := bitmap.Iterator(); it.HasNext(); {
				b = terms.Get(u32tob(it.Next()))
				if b == nil {
					return errors.New("bug: term ID in index, but not stored")
				}
				// TODO get pred from cache
				term, err := db.decode(b)
				if err != nil {
					return err
				}
				pred, ok := term.(rdf.URI)
				if !ok {
					return errors.New("bug: predicate in index is not a URI")
				}
				g.Insert(rdf.Triple{Subj: subj, Pred: pred, Obj: node})
			}
		}

		return nil
	})

	return g, err
}