// Example_roaring demonstrates how to use the roaring library. func Example_roaring() { // example inspired by https://github.com/fzandona/goroar fmt.Println("==roaring==") rb1 := roaring.BitmapOf(1, 2, 3, 4, 5, 100, 1000) fmt.Println(rb1.String()) rb2 := roaring.BitmapOf(3, 4, 1000) fmt.Println(rb2.String()) rb3 := roaring.NewBitmap() fmt.Println(rb3.String()) fmt.Println("Cardinality: ", rb1.GetCardinality()) fmt.Println("Contains 3? ", rb1.Contains(3)) rb1.And(rb2) rb3.Add(1) rb3.Add(5) rb3.Or(rb1) // prints 1, 3, 4, 5, 1000 i := rb3.Iterator() for i.HasNext() { fmt.Println(i.Next()) } fmt.Println() // next we include an example of serialization buf := new(bytes.Buffer) size, err := rb1.WriteTo(buf) if err != nil { fmt.Println("Failed writing") return } else { fmt.Println("Wrote ", size, " bytes") } newrb := roaring.NewBitmap() size, err = newrb.ReadFrom(buf) if err != nil { fmt.Println("Failed reading") return } if !rb1.Equals(newrb) { fmt.Println("I did not get back to original bitmap?") return } else { fmt.Println("I wrote the content to a byte stream and read it back.") } }
// removeTriple removes a triple from the indices. If the triple // contains any terms unique to that triple, they will also be removed. func (db *DB) removeTriple(tx *bolt.Tx, s, p, o uint32) error { // TODO think about what to do if present in one index but // not in another: maybe panic? Cause It's a bug that should be fixed. indices := []struct { k1 uint32 k2 uint32 v uint32 bk []byte }{ {s, p, o, bucketSPO}, {o, s, p, bucketOSP}, {p, o, s, bucketPOS}, } key := make([]byte, 8) for _, i := range indices { bkt := tx.Bucket(i.bk) copy(key, u32tob(i.k1)) copy(key[4:], u32tob(i.k2)) bo := bkt.Get(key) if bo == nil { // TODO should never happen, return bug error? return ErrNotFound } bitmap := roaring.NewBitmap() _, err := bitmap.ReadFrom(bytes.NewReader(bo)) if err != nil { return err } hasTriple := bitmap.CheckedRemove(i.v) if !hasTriple { // TODO should never happen, return bug error? return ErrNotFound } // Remove from index if bitmap is empty if bitmap.GetCardinality() == 0 { err = bkt.Delete(key) if err != nil { return err } } else { var b bytes.Buffer _, err = bitmap.WriteTo(&b) if err != nil { return err } err = bkt.Put(key, b.Bytes()) if err != nil { return err } } } //atomic.AddInt64(&db.numTr, -1) return db.removeOrphanedTerms(tx, s, p, o) }
func (b *baseBlockset) GetLiveINodes() *roaring.Bitmap { out := roaring.NewBitmap() for _, blk := range b.blocks { if blk.IsZero() { continue } out.Add(uint32(blk.INode)) } return out }
func (db *DB) forEach(fn func(rdf.Triple) error) error { return db.kv.View(func(tx *bolt.Tx) error { bkt := tx.Bucket(bucketSPO) // iterate over each each triple in SPO index if err := bkt.ForEach(func(k, v []byte) error { if len(k) != 8 { panic("len(SPO key) != 8") } sID := btou32(k[:4]) pID := btou32(k[4:]) var tr rdf.Triple var err error var term rdf.Term if term, err = db.getTerm(tx, sID); err != nil { return err } tr.Subj = term.(rdf.URI) if term, err = db.getTerm(tx, pID); err != nil { return err } tr.Pred = term.(rdf.URI) bitmap := roaring.NewBitmap() if _, err := bitmap.ReadFrom(bytes.NewReader(v)); err != nil { return err } // Iterate over each term in object bitmap i := bitmap.Iterator() for i.HasNext() { if tr.Obj, err = db.getTerm(tx, i.Next()); err != nil { return err } if err := fn(tr); err != nil { return err } } return nil }); err != nil { return err } return nil }) }
func (db *DB) storeTriple(tx *bolt.Tx, s, p, o uint32) error { indices := []struct { k1 uint32 k2 uint32 v uint32 bk []byte }{ {s, p, o, bucketSPO}, {o, s, p, bucketOSP}, {p, o, s, bucketPOS}, } key := make([]byte, 8) for _, i := range indices { bkt := tx.Bucket(i.bk) copy(key, u32tob(i.k1)) copy(key[4:], u32tob(i.k2)) bitmap := roaring.NewBitmap() bo := bkt.Get(key) if bo != nil { _, err := bitmap.ReadFrom(bytes.NewReader(bo)) if err != nil { return err } } newTriple := bitmap.CheckedAdd(i.v) if !newTriple { // Triple is allready stored return nil } var b bytes.Buffer _, err := bitmap.WriteTo(&b) if err != nil { return err } err = bkt.Put(key, b.Bytes()) if err != nil { return err } } //atomic.AddInt64(&db.numTr, 1) return nil }
// Has checks if the given Triple is stored. func (db *DB) Has(tr rdf.Triple) (exists bool, err error) { err = db.kv.View(func(tx *bolt.Tx) error { sID, err := db.getID(tx, tr.Subj) if err == ErrNotFound { return nil } else if err != nil { return err } // TODO get pID from cache, and move to top before sID pID, err := db.getID(tx, tr.Pred) if err == ErrNotFound { return nil } else if err != nil { return err } oID, err := db.getID(tx, tr.Obj) if err == ErrNotFound { return nil } else if err != nil { return err } bkt := tx.Bucket(bucketSPO) sp := make([]byte, 8) copy(sp, u32tob(sID)) copy(sp[4:], u32tob(pID)) bitmap := roaring.NewBitmap() bo := bkt.Get(sp) if bo == nil { return nil } _, err = bitmap.ReadFrom(bytes.NewReader(bo)) if err != nil { return err } exists = bitmap.Contains(oID) return nil }) return exists, err }
func (me *Bitmap) lazyRB() *roaring.Bitmap { if me.rb == nil { me.rb = roaring.NewBitmap() } return me.rb }
// Dump writes the entire database as a Turtle serialization to the given writer. func (db *DB) Dump(to io.Writer) error { // TODO getTerm without expanding base URI? // base is prefixed added, but then stripped again here w := bufio.NewWriter(to) w.WriteString("@base <") w.WriteString(db.base) w.WriteString(">") return db.kv.View(func(tx *bolt.Tx) error { defer w.Flush() var curSubj uint32 var subj, pred, obj rdf.Term bkt := tx.Bucket(bucketSPO) if err := bkt.ForEach(func(k, v []byte) error { if len(k) != 8 { panic("len(SPO key) != 8") } var err error sID := btou32(k[:4]) if sID != curSubj { // end previous statement w.WriteString(" .\n") curSubj = sID if subj, err = db.getTerm(tx, sID); err != nil { return err } w.WriteRune('<') w.WriteString(strings.TrimPrefix(subj.String(), db.base)) w.WriteString("> ") } else { // continue with same subject w.WriteString(" ;\n\t") } pID := btou32(k[4:]) if pred, err = db.getTerm(tx, pID); err != nil { return err } if pred == rdf.RDFtype { w.WriteString("a ") } else { w.WriteRune('<') w.WriteString(strings.TrimPrefix(pred.String(), db.base)) w.WriteString("> ") } bitmap := roaring.NewBitmap() if _, err := bitmap.ReadFrom(bytes.NewReader(v)); err != nil { return err } i := bitmap.Iterator() c := 0 for i.HasNext() { if obj, err = db.getTerm(tx, i.Next()); err != nil { return err } if c > 0 { w.WriteString(", ") } switch t := obj.(type) { case rdf.URI: w.WriteRune('<') w.WriteString(strings.TrimPrefix(obj.String(), db.base)) w.WriteRune('>') case rdf.Literal: // TODO bench & optimize switch t.DataType() { case rdf.RDFlangString: fmt.Fprintf(w, "%q@%s", t.String(), t.Lang()) case rdf.XSDstring: fmt.Fprintf(w, "%q", t.String()) default: fmt.Fprintf(w, "%q^^<%s>", t.String(), strings.TrimPrefix(t.DataType().String(), db.base)) } } c++ } return nil }); err != nil { return err } w.WriteString(" .\n") return nil }) }
// Describe returns a graph with all the triples where the given node // is subject. If asObject is true, it also includes the triples where // the node is object. func (db *DB) Describe(node rdf.URI, asObject bool) (*rdf.Graph, error) { g := rdf.NewGraph() err := db.kv.View(func(tx *bolt.Tx) error { bkt := tx.Bucket(bucketIdxTerms) bt := db.encode(node) bs := bkt.Get(bt) if bs == nil { return nil } // seek in SPO index: // WHERE { <node> ?p ?o } sid := btou32(bs) cur := tx.Bucket(bucketSPO).Cursor() outerSPO: for k, v := cur.Seek(u32tob(sid - 1)); k != nil; k, v = cur.Next() { switch bytes.Compare(k[:4], bs) { case 0: bkt = tx.Bucket(bucketTerms) b := bkt.Get(k[4:]) if b == nil { return errors.New("bug: term ID in index, but not stored") } // TODO get pred from cache pred, err := db.decode(b) if err != nil { return err } bitmap := roaring.NewBitmap() _, err = bitmap.ReadFrom(bytes.NewReader(v)) if err != nil { return err } it := bitmap.Iterator() for it.HasNext() { o := it.Next() b = bkt.Get(u32tob(o)) if b == nil { return errors.New("bug: term ID in index, but not stored") } obj, err := db.decode(b) if err != nil { return err } g.Insert(rdf.Triple{Subj: node, Pred: pred.(rdf.URI), Obj: obj}) } case 1: break outerSPO } } if !asObject { return nil } // seek in OSP index: // WHERE { ?s ?p <node> } cur = tx.Bucket(bucketOSP).Cursor() outerOSP: for k, v := cur.Seek(u32tob(sid - 1)); k != nil; k, v = cur.Next() { switch bytes.Compare(k[:4], bs) { case 0: bkt = tx.Bucket(bucketTerms) b := bkt.Get(k[4:]) if b == nil { return errors.New("bug: term ID in index, but not stored") } subj, err := db.decode(b) if err != nil { return err } bitmap := roaring.NewBitmap() _, err = bitmap.ReadFrom(bytes.NewReader(v)) if err != nil { return err } it := bitmap.Iterator() for it.HasNext() { o := it.Next() b = bkt.Get(u32tob(o)) if b == nil { return errors.New("bug: term ID in index, but not stored") } // TODO get pred from cache pred, err := db.decode(b) if err != nil { return err } g.Insert(rdf.Triple{Subj: subj.(rdf.URI), Pred: pred.(rdf.URI), Obj: node}) } case 1: break outerOSP } } return nil }) return g, err }