func (qs *TripleStore) RemoveTriple(t quad.Quad) { _, err := qs.db.Get(qs.createKeyFor(spo, t), qs.readopts) if err != nil && err != leveldb.ErrNotFound { glog.Error("Couldn't access DB to confirm deletion") return } if err == leveldb.ErrNotFound { // No such triple in the database, forget about it. return } batch := &leveldb.Batch{} batch.Delete(qs.createKeyFor(spo, t)) batch.Delete(qs.createKeyFor(osp, t)) batch.Delete(qs.createKeyFor(pos, t)) qs.UpdateValueKeyBy(t.Get(quad.Subject), -1, batch) qs.UpdateValueKeyBy(t.Get(quad.Predicate), -1, batch) qs.UpdateValueKeyBy(t.Get(quad.Object), -1, batch) if t.Get(quad.Label) != "" { batch.Delete(qs.createProvKeyFor(pso, t)) qs.UpdateValueKeyBy(t.Get(quad.Label), -1, batch) } err = qs.db.Write(batch, nil) if err != nil { glog.Errorf("Couldn't delete triple %s.", t) return } qs.size-- }
func main() { // Writer of link between entities and sentences. rdfWriter := gzip.NewWriter(os.Stdout) defer func() { rdfWriter.Flush() rdfWriter.Close() }() // Read ASSIGNMENT.csv r := csv.NewReader(os.Stdin) line := 0 for { record, err := r.Read() line++ if err == io.EOF { break } else if err != nil { fmt.Fprintf(os.Stderr, "Error line %d: %v\n", line, err) continue } if line%10000 == 0 { fmt.Fprintf(os.Stderr, "Line %d\n", line) } // Link to entity. quad := quad.Quad{ Subject: "<" + record[1] + ">", Predicate: "<http://rdf.romainstrock.com/ns/sentence.is_about>", Object: "<" + record[3] + ">", } fmt.Fprintln(rdfWriter, quad.NQuad()) } }
func (qs *QuadStore) indexOf(t quad.Quad) (int64, bool) { min := maxInt var tree *b.Tree for d := quad.Subject; d <= quad.Label; d++ { sid := t.Get(d) if d == quad.Label && sid == "" { continue } id, ok := qs.idMap[sid] // If we've never heard about a node, it must not exist if !ok { return 0, false } index, ok := qs.index.Get(d, id) if !ok { // If it's never been indexed in this direction, it can't exist. return 0, false } if l := index.Len(); l < min { min, tree = l, index } } it := NewIterator(tree, "", qs) for it.Next() { val := it.Result() if t == qs.log[val.(int64)].Quad { return val.(int64), true } } return 0, false }
func (ts *TripleStore) tripleExists(t quad.Quad) (bool, int64) { smallest := -1 var smallest_tree *llrb.LLRB for d := quad.Subject; d <= quad.Label; d++ { sid := t.Get(d) if d == quad.Label && sid == "" { continue } id, ok := ts.idMap[sid] // If we've never heard about a node, it most not exist if !ok { return false, 0 } index, exists := ts.index.Get(d, id) if !exists { // If it's never been indexed in this direction, it can't exist. return false, 0 } if smallest == -1 || index.Len() < smallest { smallest = index.Len() smallest_tree = index } } it := NewLlrbIterator(smallest_tree, "") for it.Next() { val := it.Result() if t == ts.triples[val.(int64)] { return true, val.(int64) } } return false, 0 }
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, triple quad.Quad) []byte { key := make([]byte, 0, (hashSize * 4)) key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[3]))...) return key }
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, q quad.Quad) []byte { key := make([]byte, quad.HashSize*4) quad.HashTo(q.Get(d[0]), key[quad.HashSize*0:quad.HashSize*1]) quad.HashTo(q.Get(d[1]), key[quad.HashSize*1:quad.HashSize*2]) quad.HashTo(q.Get(d[2]), key[quad.HashSize*2:quad.HashSize*3]) quad.HashTo(q.Get(d[3]), key[quad.HashSize*3:quad.HashSize*4]) return key }
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, q quad.Quad) []byte { key := make([]byte, 0, (hashSize * 4)) key = append(key, hashOf(q.Get(d[0]))...) key = append(key, hashOf(q.Get(d[1]))...) key = append(key, hashOf(q.Get(d[2]))...) key = append(key, hashOf(q.Get(d[3]))...) return key }
func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte { key := make([]byte, 0, 2+(qs.hasher.Size()*3)) // TODO(kortschak) Remove dependence on String() method. key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...) key = append(key, qs.convertStringToByteHash(triple.Get(d[0]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[1]))...) key = append(key, qs.convertStringToByteHash(triple.Get(d[2]))...) return key }
func (qs *TripleStore) buildWrite(batch *leveldb.Batch, t quad.Quad) { qs.buildTripleWrite(batch, t) qs.UpdateValueKeyBy(t.Get(quad.Subject), 1, nil) qs.UpdateValueKeyBy(t.Get(quad.Predicate), 1, nil) qs.UpdateValueKeyBy(t.Get(quad.Object), 1, nil) if t.Get(quad.Label) != "" { qs.UpdateValueKeyBy(t.Get(quad.Label), 1, nil) } }
func createKeyFor(d [4]quad.Direction, q quad.Quad) []byte { key := make([]byte, 2+(quad.HashSize*4)) key[0] = d[0].Prefix() key[1] = d[1].Prefix() quad.HashTo(q.Get(d[0]), key[2+quad.HashSize*0:2+quad.HashSize*1]) quad.HashTo(q.Get(d[1]), key[2+quad.HashSize*1:2+quad.HashSize*2]) quad.HashTo(q.Get(d[2]), key[2+quad.HashSize*2:2+quad.HashSize*3]) quad.HashTo(q.Get(d[3]), key[2+quad.HashSize*3:2+quad.HashSize*4]) return key }
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, q quad.Quad) []byte { key := make([]byte, 0, 2+(hashSize*4)) // TODO(kortschak) Remove dependence on String() method. key = append(key, []byte{d[0].Prefix(), d[1].Prefix()}...) key = append(key, hashOf(q.Get(d[0]))...) key = append(key, hashOf(q.Get(d[1]))...) key = append(key, hashOf(q.Get(d[2]))...) key = append(key, hashOf(q.Get(d[3]))...) return key }
func (l *SQLLinkIterator) buildResult(result []string, cols []string) map[string]string { var q quad.Quad q.Subject = result[0] q.Predicate = result[1] q.Object = result[2] q.Label = result[3] l.resultQuad = q m := make(map[string]string) for i, c := range cols[4:] { m[c] = result[i+4] } return m }
func (qs *TripleStore) buildTripleWrite(batch *leveldb.Batch, t quad.Quad) { bytes, err := json.Marshal(t) if err != nil { glog.Errorf("Couldn't write to buffer for triple %s: %s", t, err) return } batch.Put(qs.createKeyFor(spo, t), bytes) batch.Put(qs.createKeyFor(osp, t), bytes) batch.Put(qs.createKeyFor(pos, t), bytes) if t.Get(quad.Label) != "" { batch.Put(qs.createProvKeyFor(pso, t), bytes) } }
func main() { // Writer of link between entities and sentences. rdfWriter := gzip.NewWriter(os.Stdout) defer func() { rdfWriter.Flush() rdfWriter.Close() }() // Read SENTENCES.csv r := csv.NewReader(os.Stdin) line := 0 for { record, err := r.Read() line++ if err == io.EOF { break } else if err != nil { fmt.Fprintf(os.Stderr, "Error line %d: %v\n", line, err) continue } if line%10000 == 0 { fmt.Fprintf(os.Stderr, "Line %d\n", line) } // Speaker. quad_1 := quad.Quad{ Subject: "<" + record[0] + ">", Predicate: "<http://rdf.romainstrock.com/ns/sentence.speaker>", Object: "<" + record[3] + ">", } fmt.Fprintln(rdfWriter, quad_1.NQuad()) // Sentence text. text := record[6] quad_2 := quad.Quad{ Subject: "<" + record[0] + ">", Predicate: "<http://rdf.romainstrock.com/ns/sentence.text>", Object: strconv.Quote(text), } fmt.Fprintln(rdfWriter, quad_2.NQuad()) // Source URL. quad_3 := quad.Quad{ Subject: "<" + record[0] + ">", Predicate: "<http://rdf.romainstrock.com/ns/sentence.source_url>", Object: "<" + record[5] + ">", } fmt.Fprintln(rdfWriter, quad_3.NQuad()) } }
func (qs *QuadStore) buildQuadWrite(batch *leveldb.Batch, q quad.Quad, id int64, isAdd bool) error { var entry proto.HistoryEntry data, err := qs.db.Get(createKeyFor(spo, q), qs.readopts) if err != nil && err != leveldb.ErrNotFound { glog.Error("could not access DB to prepare index: ", err) return err } if data != nil { // We got something. err = entry.Unmarshal(data) if err != nil { return err } } if isAdd && len(entry.History)%2 == 1 { if glog.V(2) { glog.Errorf("attempt to add existing quad %v: %#v", entry, q) } return graph.ErrQuadExists } if !isAdd && len(entry.History)%2 == 0 { if glog.V(2) { glog.Errorf("attempt to delete non-existent quad %v: %#c", entry, q) } return graph.ErrQuadNotExist } entry.History = append(entry.History, uint64(id)) bytes, err := entry.Marshal() if err != nil { glog.Errorf("could not write to buffer for entry %#v: %s", entry, err) return err } batch.Put(createKeyFor(spo, q), bytes) batch.Put(createKeyFor(osp, q), bytes) batch.Put(createKeyFor(pos, q), bytes) if q.Get(quad.Label) != nil { batch.Put(createKeyFor(cps, q), bytes) } return nil }
func (qs *QuadStore) buildQuadWrite(tx *bolt.Tx, q quad.Quad, id int64, isAdd bool) error { var entry proto.HistoryEntry b := tx.Bucket(spoBucket) b.FillPercent = localFillPercent data := b.Get(qs.createKeyFor(spo, q)) if data != nil { // We got something. err := entry.Unmarshal(data) if err != nil { return err } } if isAdd && len(entry.History)%2 == 1 { glog.Errorf("attempt to add existing quad %v: %#v", entry, q) return graph.ErrQuadExists } if !isAdd && len(entry.History)%2 == 0 { glog.Errorf("attempt to delete non-existent quad %v: %#v", entry, q) return graph.ErrQuadNotExist } entry.History = append(entry.History, uint64(id)) bytes, err := entry.Marshal() if err != nil { glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err) return err } for _, index := range [][4]quad.Direction{spo, osp, pos, cps} { if index == cps && q.Get(quad.Label) == nil { continue } b := tx.Bucket(bucketFor(index)) b.FillPercent = localFillPercent err = b.Put(qs.createKeyFor(index, q), bytes) if err != nil { return err } } return nil }
func (qs *QuadStore) buildQuadWrite(tx *bolt.Tx, q quad.Quad, id int64, isAdd bool) error { var entry IndexEntry b := tx.Bucket(spoBucket) b.FillPercent = localFillPercent data := b.Get(qs.createKeyFor(spo, q)) if data != nil { // We got something. err := json.Unmarshal(data, &entry) if err != nil { return err } } if isAdd && len(entry.History)%2 == 1 { glog.Error("Adding a valid quad ", entry) return graph.ErrQuadExists } if !isAdd && len(entry.History)%2 == 0 { glog.Error("Deleting an invalid quad ", entry) return graph.ErrQuadNotExist } entry.History = append(entry.History, id) jsonbytes, err := json.Marshal(entry) if err != nil { glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err) return err } for _, index := range [][4]quad.Direction{spo, osp, pos, cps} { if index == cps && q.Get(quad.Label) == "" { continue } b := tx.Bucket(bucketFor(index)) b.FillPercent = localFillPercent err = b.Put(qs.createKeyFor(index, q), jsonbytes) if err != nil { return err } } return nil }
func (ts *TripleStore) AddTriple(t quad.Quad) { if exists, _ := ts.tripleExists(t); exists { return } var tripleID int64 ts.triples = append(ts.triples, t) tripleID = ts.tripleIdCounter ts.size++ ts.tripleIdCounter++ for d := quad.Subject; d <= quad.Label; d++ { sid := t.Get(d) if d == quad.Label && sid == "" { continue } if _, ok := ts.idMap[sid]; !ok { ts.idMap[sid] = ts.idCounter ts.revIdMap[ts.idCounter] = sid ts.idCounter++ } } for d := quad.Subject; d <= quad.Label; d++ { if d == quad.Label && t.Get(d) == "" { continue } id := ts.idMap[t.Get(d)] tree := ts.index.GetOrCreate(d, id) tree.ReplaceOrInsert(Int64(tripleID)) } // TODO(barakmich): Add VIP indexing }
func (qs *QuadStore) buildQuadWriteLMDB(tx *lmdb.Txn, q quad.Quad, id int64, isAdd bool) error { var entry proto.HistoryEntry dbi := qs.dbis[spoDB] data, err := tx.Get(dbi, qs.createKeyFor(spo, q)) if err == nil { // We got something. err := entry.Unmarshal(data) if err != nil { return err } } if isAdd && len(entry.History)%2 == 1 { glog.Errorf("attempt to add existing quad %v: %#v", entry, q) return graph.ErrQuadExists } if !isAdd && len(entry.History)%2 == 0 { glog.Errorf("attempt to delete non-existent quad %v: %#v", entry, q) return graph.ErrQuadNotExist } entry.History = append(entry.History, uint64(id)) bytes, err := entry.Marshal() if err != nil { glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err) return err } for _, index := range [][4]quad.Direction{spo, osp, pos, cps} { if index == cps && q.Get(quad.Label) == "" { continue } dbi = qs.dbis[dbFor(index)] err = tx.Put(dbi, qs.createKeyFor(index, q), bytes, 0) if err != nil { return err } } return nil }
func (qs *QuadStore) indexOf(t quad.Quad) (int64, bool) { min := maxInt var tree *Tree for d := quad.Subject; d <= quad.Label; d++ { sid := t.Get(d) if d == quad.Label && sid == nil { continue } qs.idmu.RLock() id, ok := qs.idMap[quad.StringOf(sid)] qs.idmu.RUnlock() // If we've never heard about a node, it must not exist if !ok { return 0, false } index, ok := qs.index.Get(d, id) if !ok { // If it's never been indexed in this direction, it can't exist. return 0, false } if l := index.Len(); l < min { min, tree = l, index } } it := NewIterator(tree, qs, 0, nil) for it.Next() { qs.logmu.RLock() l := qs.log[it.result] qs.logmu.RUnlock() if t == l.Quad { return it.result, true } } return 0, false }
func (qs *TripleStore) buildQuadWrite(batch *leveldb.Batch, q quad.Quad, id int64, isAdd bool) error { var entry IndexEntry data, err := qs.db.Get(qs.createKeyFor(spo, q), qs.readopts) if err != nil && err != leveldb.ErrNotFound { glog.Error("Couldn't access DB to prepare index: ", err) return err } if err == nil { // We got something. err = json.Unmarshal(data, &entry) if err != nil { return err } } else { entry.Quad = q } entry.History = append(entry.History, id) if isAdd && len(entry.History)%2 == 0 { glog.Error("Entry History is out of sync for", entry) return errors.New("Odd index history") } bytes, err := json.Marshal(entry) if err != nil { glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err) return err } batch.Put(qs.createKeyFor(spo, q), bytes) batch.Put(qs.createKeyFor(osp, q), bytes) batch.Put(qs.createKeyFor(pos, q), bytes) if q.Get(quad.Label) != "" { batch.Put(qs.createKeyFor(cps, q), bytes) } return nil }
func (ts *TripleStore) RemoveTriple(t quad.Quad) { var tripleID int64 var exists bool tripleID = 0 if exists, tripleID = ts.tripleExists(t); !exists { return } ts.triples[tripleID] = quad.Quad{} ts.size-- for d := quad.Subject; d <= quad.Label; d++ { if d == quad.Label && t.Get(d) == "" { continue } id := ts.idMap[t.Get(d)] tree := ts.index.GetOrCreate(d, id) tree.Delete(Int64(tripleID)) } for d := quad.Subject; d <= quad.Label; d++ { if d == quad.Label && t.Get(d) == "" { continue } id, ok := ts.idMap[t.Get(d)] if !ok { continue } stillExists := false for d := quad.Subject; d <= quad.Label; d++ { if d == quad.Label && t.Get(d) == "" { continue } nodeTree := ts.index.GetOrCreate(d, id) if nodeTree.Len() != 0 { stillExists = true break } } if !stillExists { delete(ts.idMap, t.Get(d)) delete(ts.revIdMap, id) } } }