Beispiel #1
0
// RemoveTriple deletes t from the store. The SPO key is looked up first: if
// it is absent the call is a no-op, and if the lookup itself fails the failure
// is logged and nothing is changed. Otherwise the index keys are deleted and
// UpdateValueKeyBy is called with -1 for every node of t, all through one
// batch; qs.size is decremented only after that batch has been written.
func (qs *TripleStore) RemoveTriple(t quad.Quad) {
	switch _, err := qs.db.Get(qs.createKeyFor(spo, t), qs.readopts); {
	case err == leveldb.ErrNotFound:
		// Nothing is stored for this triple, so there is nothing to remove.
		return
	case err != nil:
		glog.Error("Couldn't access DB to confirm deletion")
		return
	}

	batch := new(leveldb.Batch)

	// Drop the triple from each index.
	batch.Delete(qs.createKeyFor(spo, t))
	batch.Delete(qs.createKeyFor(osp, t))
	batch.Delete(qs.createKeyFor(pos, t))

	// Apply a -1 update to every node the triple referenced.
	qs.UpdateValueKeyBy(t.Get(quad.Subject), -1, batch)
	qs.UpdateValueKeyBy(t.Get(quad.Predicate), -1, batch)
	qs.UpdateValueKeyBy(t.Get(quad.Object), -1, batch)

	// The label is optional; it has its own key and update.
	if label := t.Get(quad.Label); label != "" {
		batch.Delete(qs.createProvKeyFor(pso, t))
		qs.UpdateValueKeyBy(label, -1, batch)
	}

	if err := qs.db.Write(batch, nil); err != nil {
		glog.Errorf("Couldn't delete triple %s.", t)
		return
	}
	qs.size--
}
Beispiel #2
0
// main reads CSV records from stdin and writes one N-Quad per record,
// gzip-compressed, to stdout. Each quad links the value in column 1 to the
// value in column 3 through the sentence.is_about predicate. Progress (every
// 10000 lines) and errors are reported on stderr.
//
// Malformed records and records with fewer than four fields are reported and
// skipped. Any other read error aborts the run, since retrying the read would
// only return the same error again.
func main() {
	// Writer of link between entities and sentences.
	rdfWriter := gzip.NewWriter(os.Stdout)

	defer func() {
		// Close flushes any pending data and writes the gzip footer.
		if err := rdfWriter.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "Error closing output: %v\n", err)
		}
	}()

	// Read ASSIGNMENT.csv
	r := csv.NewReader(os.Stdin)
	line := 0
	for {
		record, err := r.Read()
		line++
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error line %d: %v\n", line, err)
			if _, ok := err.(*csv.ParseError); !ok {
				// Not a bad record but a failure of the underlying reader;
				// stop instead of looping on the same error forever.
				return
			}
			continue
		}

		if line%10000 == 0 {
			fmt.Fprintf(os.Stderr, "Line %d\n", line)
		}

		// Columns 1 and 3 are read below; skip records that are too short
		// rather than panicking on the index.
		if len(record) < 4 {
			fmt.Fprintf(os.Stderr, "Error line %d: expected at least 4 fields, got %d\n", line, len(record))
			continue
		}

		// Link to entity.
		q := quad.Quad{
			Subject:   "<" + record[1] + ">",
			Predicate: "<http://rdf.romainstrock.com/ns/sentence.is_about>",
			Object:    "<" + record[3] + ">",
		}
		fmt.Fprintln(rdfWriter, q.NQuad())
	}
}
Beispiel #3
0
// indexOf searches the store for t. It returns the quad's position in qs.log
// and true when an identical quad is found, and 0, false otherwise.
//
// For every direction of t (the label only when it is set) the node must have
// an id and an index tree; the tree with the fewest entries is then scanned
// for a log entry equal to t.
func (qs *QuadStore) indexOf(t quad.Quad) (int64, bool) {
	var (
		smallest   = maxInt
		candidates *b.Tree
	)
	for dir := quad.Subject; dir <= quad.Label; dir++ {
		name := t.Get(dir)
		if name == "" && dir == quad.Label {
			// An unset label takes no part in the lookup.
			continue
		}
		// A node without an id has never been stored, so neither has t.
		id, known := qs.idMap[name]
		if !known {
			return 0, false
		}
		// Likewise if the node was never indexed in this direction.
		tree, indexed := qs.index.Get(dir, id)
		if !indexed {
			return 0, false
		}
		if n := tree.Len(); n < smallest {
			smallest, candidates = n, tree
		}
	}

	for it := NewIterator(candidates, "", qs); it.Next(); {
		pos := it.Result().(int64)
		if qs.log[pos].Quad == t {
			return pos, true
		}
	}
	return 0, false
}
Beispiel #4
0
// tripleExists reports whether t is stored and, when it is, its position in
// ts.triples. It returns false, 0 when t is not found.
//
// Every direction of t (the label only when it is set) must map to a known
// node id that has an index tree; the tree with the fewest entries is then
// scanned for a stored triple equal to t.
func (ts *TripleStore) tripleExists(t quad.Quad) (bool, int64) {
	var (
		fewest     = -1
		candidates *llrb.LLRB
	)
	for dir := quad.Subject; dir <= quad.Label; dir++ {
		name := t.Get(dir)
		if name == "" && dir == quad.Label {
			// An unset label takes no part in the lookup.
			continue
		}
		// A node we have never heard about cannot be part of a stored triple.
		id, known := ts.idMap[name]
		if !known {
			return false, 0
		}
		// Nor can a node that was never indexed in this direction.
		tree, indexed := ts.index.Get(dir, id)
		if !indexed {
			return false, 0
		}
		if fewest == -1 || tree.Len() < fewest {
			fewest = tree.Len()
			candidates = tree
		}
	}

	for it := NewLlrbIterator(candidates, ""); it.Next(); {
		pos := it.Result().(int64)
		if ts.triples[pos] == t {
			return true, pos
		}
	}
	return false, 0
}
Beispiel #5
0
// createKeyFor builds an index key for triple by concatenating the hashes of
// its four nodes, taken in the direction order given by d.
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, triple quad.Quad) []byte {
	key := make([]byte, 0, hashSize*4)
	for _, dir := range d {
		key = append(key, qs.convertStringToByteHash(triple.Get(dir))...)
	}
	return key
}
Beispiel #6
0
// createKeyFor builds an index key for q of length quad.HashSize*4. The hash
// of each node, taken in the direction order given by d, is written into its
// own consecutive quad.HashSize-byte slot of the key.
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, q quad.Quad) []byte {
	key := make([]byte, quad.HashSize*4)
	for i, dir := range d {
		slot := key[quad.HashSize*i : quad.HashSize*(i+1)]
		quad.HashTo(q.Get(dir), slot)
	}
	return key
}
Beispiel #7
0
// createKeyFor builds an index key for q by concatenating hashOf of its four
// nodes, taken in the direction order given by d.
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, q quad.Quad) []byte {
	key := make([]byte, 0, hashSize*4)
	for _, dir := range d {
		key = append(key, hashOf(q.Get(dir))...)
	}
	return key
}
Beispiel #8
0
// createKeyFor builds an index key for triple. The key starts with the
// Prefix bytes of the first two directions in d, followed by the hashes of the
// three nodes taken in the direction order given by d.
func (qs *TripleStore) createKeyFor(d [3]quad.Direction, triple quad.Quad) []byte {
	key := make([]byte, 0, 2+(qs.hasher.Size()*3))
	// TODO(kortschak) Remove dependence on String() method.
	key = append(key, d[0].Prefix(), d[1].Prefix())
	for _, dir := range d {
		key = append(key, qs.convertStringToByteHash(triple.Get(dir))...)
	}
	return key
}
Beispiel #9
0
// buildWrite queues the index writes for t into batch via buildTripleWrite and
// then calls UpdateValueKeyBy with +1 for the subject, predicate and object of
// t, and for its label when one is set. The value updates are issued with a
// nil batch rather than the one passed in.
func (qs *TripleStore) buildWrite(batch *leveldb.Batch, t quad.Quad) {
	qs.buildTripleWrite(batch, t)

	for _, dir := range []quad.Direction{quad.Subject, quad.Predicate, quad.Object} {
		qs.UpdateValueKeyBy(t.Get(dir), 1, nil)
	}
	if label := t.Get(quad.Label); label != "" {
		qs.UpdateValueKeyBy(label, 1, nil)
	}
}
Beispiel #10
0
// createKeyFor builds an index key for q of length 2+quad.HashSize*4. The
// first two bytes are the Prefix bytes of d[0] and d[1]; after them the hash
// of each node, taken in the direction order given by d, occupies its own
// consecutive quad.HashSize-byte slot.
func createKeyFor(d [4]quad.Direction, q quad.Quad) []byte {
	key := make([]byte, 2+(quad.HashSize*4))
	key[0], key[1] = d[0].Prefix(), d[1].Prefix()
	for i, dir := range d {
		start := 2 + quad.HashSize*i
		quad.HashTo(q.Get(dir), key[start:start+quad.HashSize])
	}
	return key
}
Beispiel #11
0
// createKeyFor builds an index key for q. The key starts with the Prefix
// bytes of the first two directions in d, followed by hashOf of the four nodes
// taken in the direction order given by d.
func (qs *QuadStore) createKeyFor(d [4]quad.Direction, q quad.Quad) []byte {
	key := make([]byte, 0, 2+(hashSize*4))
	// TODO(kortschak) Remove dependence on String() method.
	key = append(key, d[0].Prefix(), d[1].Prefix())
	for _, dir := range d {
		key = append(key, hashOf(q.Get(dir))...)
	}
	return key
}
Beispiel #12
0
// buildResult interprets one result row. The first four values of result are
// stored, in order, as the subject, predicate, object and label of
// l.resultQuad. Every remaining column name in cols is mapped to the value at
// the same position in result, and that map is returned.
func (l *SQLLinkIterator) buildResult(result []string, cols []string) map[string]string {
	l.resultQuad = quad.Quad{
		Subject:   result[0],
		Predicate: result[1],
		Object:    result[2],
		Label:     result[3],
	}

	// Columns past the first four carry extra named values.
	extra := cols[4:]
	named := make(map[string]string)
	for i := range extra {
		named[extra[i]] = result[4+i]
	}
	return named
}
Beispiel #13
0
// buildTripleWrite queues the index entries for t into batch. The triple is
// JSON-encoded once and that encoding is stored under the SPO, OSP and POS
// keys, and under the PSO provenance key when t has a label. If encoding
// fails the error is logged and nothing is queued.
func (qs *TripleStore) buildTripleWrite(batch *leveldb.Batch, t quad.Quad) {
	encoded, err := json.Marshal(t)
	if err != nil {
		glog.Errorf("Couldn't write to buffer for triple %s: %s", t, err)
		return
	}

	batch.Put(qs.createKeyFor(spo, t), encoded)
	batch.Put(qs.createKeyFor(osp, t), encoded)
	batch.Put(qs.createKeyFor(pos, t), encoded)

	if label := t.Get(quad.Label); label != "" {
		batch.Put(qs.createProvKeyFor(pso, t), encoded)
	}
}
Beispiel #14
0
// main reads CSV records from stdin and writes three N-Quads per record,
// gzip-compressed, to stdout. With column 0 as the subject, the quads carry
// the speaker (column 3), the quoted sentence text (column 6) and the source
// URL (column 5). Progress (every 10000 lines) and errors are reported on
// stderr.
//
// Malformed records and records with fewer than seven fields are reported and
// skipped. Any other read error aborts the run, since retrying the read would
// only return the same error again.
func main() {
	// Writer of link between entities and sentences.
	rdfWriter := gzip.NewWriter(os.Stdout)

	defer func() {
		// Close flushes any pending data and writes the gzip footer.
		if err := rdfWriter.Close(); err != nil {
			fmt.Fprintf(os.Stderr, "Error closing output: %v\n", err)
		}
	}()

	// Read SENTENCES.csv
	r := csv.NewReader(os.Stdin)
	line := 0
	for {
		record, err := r.Read()
		line++
		if err == io.EOF {
			break
		}
		if err != nil {
			fmt.Fprintf(os.Stderr, "Error line %d: %v\n", line, err)
			if _, ok := err.(*csv.ParseError); !ok {
				// Not a bad record but a failure of the underlying reader;
				// stop instead of looping on the same error forever.
				return
			}
			continue
		}

		if line%10000 == 0 {
			fmt.Fprintf(os.Stderr, "Line %d\n", line)
		}

		// Columns up to index 6 are read below; skip records that are too
		// short rather than panicking on the index.
		if len(record) < 7 {
			fmt.Fprintf(os.Stderr, "Error line %d: expected at least 7 fields, got %d\n", line, len(record))
			continue
		}

		subject := "<" + record[0] + ">"

		// Speaker.
		speaker := quad.Quad{
			Subject:   subject,
			Predicate: "<http://rdf.romainstrock.com/ns/sentence.speaker>",
			Object:    "<" + record[3] + ">",
		}
		fmt.Fprintln(rdfWriter, speaker.NQuad())

		// Sentence text.
		sentence := quad.Quad{
			Subject:   subject,
			Predicate: "<http://rdf.romainstrock.com/ns/sentence.text>",
			Object:    strconv.Quote(record[6]),
		}
		fmt.Fprintln(rdfWriter, sentence.NQuad())

		// Source URL.
		source := quad.Quad{
			Subject:   subject,
			Predicate: "<http://rdf.romainstrock.com/ns/sentence.source_url>",
			Object:    "<" + record[5] + ">",
		}
		fmt.Fprintln(rdfWriter, source.NQuad())
	}
}
Beispiel #15
0
// buildQuadWrite queues into batch the index updates for adding (isAdd) or
// deleting (!isAdd) q. The history entry currently stored under the SPO key
// is read, id is appended to it, and the result is queued under the SPO, OSP
// and POS keys, plus the CPS key when q has a label.
//
// It returns graph.ErrQuadExists when adding a quad whose history has odd
// length, graph.ErrQuadNotExist when deleting one whose history has even
// length, and any error from reading or (un)marshalling the entry. Nothing is
// queued when an error is returned.
func (qs *QuadStore) buildQuadWrite(batch *leveldb.Batch, q quad.Quad, id int64, isAdd bool) error {
	var entry proto.HistoryEntry
	data, err := qs.db.Get(createKeyFor(spo, q), qs.readopts)
	if err != nil && err != leveldb.ErrNotFound {
		glog.Error("could not access DB to prepare index: ", err)
		return err
	}
	if data != nil {
		// We got something.
		err = entry.Unmarshal(data)
		if err != nil {
			return err
		}
	}

	// An odd-length history is treated as "quad currently present", an
	// even-length (or empty) one as "quad currently absent".
	if isAdd && len(entry.History)%2 == 1 {
		if glog.V(2) {
			glog.Errorf("attempt to add existing quad %v: %#v", entry, q)
		}
		return graph.ErrQuadExists
	}
	if !isAdd && len(entry.History)%2 == 0 {
		if glog.V(2) {
			// %#v, matching the add-path message; the %#c verb used here
			// before is a character verb and does not format a quad.
			glog.Errorf("attempt to delete non-existent quad %v: %#v", entry, q)
		}
		return graph.ErrQuadNotExist
	}

	entry.History = append(entry.History, uint64(id))

	bytes, err := entry.Marshal()
	if err != nil {
		glog.Errorf("could not write to buffer for entry %#v: %s", entry, err)
		return err
	}
	batch.Put(createKeyFor(spo, q), bytes)
	batch.Put(createKeyFor(osp, q), bytes)
	batch.Put(createKeyFor(pos, q), bytes)
	if q.Get(quad.Label) != nil {
		batch.Put(createKeyFor(cps, q), bytes)
	}
	return nil
}
Beispiel #16
0
// buildQuadWrite records, inside tx, that q is being added (isAdd) or deleted
// (!isAdd) by the change with the given id. The history entry stored under the
// SPO key is loaded, id is appended to it, and the result is written to the
// SPO, OSP and POS buckets, plus the CPS bucket when q has a label.
//
// It returns graph.ErrQuadExists when adding a quad whose history has odd
// length, graph.ErrQuadNotExist when deleting one whose history has even
// length, and any error from (un)marshalling the entry or from a bucket Put.
func (qs *QuadStore) buildQuadWrite(tx *bolt.Tx, q quad.Quad, id int64, isAdd bool) error {
	var entry proto.HistoryEntry

	spoB := tx.Bucket(spoBucket)
	spoB.FillPercent = localFillPercent
	if stored := spoB.Get(qs.createKeyFor(spo, q)); stored != nil {
		// The quad has history already; continue from it.
		if err := entry.Unmarshal(stored); err != nil {
			return err
		}
	}

	// An odd-length history is treated as "quad currently present".
	present := len(entry.History)%2 == 1
	switch {
	case isAdd && present:
		glog.Errorf("attempt to add existing quad %v: %#v", entry, q)
		return graph.ErrQuadExists
	case !isAdd && !present:
		glog.Errorf("attempt to delete non-existent quad %v: %#v", entry, q)
		return graph.ErrQuadNotExist
	}

	entry.History = append(entry.History, uint64(id))

	encoded, err := entry.Marshal()
	if err != nil {
		glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err)
		return err
	}

	for _, idx := range [][4]quad.Direction{spo, osp, pos, cps} {
		// The CPS index only holds quads that carry a label.
		if idx == cps && q.Get(quad.Label) == nil {
			continue
		}
		bkt := tx.Bucket(bucketFor(idx))
		bkt.FillPercent = localFillPercent
		if err := bkt.Put(qs.createKeyFor(idx, q), encoded); err != nil {
			return err
		}
	}
	return nil
}
Beispiel #17
0
// buildQuadWrite records, inside tx, that q is being added (isAdd) or deleted
// (!isAdd) by the change with the given id. The JSON index entry stored under
// the SPO key is loaded, id is appended to its history, and the result is
// written to the SPO, OSP and POS buckets, plus the CPS bucket when q has a
// label.
//
// It returns graph.ErrQuadExists when adding a quad whose history has odd
// length, graph.ErrQuadNotExist when deleting one whose history has even
// length, and any error from JSON (de)serialisation or from a bucket Put.
func (qs *QuadStore) buildQuadWrite(tx *bolt.Tx, q quad.Quad, id int64, isAdd bool) error {
	var entry IndexEntry

	spoB := tx.Bucket(spoBucket)
	spoB.FillPercent = localFillPercent
	if stored := spoB.Get(qs.createKeyFor(spo, q)); stored != nil {
		// The quad has history already; continue from it.
		if err := json.Unmarshal(stored, &entry); err != nil {
			return err
		}
	}

	// An odd-length history is treated as "quad currently present".
	present := len(entry.History)%2 == 1
	switch {
	case isAdd && present:
		glog.Error("Adding a valid quad ", entry)
		return graph.ErrQuadExists
	case !isAdd && !present:
		glog.Error("Deleting an invalid quad ", entry)
		return graph.ErrQuadNotExist
	}

	entry.History = append(entry.History, id)

	encoded, err := json.Marshal(entry)
	if err != nil {
		glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err)
		return err
	}

	for _, idx := range [][4]quad.Direction{spo, osp, pos, cps} {
		// The CPS index only holds quads that carry a label.
		if idx == cps && q.Get(quad.Label) == "" {
			continue
		}
		bkt := tx.Bucket(bucketFor(idx))
		bkt.FillPercent = localFillPercent
		if err := bkt.Put(qs.createKeyFor(idx, q), encoded); err != nil {
			return err
		}
	}
	return nil
}
Beispiel #18
0
// AddTriple stores t unless an identical triple is already present. The
// triple is appended to ts.triples under the next triple id, every node of t
// that has no id yet is given one (in both idMap and revIdMap), and the triple
// id is inserted into the index tree of each node/direction pair. An empty
// label is ignored throughout.
func (ts *TripleStore) AddTriple(t quad.Quad) {
	if found, _ := ts.tripleExists(t); found {
		return
	}

	tripleID := ts.tripleIdCounter
	ts.triples = append(ts.triples, t)
	ts.size++
	ts.tripleIdCounter++

	// First pass: make sure every node has an id.
	for dir := quad.Subject; dir <= quad.Label; dir++ {
		name := t.Get(dir)
		if name == "" && dir == quad.Label {
			continue
		}
		if _, known := ts.idMap[name]; known {
			continue
		}
		ts.idMap[name] = ts.idCounter
		ts.revIdMap[ts.idCounter] = name
		ts.idCounter++
	}

	// Second pass: index the triple under each of its nodes.
	for dir := quad.Subject; dir <= quad.Label; dir++ {
		name := t.Get(dir)
		if name == "" && dir == quad.Label {
			continue
		}
		ts.index.GetOrCreate(dir, ts.idMap[name]).ReplaceOrInsert(Int64(tripleID))
	}

	// TODO(barakmich): Add VIP indexing
}
Beispiel #19
0
// buildQuadWriteLMDB records, inside tx, that q is being added (isAdd) or
// deleted (!isAdd) by the change with the given id. The history entry stored
// under the SPO key is loaded, id is appended to it, and the result is written
// to the SPO, OSP and POS databases, plus the CPS database when q has a label.
//
// It returns graph.ErrQuadExists when adding a quad whose history has odd
// length, graph.ErrQuadNotExist when deleting one whose history has even
// length, and any error from reading, (un)marshalling or writing the entry.
func (qs *QuadStore) buildQuadWriteLMDB(tx *lmdb.Txn, q quad.Quad, id int64, isAdd bool) error {
	var entry proto.HistoryEntry
	dbi := qs.dbis[spoDB]
	data, err := tx.Get(dbi, qs.createKeyFor(spo, q))
	switch {
	case err == nil:
		// We got something.
		if err := entry.Unmarshal(data); err != nil {
			return err
		}
	case !lmdb.IsNotFound(err):
		// Only a missing key means "no history yet". Any other failure must
		// be reported; otherwise the stored history would be silently
		// replaced by a fresh one below.
		return err
	}

	// An odd-length history is treated as "quad currently present", an
	// even-length (or empty) one as "quad currently absent".
	if isAdd && len(entry.History)%2 == 1 {
		glog.Errorf("attempt to add existing quad %v: %#v", entry, q)
		return graph.ErrQuadExists
	}
	if !isAdd && len(entry.History)%2 == 0 {
		glog.Errorf("attempt to delete non-existent quad %v: %#v", entry, q)
		return graph.ErrQuadNotExist
	}

	entry.History = append(entry.History, uint64(id))

	bytes, err := entry.Marshal()
	if err != nil {
		glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err)
		return err
	}
	for _, index := range [][4]quad.Direction{spo, osp, pos, cps} {
		// The CPS index only holds quads that carry a label.
		if index == cps && q.Get(quad.Label) == "" {
			continue
		}
		dbi = qs.dbis[dbFor(index)]
		err = tx.Put(dbi, qs.createKeyFor(index, q), bytes, 0)
		if err != nil {
			return err
		}
	}
	return nil
}
Beispiel #20
0
// indexOf searches the store for t. It returns the quad's position in qs.log
// and true when an identical quad is found, and 0, false otherwise.
//
// For every direction of t (the label only when it is non-nil) the node must
// have an id and an index tree; the tree with the fewest entries is then
// scanned for a log entry equal to t. qs.idMap is read under qs.idmu and
// qs.log under qs.logmu, each lock being held for a single lookup only.
func (qs *QuadStore) indexOf(t quad.Quad) (int64, bool) {
	var (
		smallest   = maxInt
		candidates *Tree
	)
	for dir := quad.Subject; dir <= quad.Label; dir++ {
		node := t.Get(dir)
		if node == nil && dir == quad.Label {
			// A missing label takes no part in the lookup.
			continue
		}

		qs.idmu.RLock()
		id, known := qs.idMap[quad.StringOf(node)]
		qs.idmu.RUnlock()
		// A node without an id has never been stored, so neither has t.
		if !known {
			return 0, false
		}

		// Likewise if the node was never indexed in this direction.
		tree, indexed := qs.index.Get(dir, id)
		if !indexed {
			return 0, false
		}
		if n := tree.Len(); n < smallest {
			smallest, candidates = n, tree
		}
	}

	for it := NewIterator(candidates, qs, 0, nil); it.Next(); {
		pos := it.result
		qs.logmu.RLock()
		logged := qs.log[pos]
		qs.logmu.RUnlock()
		if logged.Quad == t {
			return pos, true
		}
	}
	return 0, false
}
Beispiel #21
0
// buildQuadWrite queues into batch the index updates for the change with the
// given id applied to q. The JSON index entry stored under the SPO key is
// loaded (or started fresh with entry.Quad = q when the key is absent), id is
// appended to its history, and the result is queued under the SPO, OSP and POS
// keys, plus the CPS key when q has a label.
//
// When isAdd is set and the history has even length after the append, an
// "Odd index history" error is returned. Errors from reading the entry or from
// JSON (de)serialisation are returned as well. Nothing is queued when an error
// is returned.
func (qs *TripleStore) buildQuadWrite(batch *leveldb.Batch, q quad.Quad, id int64, isAdd bool) error {
	var entry IndexEntry

	stored, err := qs.db.Get(qs.createKeyFor(spo, q), qs.readopts)
	switch {
	case err == nil:
		// The quad has an entry already; continue from it.
		if err = json.Unmarshal(stored, &entry); err != nil {
			return err
		}
	case err == leveldb.ErrNotFound:
		entry.Quad = q
	default:
		glog.Error("Couldn't access DB to prepare index: ", err)
		return err
	}

	entry.History = append(entry.History, id)
	if isAdd && len(entry.History)%2 == 0 {
		glog.Error("Entry History is out of sync for", entry)
		return errors.New("Odd index history")
	}

	encoded, err := json.Marshal(entry)
	if err != nil {
		glog.Errorf("Couldn't write to buffer for entry %#v: %s", entry, err)
		return err
	}

	batch.Put(qs.createKeyFor(spo, q), encoded)
	batch.Put(qs.createKeyFor(osp, q), encoded)
	batch.Put(qs.createKeyFor(pos, q), encoded)
	if label := q.Get(quad.Label); label != "" {
		batch.Put(qs.createKeyFor(cps, q), encoded)
	}
	return nil
}
Beispiel #22
0
// RemoveTriple removes t from the store; it is a no-op when t is not present.
// The triple's slot in ts.triples is overwritten with the zero quad (the slice
// is not shortened), the triple id is deleted from the index tree of each of
// its nodes, and any node of t left with only empty index trees is dropped
// from idMap and revIdMap. An empty label is ignored throughout.
func (ts *TripleStore) RemoveTriple(t quad.Quad) {
	var tripleID int64
	var exists bool
	tripleID = 0
	if exists, tripleID = ts.tripleExists(t); !exists {
		return
	}

	// Blank the slot rather than removing it, so the positions of the other
	// entries in ts.triples do not change.
	ts.triples[tripleID] = quad.Quad{}
	ts.size--

	// Unindex the triple under each of its nodes.
	for d := quad.Subject; d <= quad.Label; d++ {
		if d == quad.Label && t.Get(d) == "" {
			continue
		}
		id := ts.idMap[t.Get(d)]
		tree := ts.index.GetOrCreate(d, id)
		tree.Delete(Int64(tripleID))
	}

	// Forget nodes that no longer appear in any index tree.
	for d := quad.Subject; d <= quad.Label; d++ {
		if d == quad.Label && t.Get(d) == "" {
			continue
		}
		id, ok := ts.idMap[t.Get(d)]
		if !ok {
			// Already removed by an earlier iteration (the same node can
			// occupy several directions of t).
			continue
		}
		stillExists := false
		// NOTE: this inner d shadows the outer loop variable; the outer d is
		// visible again once the inner loop ends.
		for d := quad.Subject; d <= quad.Label; d++ {
			// The label direction is not checked for any node when t itself
			// has no label.
			if d == quad.Label && t.Get(d) == "" {
				continue
			}
			// NOTE(review): GetOrCreate presumably creates an empty tree when
			// none exists for this direction/id — confirm.
			nodeTree := ts.index.GetOrCreate(d, id)
			if nodeTree.Len() != 0 {
				stillExists = true
				break
			}
		}
		if !stillExists {
			delete(ts.idMap, t.Get(d))
			delete(ts.revIdMap, id)
		}
	}
}