func testEnumerate(t *testing.T, kv sorted.KeyValue, start, end string, want ...string) { var got []string it := kv.Find(start, end) for it.Next() { key, val := it.Key(), it.Value() keyb, valb := it.KeyBytes(), it.ValueBytes() if key != string(keyb) { t.Errorf("Key and KeyBytes disagree: %q vs %q", key, keyb) } if val != string(valb) { t.Errorf("Value and ValueBytes disagree: %q vs %q", val, valb) } if key+"v" != val { t.Errorf("iterator returned unexpected pair for test: %q, %q", key, val) } got = append(got, val) } err := it.Close() if err != nil { t.Errorf("for enumerate of (%q, %q), Close error: %v", start, end, err) } if !reflect.DeepEqual(got, want) { t.Errorf("for enumerate of (%q, %q), got: %q; want %q", start, end, got, want) } }
func testDeleteNotFoundBatch(t *testing.T, kv sorted.KeyValue) { b := kv.BeginBatch() b.Delete(notExistKey) if err := kv.CommitBatch(b); err != nil { t.Fatalf("Batch deletion of non existing key returned an error: %v", err) } }
func isEmpty(t *testing.T, kv sorted.KeyValue) bool { it := kv.Find("", "") hasRow := it.Next() if err := it.Close(); err != nil { t.Fatalf("Error closing iterator while testing for emptiness: %v", err) } return !hasRow }
func foreachSorted(t *testing.T, s sorted.KeyValue, fn func(string, string)) { it := s.Find("", "") for it.Next() { fn(it.Key(), it.Value()) } if err := it.Close(); err != nil { t.Fatal(err) } }
func testInsertTooLarge(t *testing.T, kv sorted.KeyValue) { largeKey := make([]byte, sorted.MaxKeySize+1) largeValue := make([]byte, sorted.MaxValueSize+1) if err := kv.Set(string(largeKey), "whatever"); err == nil || err != sorted.ErrKeyTooLarge { t.Fatalf("Insertion of too large a key should have failed, but err was %v", err) } if err := kv.Set("whatever", string(largeValue)); err == nil || err != sorted.ErrValueTooLarge { t.Fatalf("Insertion of too large a value should have failed, but err was %v", err) } }
func queryPrefixString(s sorted.KeyValue, prefix string) sorted.Iterator { if prefix == "" { return s.Find("", "") } lastByte := prefix[len(prefix)-1] if lastByte == 0xff { panic("unsupported query prefix ending in 0xff") } end := prefix[:len(prefix)-1] + string(lastByte+1) return s.Find(prefix, end) }
// newStorage returns a new storage in path root with the given maxFileSize, // or defaultMaxFileSize (512MB) if <= 0 func newStorage(root string, maxFileSize int64, indexConf jsonconfig.Obj) (s *storage, err error) { fi, err := os.Stat(root) if os.IsNotExist(err) { return nil, fmt.Errorf("storage root %q doesn't exist", root) } if err != nil { return nil, fmt.Errorf("Failed to stat directory %q: %v", root, err) } if !fi.IsDir() { return nil, fmt.Errorf("storage root %q exists but is not a directory.", root) } var index sorted.KeyValue if len(indexConf) > 0 { index, err = sorted.NewKeyValue(indexConf) } else { index, err = kvfile.NewStorage(filepath.Join(root, indexKV)) } if err != nil { return nil, err } defer func() { if err != nil { index.Close() } }() if maxFileSize <= 0 { maxFileSize = defaultMaxFileSize } // Be consistent with trailing slashes. Makes expvar stats for total // reads/writes consistent across diskpacked targets, regardless of what // people put in their low level config. root = strings.TrimRight(root, `\/`) s = &storage{ root: root, index: index, maxFileSize: maxFileSize, Generationer: local.NewGenerationer(root), } s.mu.Lock() defer s.mu.Unlock() if err := s.openAllPacks(); err != nil { return nil, err } if _, _, err := s.StorageGeneration(); err != nil { return nil, fmt.Errorf("Error initialization generation for %q: %v", root, err) } return s, nil }
// TestSorted exercises the generic sorted.KeyValue contract against kv:
// initial emptiness, Get/Set/Delete, range enumeration (including collation
// behavior and key-vs-value range semantics), large and over-large entries,
// and batch deletion of missing keys. kv must be empty when passed in.
func TestSorted(t *testing.T, kv sorted.KeyValue) {
	defer test.TLog(t)()
	if !isEmpty(t, kv) {
		t.Fatal("kv for test is expected to be initially empty")
	}
	// set is a fatal-on-error Set helper.
	set := func(k, v string) {
		if err := kv.Set(k, v); err != nil {
			t.Fatalf("Error setting %q to %q: %v", k, v, err)
		}
	}
	set("foo", "bar")
	if isEmpty(t, kv) {
		t.Fatalf("iterator reports the kv is empty after adding foo=bar; iterator must be broken")
	}
	if v, err := kv.Get("foo"); err != nil || v != "bar" {
		t.Errorf("get(foo) = %q, %v; want bar", v, err)
	}
	if v, err := kv.Get("NOT_EXIST"); err != sorted.ErrNotFound {
		t.Errorf("get(NOT_EXIST) = %q, %v; want error sorted.ErrNotFound", v, err)
	}
	// Deleting the same key twice must be idempotent: the second delete of
	// an already-gone key must not error.
	for i := 0; i < 2; i++ {
		if err := kv.Delete("foo"); err != nil {
			t.Errorf("Delete(foo) (on loop %d/2) returned error %v", i+1, err)
		}
	}
	set("a", "av")
	set("b", "bv")
	set("c", "cv")
	// Range enumeration: "" means unbounded; end is exclusive.
	testEnumerate(t, kv, "", "", "av", "bv", "cv")
	testEnumerate(t, kv, "a", "", "av", "bv", "cv")
	testEnumerate(t, kv, "b", "", "bv", "cv")
	testEnumerate(t, kv, "a", "c", "av", "bv")
	testEnumerate(t, kv, "a", "b", "av")
	testEnumerate(t, kv, "a", "a")
	testEnumerate(t, kv, "d", "")
	testEnumerate(t, kv, "d", "e")

	// Verify that < comparison works identically for all DBs (because it is affected by collation rules)
	// http://postgresql.1045698.n5.nabble.com/String-comparison-and-the-SQL-standard-td5740721.html
	set("foo|abc", "foo|abcv")
	testEnumerate(t, kv, "foo|", "", "foo|abcv")
	testEnumerate(t, kv, "foo|", "foo}", "foo|abcv")

	// Verify that the value isn't being used instead of the key in the range comparison.
	set("y", "x:foo")
	testEnumerate(t, kv, "x:", "x~")

	testInsertLarge(t, kv)
	testInsertTooLarge(t, kv)

	// TODO: test batch commits

	// Deleting a non-existent item in a batch should not be an error
	testDeleteNotFoundBatch(t, kv)
	testDeletePartialNotFoundBatch(t, kv)
}
func check(t *testing.T, kv sorted.KeyValue, prefix string, want []mod) { it := kv.Find("", "") for i, m := range want { if !it.Next() { t.Fatalf("%v: unexpected it.Next == false on iteration %d", prefix, i) } if k, v := it.Key(), it.Value(); k != m.key || v != m.value { t.Errorf("%v: got key == %q value == %q, want key == %q value == %q on iteration %d", prefix, k, v, m.key, m.value, i) } } if it.Next() { t.Errorf("%v: unexpected it.Next == true after complete iteration", prefix) } if err := it.Close(); err != nil { t.Errorf("%v: error closing iterator: %v", prefix, err) } }
func testDeletePartialNotFoundBatch(t *testing.T, kv sorted.KeyValue) { if err := kv.Set(butIExistKey, "whatever"); err != nil { t.Fatal(err) } b := kv.BeginBatch() b.Delete(notExistKey) b.Delete(butIExistKey) if err := kv.CommitBatch(b); err != nil { t.Fatalf("Batch deletion with one non existing key returned an error: %v", err) } if val, err := kv.Get(butIExistKey); err != sorted.ErrNotFound || val != "" { t.Fatalf("Key %q should have been batch deleted", butIExistKey) } }
// TestSorted exercises the basic sorted.KeyValue contract against kv:
// initial emptiness, Get/Set/Delete, and range enumeration semantics.
// kv must be empty when passed in.
func TestSorted(t *testing.T, kv sorted.KeyValue) {
	defer test.TLog(t)()
	if !isEmpty(t, kv) {
		t.Fatal("kv for test is expected to be initially empty")
	}
	// set is a fatal-on-error Set helper.
	set := func(k, v string) {
		if err := kv.Set(k, v); err != nil {
			t.Fatalf("Error setting %q to %q: %v", k, v, err)
		}
	}
	set("foo", "bar")
	if isEmpty(t, kv) {
		t.Fatalf("iterator reports the kv is empty after adding foo=bar; iterator must be broken")
	}
	if v, err := kv.Get("foo"); err != nil || v != "bar" {
		t.Errorf("get(foo) = %q, %v; want bar", v, err)
	}
	if v, err := kv.Get("NOT_EXIST"); err != sorted.ErrNotFound {
		t.Errorf("get(NOT_EXIST) = %q, %v; want error sorted.ErrNotFound", v, err)
	}
	// Deleting the same key twice must be idempotent: the second delete of
	// an already-gone key must not error.
	for i := 0; i < 2; i++ {
		if err := kv.Delete("foo"); err != nil {
			t.Errorf("Delete(foo) (on loop %d/2) returned error %v", i+1, err)
		}
	}
	set("a", "av")
	set("b", "bv")
	set("c", "cv")
	// Range enumeration: "" means unbounded; end is exclusive.
	testEnumerate(t, kv, "", "", "av", "bv", "cv")
	testEnumerate(t, kv, "a", "", "av", "bv", "cv")
	testEnumerate(t, kv, "b", "", "bv", "cv")
	testEnumerate(t, kv, "a", "c", "av", "bv")
	testEnumerate(t, kv, "a", "b", "av")
	testEnumerate(t, kv, "a", "a")
	testEnumerate(t, kv, "d", "")
	testEnumerate(t, kv, "d", "e")

	// Verify that the value isn't being used instead of the key in the range comparison.
	set("y", "x:foo")
	testEnumerate(t, kv, "x:", "x~")

	// TODO: test batch commits
}
func testInsertTooLarge(t *testing.T, kv sorted.KeyValue) { largeKey := make([]byte, sorted.MaxKeySize+1) largeValue := make([]byte, sorted.MaxValueSize+1) err := kv.Set(string(largeKey), "whatever") if err != nil { t.Fatalf("Insertion of too large a key should have skipped with some logging, but err was %v", err) } _, err = kv.Get(string(largeKey)) if err == nil { t.Fatal("large key should not have been inserted") } err = kv.Set("testInsertTooLarge", string(largeValue)) if err != nil { t.Fatalf("Insertion of too large a value should have skipped with some logging, but err was %v", err) } _, err = kv.Get("testInsertTooLarge") if err == nil { t.Fatal("large value should not have been inserted") } }
// reindexOne walks the single pack file packID and either rebuilds its
// index entries in one batch (overwrite == true) or verifies them against
// the existing index (overwrite == false), logging every miss or mismatch.
// In verify mode it returns an error if any entry disagreed.
func (s *storage) reindexOne(ctx *context.Context, index sorted.KeyValue, overwrite bool, packID int) error {
	var batch sorted.BatchMutation
	if overwrite {
		batch = index.BeginBatch()
	}
	allOk := true

	// TODO(tgulacsi): proper verbose from context
	verbose := camliDebug
	err := s.walkPack(verbose, packID,
		func(packID int, ref blob.Ref, offset int64, size uint32) error {
			if !ref.Valid() {
				// An invalid ref marks a deleted blob; skip it.
				if camliDebug {
					log.Printf("found deleted blob in %d at %d with size %d", packID, offset, size)
				}
				return nil
			}
			// meta is the serialized (pack, offset, size) location record.
			meta := blobMeta{packID, offset, size}.String()
			if overwrite && batch != nil {
				batch.Set(ref.String(), meta)
			} else {
				if old, err := index.Get(ref.String()); err != nil {
					allOk = false
					if err == sorted.ErrNotFound {
						log.Println(ref.String() + ": cannot find in index!")
					} else {
						log.Println(ref.String()+": error getting from index: ", err.Error())
					}
				} else if old != meta {
					allOk = false
					log.Printf("%s: index mismatch - index=%s data=%s", ref.String(), old, meta)
				}
			}
			return nil
		})
	if err != nil {
		return err
	}

	if overwrite && batch != nil {
		log.Printf("overwriting %s from %d", index, packID)
		if err = index.CommitBatch(batch); err != nil {
			return err
		}
	} else if !allOk {
		return fmt.Errorf("index does not match data in %d", packID)
	}
	return nil
}
// reindexOne walks the single pack file packID and either rebuilds its
// index entries in one batch (overwrite == true) or verifies them against
// the existing index (overwrite == false). In verify mode, an entry whose
// indexed location sorts after the one just seen is remembered in misses
// rather than reported immediately — presumably to tolerate duplicate
// blobs whose canonical copy appears later in the walk (TODO confirm);
// anything still in misses at the end counts as a mismatch.
func (s *storage) reindexOne(ctx *context.Context, index sorted.KeyValue, overwrite bool, packID int) error {
	var batch sorted.BatchMutation
	if overwrite {
		batch = index.BeginBatch()
	}
	allOk := true

	// TODO(tgulacsi): proper verbose from context
	verbose := env.IsDebug()
	// misses holds refs whose index entry didn't match yet, keyed by ref,
	// valued by the meta we computed for them.
	misses := make(map[blob.Ref]string, 8)
	err := s.walkPack(verbose, packID,
		func(packID int, ref blob.Ref, offset int64, size uint32) error {
			if !ref.Valid() {
				// An invalid ref marks a deleted blob; skip it.
				if verbose {
					log.Printf("found deleted blob in %d at %d with size %d", packID, offset, size)
				}
				return nil
			}
			// meta is the serialized (pack, offset, size) location record.
			meta := blobMeta{packID, offset, size}.String()
			if overwrite && batch != nil {
				batch.Set(ref.String(), meta)
				return nil
			}
			if _, ok := misses[ref]; ok {
				// maybe this is the last of this blob.
				delete(misses, ref)
			}
			if old, err := index.Get(ref.String()); err != nil {
				allOk = false
				if err == sorted.ErrNotFound {
					log.Println(ref.String() + ": cannot find in index!")
				} else {
					log.Println(ref.String()+": error getting from index: ", err.Error())
				}
			} else if old != meta {
				if old > meta {
					// Index points past this copy; defer judgment in case a
					// later (canonical) copy of the blob matches.
					misses[ref] = meta
					log.Printf("WARN: possible duplicate blob %s", ref.String())
				} else {
					allOk = false
					log.Printf("ERROR: index mismatch for %s - index=%s, meta=%s!", ref.String(), old, meta)
				}
			}
			return nil
		})
	if err != nil {
		return err
	}

	// Whatever is still deferred never found a matching copy: real mismatches.
	for ref, meta := range misses {
		log.Printf("ERROR: index mismatch for %s (%s)!", ref.String(), meta)
		allOk = false
	}

	if overwrite && batch != nil {
		if err := index.CommitBatch(batch); err != nil {
			return err
		}
	} else if !allOk {
		return fmt.Errorf("index does not match data in %d", packID)
	}
	return nil
}
func testInsertLarge(t *testing.T, kv sorted.KeyValue) { largeKey := make([]byte, sorted.MaxKeySize-1) // setting all the bytes because postgres whines about an invalid byte sequence // otherwise for k, _ := range largeKey { largeKey[k] = 'A' } largeKey[sorted.MaxKeySize-2] = 'B' largeValue := make([]byte, sorted.MaxValueSize-1) for k, _ := range largeValue { largeValue[k] = 'A' } largeValue[sorted.MaxValueSize-2] = 'B' // insert with large key if err := kv.Set(string(largeKey), "whatever"); err != nil { t.Fatalf("Insertion of large key failed: %v", err) } // and verify we can get it back, i.e. that the key hasn't been truncated. it := kv.Find(string(largeKey), "") if !it.Next() || it.Key() != string(largeKey) || it.Value() != "whatever" { it.Close() t.Fatalf("Find(largeKey) = %q, %q; want %q, %q", it.Key(), it.Value(), largeKey, "whatever") } it.Close() // insert with large value if err := kv.Set("whatever", string(largeValue)); err != nil { t.Fatalf("Insertion of large value failed: %v", err) } // and verify we can get it back, i.e. that the value hasn't been truncated. if v, err := kv.Get("whatever"); err != nil || v != string(largeValue) { t.Fatalf("get(\"whatever\") = %q, %v; want %q", v, err, largeValue) } // insert with large key and large value if err := kv.Set(string(largeKey), string(largeValue)); err != nil { t.Fatalf("Insertion of large key and value failed: %v", err) } // and verify we can get them back it = kv.Find(string(largeKey), "") defer it.Close() if !it.Next() || it.Key() != string(largeKey) || it.Value() != string(largeValue) { t.Fatalf("Find(largeKey) = %q, %q; want %q, %q", it.Key(), it.Value(), largeKey, largeValue) } }
// testDeleteNotFound verifies that deleting an absent key is not an error.
func testDeleteNotFound(t *testing.T, kv sorted.KeyValue) {
	err := kv.Delete(notExistKey)
	if err != nil {
		t.Fatalf("Deletion of non existing key returned an error: %v", err)
	}
}
func reindexOne(index sorted.KeyValue, overwrite, verbose bool, r io.ReadSeeker, name string, packId int64) error { l, err := lock.Lock(name + ".lock") defer l.Close() var pos, size int64 errAt := func(prefix, suffix string) error { if prefix != "" { prefix = prefix + " " } if suffix != "" { suffix = " " + suffix } return fmt.Errorf(prefix+"at %d (0x%x) in %q:"+suffix, pos, pos, name) } var batch sorted.BatchMutation if overwrite { batch = index.BeginBatch() } allOk := true br := bufio.NewReaderSize(r, 512) for { if b, err := br.ReadByte(); err != nil { if err == io.EOF { break } return errAt("error while reading", err.Error()) } else if b != '[' { return errAt(fmt.Sprintf("found byte 0x%x", b), "but '[' should be here!") } chunk, err := br.ReadSlice(']') if err != nil { if err == io.EOF { break } return errAt("error reading blob header", err.Error()) } m := len(chunk) chunk = chunk[:m-1] i := bytes.IndexByte(chunk, byte(' ')) if i <= 0 { return errAt("", fmt.Sprintf("bad header format (no space in %q)", chunk)) } if size, err = strconv.ParseInt(string(chunk[i+1:]), 10, 64); err != nil { return errAt(fmt.Sprintf("cannot parse size %q as int", chunk[i+1:]), err.Error()) } ref, ok := blob.Parse(string(chunk[:i])) if !ok { return errAt("", fmt.Sprintf("cannot parse %q as blobref", chunk[:i])) } if verbose { log.Printf("found %s at %d", ref, pos) } meta := blobMeta{packId, pos + 1 + int64(m), size}.String() if overwrite && batch != nil { batch.Set(ref.String(), meta) } else { if old, err := index.Get(ref.String()); err != nil { allOk = false if err == sorted.ErrNotFound { log.Println(ref.String() + ": cannot find in index!") } else { log.Println(ref.String()+": error getting from index: ", err.Error()) } } else if old != meta { allOk = false log.Printf("%s: index mismatch - index=%s data=%s", ref.String(), old, meta) } } pos += 1 + int64(m) // TODO(tgulacsi78): not just seek, but check the hashes of the files // maybe with a different command-line flag, only. 
if pos, err = r.Seek(pos+size, 0); err != nil { return errAt("", "cannot seek +"+strconv.FormatInt(size, 10)+" bytes") } // drain the buffer after the underlying reader Seeks io.CopyN(ioutil.Discard, br, int64(br.Buffered())) } if overwrite && batch != nil { log.Printf("overwriting %s from %s", index, name) if err = index.CommitBatch(batch); err != nil { return err } } else if !allOk { return fmt.Errorf("index does not match data in %q", name) } return nil }