func TestMultiDomainLogIter(t *testing.T) {
	domains := []string{"test", "another_test"}

	// Number of transactions
	n := 100

	// Number of facts per transaction
	m := 100

	engine := randMultidomainStorage(domains, n, m)

	// Open and merge the commit logs.
	log0, err := view.OpenLog(engine, domains[0], "commit")

	if err != nil {
		t.Fatal(err)
	}

	log1, err := view.OpenLog(engine, domains[1], "commit")

	if err != nil {
		t.Fatal(err)
	}

	now := time.Now()

	mergedStreams := view.Merge(log0.Asof(now), log1.Asof(now))

	if err = mergedStreams.Err(); err != nil {
		t.Fatal(err)
	}

	i := 0

	var prevTrnId uint64 = 1<<64 - 1

	for {
		f := mergedStreams.Next()

		if err = mergedStreams.Err(); err != nil {
			t.Fatal(err)
		}

		if f == nil {
			break
		}

		if prevTrnId < f.Transaction {
			t.Errorf("Transactions are ordered incorrectly")
		}

		// Record the transaction ID so consecutive facts are compared;
		// without this update the check always passes against the
		// initial max-uint64 sentinel.
		prevTrnId = f.Transaction

		i++
	}

	if i != n*m {
		t.Errorf("expected %d facts, got %d", n*m, i)
	}
}
func mergeDomains(engine storage.Engine, w origins.Writer, domains []string, since, asof time.Time) int {
	var (
		err   error
		count int
		log   *view.Log
	)

	iters := make([]origins.Iterator, len(domains))

	// Merge and output facts across domains.
	for i, d := range domains {
		log, err = view.OpenLog(engine, d, "commit")

		if err != nil {
			logrus.Fatal(err)
		}

		iters[i] = log.View(since, asof)
	}

	if count, err = origins.Copy(view.Merge(iters...), w); err != nil {
		logrus.Fatal(err)
	}

	return count
}
func concatDomains(engine storage.Engine, w origins.Writer, domains []string, since, asof time.Time) int {
	var (
		err      error
		n, count int
		log      *view.Log
	)

	// Output facts for each domain in the order they are supplied.
	for _, d := range domains {
		log, err = view.OpenLog(engine, d, "commit")

		if err != nil {
			logrus.Fatal(err)
		}

		v := log.View(since, asof)

		n, err = origins.Copy(v, w)

		if err != nil {
			logrus.Fatal(err)
		}

		count += n
	}

	return count
}
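// Hypothetical usage sketch, not part of the codebase: both helpers stream a
// set of domains to a writer and differ only in ordering. mergeDomains
// interleaves facts across domains in transaction order, while concatDomains
// emits each domain's facts in the order the domains are supplied. The CSV
// writer construction below is an assumption, for illustration only.
func writeDomains(engine storage.Engine, domains []string, merged bool) int {
	w := origins.NewCSVWriter(os.Stdout)

	// A zero "since" imposes no lower bound; "asof" pins the upper bound
	// to the current time.
	since := time.Time{}
	asof := time.Now()

	if merged {
		return mergeDomains(engine, w, domains, since, asof)
	}

	return concatDomains(engine, w, domains, since, asof)
}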
// Encapsulates the logic for building a domain-based iterator.
func domainIteratorResource(domain string, r *http.Request, e storage.Engine) (origins.Iterator, int, error) {
	var (
		err           error
		since, asof   time.Time
		offset, limit int
	)

	if since, asof, err = parseTimeParams(r); err != nil {
		return nil, StatusUnprocessableEntity, err
	}

	if offset, limit, err = parseSliceParams(r); err != nil {
		return nil, StatusUnprocessableEntity, err
	}

	log, err := view.OpenLog(e, domain, "commit")

	if err == view.ErrDoesNotExist {
		return nil, http.StatusNotFound, err
	}

	if err != nil {
		return nil, http.StatusInternalServerError, err
	}

	iter := log.View(since, asof)

	if offset > 0 || limit > 0 {
		iter = origins.Slice(iter, offset, limit)
	}

	return iter, http.StatusOK, nil
}
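// Hypothetical handler sketch, not part of the codebase, showing how
// domainIteratorResource could be wired into an HTTP endpoint. Extracting
// the domain from the query string and writing CSV to the response are
// illustrative assumptions; the actual routing may differ.
func domainHandler(e storage.Engine) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		domain := r.URL.Query().Get("domain")

		iter, code, err := domainIteratorResource(domain, r, e)

		if err != nil {
			http.Error(w, err.Error(), code)
			return
		}

		// Stream the facts to the response.
		if _, err := origins.Copy(iter, origins.NewCSVWriter(w)); err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
		}
	}
}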
func TestLogIter(t *testing.T) {
	domain := "test"

	// Transactions
	n := 100

	// Size of write
	m := 100

	engine := randStorage(domain, n, m)

	// Open the commit log.
	log, err := view.OpenLog(engine, domain, "commit")

	if err != nil {
		t.Fatal(err)
	}

	num, err := testNext(log.Now())

	if err != nil {
		t.Fatal(err)
	}

	if num != n*m {
		t.Errorf("expected %d facts, got %d", n*m, num)
	}
}
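// Plausible sketch of the testNext helper referenced by these tests and
// benchmarks; its actual definition is not shown in this section. The
// assumption is that it drains the iterator, counting facts and returning
// the first error encountered.
func testNext(iter origins.Iterator) (int, error) {
	var n int

	for {
		f := iter.Next()

		if err := iter.Err(); err != nil {
			return n, err
		}

		// A nil fact signals the iterator is exhausted.
		if f == nil {
			return n, nil
		}

		n++
	}
}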
func TestLogReader(t *testing.T) {
	domain := "test"

	// Transactions
	n := 100

	// Size of write
	m := 100

	engine := randStorage(domain, n, m)

	// Open the commit log.
	log, err := view.OpenLog(engine, domain, "commit")

	if err != nil {
		t.Fatal(err)
	}

	iter := log.Now()

	facts, err := origins.ReadAll(iter)

	if err != nil {
		t.Fatal(err)
	}

	if len(facts) != n*m {
		t.Errorf("expected %d facts, got %d", n*m, len(facts))
	}
}
func benchmarkDeduplication(b *testing.B, numTrn, factsPerTrn, eLen, aLen, vLen int) {
	domain := "test"

	engine := randStorageWRepeats(domain, numTrn, factsPerTrn, eLen, aLen, vLen)

	log, err := view.OpenLog(engine, domain, "commit")

	if err != nil {
		b.Fatal(err)
	}

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		now := log.Now()

		iter := view.Deduplicate(now)

		if err = iter.Err(); err != nil {
			b.Fatal(err)
		}

		// The Deduplicate() operation is lazy, and most of the actual work
		// happens during Next(), so to evaluate the true cost of
		// deduplication we need to time how long it takes to step through
		// the resulting iterator.
		_, err = testNext(iter)

		if err != nil {
			b.Fatal(err)
		}
	}
}
// Load materializes the current state of a schema from the database.
func Load(engine storage.Engine, domain string) (*Schema, error) {
	log, err := view.OpenLog(engine, domain, "commit")

	if err != nil {
		return nil, err
	}

	iter := log.Now()

	return Init(domain, iter), nil
}
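// Hypothetical usage sketch, not part of the codebase: loading a schema and
// distinguishing a missing commit log from other failures. The domain name
// is illustrative.
func loadExample(engine storage.Engine) (*Schema, error) {
	schema, err := Load(engine, "example.domain")

	if err == view.ErrDoesNotExist {
		// No commit log exists for this domain yet.
		return nil, err
	} else if err != nil {
		return nil, err
	}

	return schema, nil
}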
func TestLogExcludeDuplicates(t *testing.T) {
	domain := "test"

	// Number of transactions
	n := 100

	// Number of facts per transaction
	m := 100

	eLen, aLen, vLen := 2, 3, 4

	engine := randStorageWRepeats(domain, n, m, eLen, aLen, vLen)

	// Open the commit log.
	log, err := view.OpenLog(engine, domain, "commit")

	if err != nil {
		t.Fatal(err)
	}

	// First check the total number of facts.
	num, err := testNext(log.Now())

	if err != nil {
		t.Fatal(err)
	}

	if num != n*m {
		t.Errorf("expected %d total facts, got %d", n*m, num)
	}

	// Now check that Next() works on the deduplicated stream,
	// and verify the number of unique facts.
	iter := view.Deduplicate(log.Now())

	if err := iter.Err(); err != nil {
		t.Fatal(err)
	}

	num, err = testNext(iter)

	if err != nil {
		t.Fatal(err)
	}

	// With the dictionary size being very small (e.g. 24) compared to the
	// number of generated facts (e.g. 10,000), the probability that any of
	// the possible facts didn't get generated is negligible.
	if num != eLen*aLen*vLen {
		t.Errorf("expected %d unique facts, got %d", eLen*aLen*vLen, num)
	}
}
func benchmarkDomainMerge(b *testing.B, numTrn, factsPerTrn, numDomains int) {
	var err error

	domains := make([]string, numDomains)
	iters := make([]origins.Iterator, len(domains))
	logs := make([]*view.Log, len(domains))

	for j := 0; j < numDomains; j++ {
		domains[j] = "domain_" + strconv.Itoa(j+1)
	}

	engine := randMultidomainStorage(domains, numTrn, factsPerTrn)

	for j := 0; j < len(domains); j++ {
		logs[j], err = view.OpenLog(engine, domains[j], "commit")

		if err != nil {
			b.Fatal(err)
		}
	}

	now := time.Now()

	b.ResetTimer()

	for i := 0; i < b.N; i++ {
		for j := 0; j < len(domains); j++ {
			iters[j] = logs[j].Asof(now)
		}

		iter := view.Merge(iters...)

		if err := iter.Err(); err != nil {
			b.Fatal(err)
		}

		// The Merge() operation is lazy, and most of the actual work happens
		// during Next(), so to evaluate the true cost of merging we need to
		// time how long it takes to step through the resulting iterator.
		_, err = testNext(iter)

		if err != nil {
			b.Fatal(err)
		}
	}
}
func TestLogSince(t *testing.T) {
	domain := "test"

	// Transactions
	n := 100

	// Size of write
	m := 100

	engine := randStorage(domain, n, m)

	// Open the commit log. The error was previously unchecked; fail fast
	// like the other tests do.
	log, err := view.OpenLog(engine, domain, "commit")

	if err != nil {
		t.Fatal(err)
	}

	// 1 minute before
	min := time.Now().Add(-time.Minute)

	iter := log.Since(min)

	facts, err := origins.ReadAll(iter)

	if err != nil {
		t.Fatal(err)
	}

	if len(facts) != n*m {
		t.Errorf("expected %d facts, got %d", n*m, len(facts))
	}

	// 1 minute later
	min = time.Now().Add(time.Minute)

	iter = log.Since(min)

	facts, err = origins.ReadAll(iter)

	if err != nil {
		t.Fatal(err)
	}

	if len(facts) != 0 {
		t.Errorf("expected 0 facts, got %d", len(facts))
	}
}
func (p *Pipeline) initCache() error {
	logrus.Debugf("transactor.Pipeline(%s): initializing cache", p.Domain)

	p.initialized = true

	log, err := view.OpenLog(p.engine, p.Domain, commitLogName)

	// This denotes the domain is new.
	if err == view.ErrDoesNotExist {
		return nil
	} else if err != nil {
		return err
	}

	facts, err := origins.ReadAll(log.Asof(p.segment.Time))

	if err != nil {
		return err
	}

	// Sort facts by entity.
	origins.Timsort(facts, origins.EAVTComparator)

	// Group the facts by entity.
	giter := origins.Groupby(origins.NewBuffer(facts), func(f1, f2 *origins.Fact) bool {
		return f1.Entity.Is(f2.Entity)
	})

	// Initialize the ctrie, keyed by entity name.
	cache := ctrie.New(nil)

	err = origins.MapFacts(giter, func(facts origins.Facts) error {
		cache.Insert([]byte(facts[0].Entity.Name), facts)
		return nil
	})

	p.cache = cache

	logrus.Debugf("transactor.Pipeline(%s): cache initialized", p.Domain)

	return err
}
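// Hypothetical lookup sketch, not part of the codebase: once initCache has
// run, prior facts for an entity could be fetched from the ctrie by name.
// This assumes the ctrie exposes Lookup(key []byte) (interface{}, bool),
// mirroring the Insert call above; verify against the vendored package.
func (p *Pipeline) cachedFacts(name string) origins.Facts {
	if v, ok := p.cache.Lookup([]byte(name)); ok {
		return v.(origins.Facts)
	}

	return nil
}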
"github.com/chop-dbhi/origins" "github.com/chop-dbhi/origins/view" "github.com/spf13/cobra" ) var domainsCmd = &cobra.Command{ Use: "domains", Short: "Outputs a list of domains.", Run: func(cmd *cobra.Command, args []string) { bindStorageFlags(cmd.Flags()) engine := initStorage() log, err := view.OpenLog(engine, origins.DomainsDomain, "commit") if err != nil { logrus.Fatal(err) } idents, err := origins.Entities(log.Now()) if err != nil { logrus.Fatal(err) } sort.Sort(idents) for _, ident := range idents { os.Stdout.Write([]byte(ident.Name + "\n"))