// RunDigestCmdLine packs the records accumulated in the row-store input
// queue into compressed column blocks.
func RunDigestCmdLine() {
	flag.Parse()

	if *sybil.FLAGS.TABLE == "" {
		flag.PrintDefaults()
		return
	}

	if *sybil.FLAGS.PROFILE {
		profile := sybil.RUN_PROFILER()
		defer profile.Start().Stop()
	}

	sybil.DELETE_BLOCKS_AFTER_QUERY = false

	t := sybil.GetTable(*sybil.FLAGS.TABLE)
	if !t.LoadTableInfo() {
		log.Println("Warning: Couldn't read table info, exiting early")
		return
	}

	t.DigestRecords()
}
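// For context on the lifecycle these commands manage: ingestion appends rows
// to a table's row-store input queue, and digestion later folds that queue
// into column blocks. A minimal programmatic sketch, using only calls that
// appear in this file (the table name "uptime" is hypothetical):
//
//	t := sybil.GetTable("uptime")
//	if t.LoadTableInfo() {
//		t.DigestRecords() // pack the ingestion log into column blocks
//	}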
// RunRebuildCmdLine reconstructs a table's info.db by deducing schema
// information from its existing column blocks, for use when info.db is
// missing or broken.
func RunRebuildCmdLine() {
	REPLACE_INFO := flag.Bool("replace", false, "Replace broken info.db if it exists")
	FORCE_UPDATE := flag.Bool("force", false, "Force re-calculation of info.db, even if it exists")
	flag.Parse()

	if *sybil.FLAGS.TABLE == "" {
		flag.PrintDefaults()
		return
	}

	if *sybil.FLAGS.PROFILE {
		profile := sybil.RUN_PROFILER()
		defer profile.Start().Stop()
	}

	t := sybil.GetTable(*sybil.FLAGS.TABLE)

	// If info.db loads cleanly and no forced update was requested, there is
	// nothing to rebuild.
	if t.LoadTableInfo() && !*FORCE_UPDATE {
		log.Println("TABLE INFO ALREADY EXISTS, NOTHING TO REBUILD!")
		return
	}

	t.DeduceTableInfoFromBlocks()

	// TODO: prompt to see if this table info looks good and then write it to
	// original info.db
	if *REPLACE_INFO {
		log.Println("REPLACING info.db WITH DATA COMPUTED ABOVE")
		lock := sybil.Lock{Table: t, Name: "info"}
		lock.ForceDeleteFile()
		t.SaveTableInfo("info")
	} else {
		log.Println("SAVING TO temp_info.db")
		t.SaveTableInfo("temp_info")
	}
}
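// The same rebuild flow can be driven programmatically; a minimal sketch
// using only calls that appear above (table name hypothetical):
//
//	t := sybil.GetTable("uptime")
//	if !t.LoadTableInfo() {
//		t.DeduceTableInfoFromBlocks()
//		t.SaveTableInfo("temp_info") // inspect before replacing info.db
//	}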
func RunSessionizeCmdLine() {
	addSessionFlags()
	flag.Parse()

	start := time.Now()

	table := *sybil.FLAGS.TABLE
	if table == "" {
		flag.PrintDefaults()
		return
	}

	table_names := strings.Split(table, ",")
	log.Println("LOADING TABLES", table_names)

	tables := make([]*sybil.Table, 0)
	for _, tablename := range table_names {
		t := sybil.GetTable(tablename)

		// LOAD TABLE INFOS BEFORE WE CREATE OUR FILTERS, SO WE CAN CREATE
		// FILTERS ON THE RIGHT COLUMN ID
		t.LoadTableInfo()
		t.LoadRecords(nil)

		count := 0
		for _, block := range t.BlockList {
			count += int(block.Info.NumRecords)
		}

		log.Println("WILL INSPECT", count, "RECORDS FROM", tablename)

		// VERIFY THE KEY TABLE IS IN ORDER, OTHERWISE WE NEED TO EXIT
		log.Println("KEY TABLE", t.KeyTable)
		log.Println("KEY TYPES", t.KeyTypes)

		// Each column name must map to a unique column ID; a duplicate ID
		// means the key table is corrupt and the loader would decode one
		// column's data as another's, so this is fatal.
		used := make(map[int16]int)
		for _, v := range t.KeyTable {
			used[v]++
			if used[v] > 1 {
				log.Fatal("THERE IS A SERIOUS KEY TABLE INCONSISTENCY")
			}
		}

		tables = append(tables, t)
	}

	debug.SetGCPercent(-1)

	if *sybil.FLAGS.PROFILE && sybil.PROFILER_ENABLED {
		profile := sybil.RUN_PROFILER()
		defer profile.Start().Stop()
	}

	filters := []sybil.Filter{}
	groupings := []sybil.Grouping{}
	aggs := []sybil.Aggregation{}

	querySpec := sybil.QuerySpec{Groups: groupings, Filters: filters, Aggregations: aggs}
	querySpec.Limit = int16(*sybil.FLAGS.LIMIT)

	if *sybil.FLAGS.SESSION_COL != "" {
		sessionSpec := sybil.NewSessionSpec()
		sybil.LoadAndSessionize(tables, &querySpec, &sessionSpec)
	}

	end := time.Now()
	log.Println("LOAD AND QUERY RECORDS TOOK", end.Sub(start))
}
// RunIngestCmdLine appends incoming records to the table's row-store input
// queue; every now and then, the digest step should pack that queue into
// column blocks.
func RunIngestCmdLine() {
	ingestfile := flag.String("file", sybil.INGEST_DIR, "name of dir to ingest into")
	f_INTS := flag.String("ints", "", "columns to treat as ints (comma delimited)")
	f_CSV := flag.Bool("csv", false, "expect incoming data in CSV format")
	f_EXCLUDES := flag.String("exclude", "", "Columns to exclude (comma delimited)")
	f_JSON_PATH := flag.String("path", "$", "Path to JSON record, ex: $.foo.bar")
	flag.Parse()

	digestfile := *ingestfile

	if *sybil.FLAGS.TABLE == "" {
		flag.PrintDefaults()
		return
	}

	JSON_PATH = *f_JSON_PATH

	if *sybil.FLAGS.PROFILE {
		profile := sybil.RUN_PROFILER()
		defer profile.Start().Stop()
	}

	// Guard against the empty string: strings.Split("", ",") returns [""],
	// which would register a bogus empty column name.
	if *f_INTS != "" {
		for _, v := range strings.Split(*f_INTS, ",") {
			INT_CAST[v] = true
		}
	}
	if *f_EXCLUDES != "" {
		for _, v := range strings.Split(*f_EXCLUDES, ",") {
			EXCLUDES[v] = true
		}
	}

	for k := range EXCLUDES {
		log.Println("EXCLUDING COLUMN", k)
	}

	t := sybil.GetTable(*sybil.FLAGS.TABLE)

	// We have 5 tries to load table info, just in case the lock is held by
	// someone else
	var loaded_table = false
	for i := 0; i < 5; i++ {
		loaded := t.LoadTableInfo()
		if loaded || !t.HasFlagFile() {
			loaded_table = true
			break
		}
		time.Sleep(time.Millisecond * 10)
	}

	if !loaded_table && t.HasFlagFile() {
		log.Println("Warning: Ingestor couldn't read table info, losing samples")
		return
	}

	if *f_CSV {
		import_csv_records()
	} else {
		import_json_records()
	}

	t.IngestRecords(digestfile)
}
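// import_json_records and import_csv_records are defined elsewhere in this
// package. As a rough sketch of what the JSON path does (the details below
// are assumptions for illustration, not the actual implementation), it reads
// one JSON record per line from stdin, descends to JSON_PATH, applies
// INT_CAST and EXCLUDES, and queues each record for ingestion:
//
//	scanner := bufio.NewScanner(os.Stdin)
//	for scanner.Scan() {
//		var rec map[string]interface{}
//		if err := json.Unmarshal(scanner.Bytes(), &rec); err != nil {
//			continue // skip malformed lines
//		}
//		// descend JSON_PATH, drop EXCLUDES, cast INT_CAST columns,
//		// then append the record to the table's input queue
//	}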
func RunQueryCmdLine() {
	addQueryFlags()
	flag.Parse()

	if *LIST_TABLES {
		sybil.PrintTables()
		return
	}

	if *TIME_FORMAT != "" {
		sybil.OPTS.TIME_FORMAT = sybil.GetTimeFormat(*TIME_FORMAT)
	}

	table := *sybil.FLAGS.TABLE
	if table == "" {
		flag.PrintDefaults()
		return
	}

	t := sybil.GetTable(table)

	ints := make([]string, 0)
	groups := make([]string, 0)
	strs := make([]string, 0)

	if *sybil.FLAGS.GROUPS != "" {
		groups = strings.Split(*sybil.FLAGS.GROUPS, ",")
		sybil.OPTS.GROUP_BY = groups
	}

	if *NO_RECYCLE_MEM {
		sybil.FLAGS.RECYCLE_MEM = &sybil.FALSE
	}

	// PROCESS CMD LINE ARGS THAT USE COMMA DELIMITERS
	if *sybil.FLAGS.STRS != "" {
		strs = strings.Split(*sybil.FLAGS.STRS, ",")
	}
	if *sybil.FLAGS.INTS != "" {
		ints = strings.Split(*sybil.FLAGS.INTS, ",")
	}

	if *sybil.FLAGS.PROFILE && sybil.PROFILER_ENABLED {
		profile := sybil.RUN_PROFILER()
		defer profile.Start().Stop()
	}

	if *sybil.FLAGS.LOAD_THEN_QUERY {
		sybil.FLAGS.LOAD_AND_QUERY = &FALSE
	}

	if *sybil.FLAGS.READ_ROWSTORE {
		sybil.FLAGS.READ_INGESTION_LOG = &TRUE
	}

	// LOAD TABLE INFOS BEFORE WE CREATE OUR FILTERS, SO WE CAN CREATE FILTERS
	// ON THE RIGHT COLUMN ID
	t.LoadTableInfo()
	t.LoadRecords(nil)

	count := 0
	for _, block := range t.BlockList {
		count += int(block.Info.NumRecords)
	}

	log.Println("WILL INSPECT", count, "RECORDS")

	groupings := []sybil.Grouping{}
	for _, g := range groups {
		groupings = append(groupings, t.Grouping(g))
	}

	aggs := []sybil.Aggregation{}
	for _, agg := range ints {
		aggs = append(aggs, t.Aggregation(agg, *sybil.FLAGS.OP))
	}

	// VERIFY THE KEY TABLE IS IN ORDER, OTHERWISE WE NEED TO EXIT
	log.Println("KEY TABLE", t.KeyTable)
	log.Println("KEY TYPES", t.KeyTypes)

	// Each column name must map to a unique column ID; a duplicate ID means
	// the key table is corrupt, so this is fatal.
	used := make(map[int16]int)
	for _, v := range t.KeyTable {
		used[v]++
		if used[v] > 1 {
			log.Fatal("THERE IS A SERIOUS KEY TABLE INCONSISTENCY")
		}
	}

	loadSpec := t.NewLoadSpec()
	filterSpec := sybil.FilterSpec{Int: *sybil.FLAGS.INT_FILTERS, Str: *sybil.FLAGS.STR_FILTERS, Set: *sybil.FLAGS.SET_FILTERS}
	filters := sybil.BuildFilters(t, &loadSpec, filterSpec)

	querySpec := sybil.QuerySpec{Groups: groupings, Filters: filters, Aggregations: aggs}

	for _, v := range groups {
		switch t.GetColumnType(v) {
		case sybil.STR_VAL:
			loadSpec.Str(v)
		case sybil.INT_VAL:
			loadSpec.Int(v)
		default:
			t.PrintColInfo()
			fmt.Println()
			log.Fatal("Unknown column type for column: ", v, t.GetColumnType(v))
		}
	}
	for _, v := range strs {
		loadSpec.Str(v)
	}
	for _, v := range ints {
		loadSpec.Int(v)
	}

	if *sybil.FLAGS.SORT != "" {
		if *sybil.FLAGS.SORT != sybil.OPTS.SORT_COUNT {
			loadSpec.Int(*sybil.FLAGS.SORT)
		}
		querySpec.OrderBy = *sybil.FLAGS.SORT
	} else {
		querySpec.OrderBy = ""
	}

	if *sybil.FLAGS.TIME {
		// TODO: infer the TimeBucket size
		querySpec.TimeBucket = *sybil.FLAGS.TIME_BUCKET
		log.Println("USING TIME BUCKET", querySpec.TimeBucket, "SECONDS")
		loadSpec.Int(*sybil.FLAGS.TIME_COL)
		time_col_id, ok := t.KeyTable[*sybil.FLAGS.TIME_COL]
		if ok {
			sybil.OPTS.TIME_COL_ID = time_col_id
		}
	}

	if *sybil.FLAGS.WEIGHT_COL != "" {
		sybil.OPTS.WEIGHT_COL = true
		loadSpec.Int(*sybil.FLAGS.WEIGHT_COL)
		sybil.OPTS.WEIGHT_COL_ID = t.KeyTable[*sybil.FLAGS.WEIGHT_COL]
	}

	querySpec.Limit = int16(*sybil.FLAGS.LIMIT)

	if *sybil.FLAGS.SAMPLES {
		sybil.HOLD_MATCHES = true
		sybil.DELETE_BLOCKS_AFTER_QUERY = false

		loadSpec := t.NewLoadSpec()
		loadSpec.LoadAllColumns = true

		t.LoadAndQueryRecords(&loadSpec, &querySpec)
		t.PrintSamples()
		return
	}

	if !*sybil.FLAGS.PRINT_INFO {
		// DISABLE GC FOR QUERY PATH
		log.Println("ADDING BULLET HOLES FOR SPEED (DISABLING GC)")
		debug.SetGCPercent(-1)

		log.Println("USING LOAD SPEC", loadSpec)
		log.Println("USING QUERY SPEC", querySpec)

		start := time.Now()

		// We can load and query at the same time
		if *sybil.FLAGS.LOAD_AND_QUERY {
			count = t.LoadAndQueryRecords(&loadSpec, &querySpec)
			end := time.Now()
			log.Println("LOAD AND QUERY RECORDS TOOK", end.Sub(start))

			querySpec.PrintResults()

			if sybil.FLAGS.ANOVA_ICC != nil && *sybil.FLAGS.ANOVA_ICC {
				querySpec.CalculateICC()
			}
		}
	}

	if *sybil.FLAGS.PRINT_INFO {
		t := sybil.GetTable(table)
		sybil.FLAGS.LOAD_AND_QUERY = &FALSE

		t.LoadRecords(nil)
		t.PrintColInfo()
	}
}
func RunTrimCmdLine() {
	MB_LIMIT := flag.Int("mb", 0, "max table size in MB")
	DELETE_BEFORE := flag.Int("before", 0, "delete blocks with data older than TIMESTAMP")
	DELETE := flag.Bool("delete", false, "delete blocks? be careful! will actually delete your data!")
	REALLY := flag.Bool("really", false, "don't prompt before deletion")
	sybil.FLAGS.TIME_COL = flag.String("time-col", "", "which column to treat as a timestamp [REQUIRED]")
	flag.Parse()

	if *sybil.FLAGS.TABLE == "" || *sybil.FLAGS.TIME_COL == "" {
		flag.PrintDefaults()
		return
	}

	if *sybil.FLAGS.PROFILE {
		profile := sybil.RUN_PROFILER()
		defer profile.Start().Stop()
	}

	sybil.DELETE_BLOCKS_AFTER_QUERY = false

	t := sybil.GetTable(*sybil.FLAGS.TABLE)
	if !t.LoadTableInfo() {
		log.Println("Warning: Couldn't read table info, exiting early")
		return
	}

	loadSpec := t.NewLoadSpec()
	loadSpec.Int(*sybil.FLAGS.TIME_COL)

	trimSpec := sybil.TrimSpec{}
	trimSpec.DeleteBefore = int64(*DELETE_BEFORE)
	trimSpec.MBLimit = int64(*MB_LIMIT)

	to_trim := t.TrimTable(&trimSpec)

	log.Println("FOUND", len(to_trim), "CANDIDATE BLOCKS FOR TRIMMING")
	for _, b := range to_trim {
		fmt.Println(b.Name)
	}

	if *DELETE {
		if !*REALLY {
			fmt.Println("DELETE THE ABOVE BLOCKS? (Y/N)")
			if !askConfirmation() {
				log.Println("ABORTING")
				return
			}
		}

		log.Println("DELETING CANDIDATE BLOCKS")
		for _, b := range to_trim {
			log.Println("DELETING", b.Name)
			// Sanity check: refuse to delete suspiciously short paths so a
			// malformed block name can't wipe out a whole directory tree.
			if len(b.Name) > 5 {
				os.RemoveAll(b.Name)
			} else {
				log.Println("REFUSING TO DELETE", b.Name)
			}
		}
	}
}
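// askConfirmation is used above but defined elsewhere in this package. A
// minimal sketch of such a helper (an assumption for illustration, not the
// project's actual implementation) reads a Y/N answer from stdin:
func askConfirmationSketch() bool {
	var response string
	// Scanln returns an error on EOF or empty input; treat that as "no".
	if _, err := fmt.Scanln(&response); err != nil {
		return false
	}
	switch strings.ToLower(response) {
	case "y", "yes":
		return true
	}
	return false
}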