func main() { doUtf8 := true doRaw := true if util.IsTerminal(os.Stdin) { syntax() return } for _, arg := range os.Args[1:] { switch arg { case "-r": doUtf8 = false case "-u": doRaw = false default: syntax() return } } if !doUtf8 && !doRaw { syntax() return } data, err := ioutil.ReadAll(os.Stdin) util.CheckErr(err) str := string(data) if doRaw { fmt.Println("[[[RAW]]]") n := 0 for i, p := range textcat.GetPatterns(str, false) { if i == textcat.MaxPatterns { break } n += 1 fmt.Printf("%s\t%d\n", p.S, p.I) } if n < textcat.MaxPatterns { fmt.Fprintf(os.Stderr, "Warning: there are less than %d raw patterns\n", textcat.MaxPatterns) } } if doUtf8 { fmt.Println("[[[UTF8]]]") n := 0 for i, p := range textcat.GetPatterns(str, true) { if i == textcat.MaxPatterns { break } n += 1 fmt.Printf("%s\t%d\n", p.S, p.I) } if n < textcat.MaxPatterns { fmt.Fprintf(os.Stderr, "Warning: there are less than %d utf8 patterns\n", textcat.MaxPatterns) } } }
func main() { parser := flags.NewParser(&opts, flags.Default) args, err := parser.Parse() if err != nil { os.Exit(1) } parser.Name = "pt" parser.Usage = "[OPTIONS] PATTERN [PATH]" if !terminal.IsTerminal(os.Stdout) { opts.NoColor = true opts.NoGroup = true } if len(args) == 0 { parser.WriteHelp(os.Stdout) os.Exit(1) } var root = "." if len(args) == 2 { root = args[1] } searcher := search.Searcher{root, args[0], &opts} searcher.Search() }
func main() { termcols = getTermWidth() flag.Parse() buildPatterns() if len(*file) > 0 { f, err := os.Create(*file) if err != nil { log.Fatal("Error opening output file: "+*file, err) } outputFile = f } if *input == "auto" { if !util.IsTerminal(os.Stdin) { *input = "stdin" } else { *input = "adb" } } switch *input { case "adb": testEnv() deviceId, err := getDeviceId() if err != nil { log.Fatal("Error: ", err) return } if deviceId == "????????????" { log.Fatal("No permissions for device") return } fmt.Printf("Selected device: %s\n\n", deviceId) getPids() adbReadlog(deviceId) case "stdin": fileReadlog(os.Stdin) default: file, err := os.Open(*input) if err != nil { log.Fatal("Error: ", err) return } fileReadlog(file) } }
func main() { if len(os.Args) != 2 || (os.Args[1] != "en" && os.Args[1] != "nl") || util.IsTerminal(os.Stdin) { fmt.Printf(` Usage: %s language < text ... where language is one of: en nl `, os.Args[0]) return } scanner := bufio.NewScanner(os.Stdin) for scanner.Scan() { fmt.Println(untok(scanner.Text(), os.Args[1])) } if err := scanner.Err(); err != nil { log.Fatal(err) } }
// useStdin determines if we're using the terminal's stdin or not func useStdin() bool { return !util.IsTerminal(os.Stdin) }
// UseStdin reports whether input should be taken from stdin, which is the
// case exactly when util.IsTerminal says stdin is not a terminal. The
// receiver's fields are not consulted.
func (c *ShuttleConfig) UseStdin() bool {
	interactive := util.IsTerminal(os.Stdin)
	return !interactive
}
func main() { var prefix = flag.String("prefix", ".", "Prefix used to match lines") var delimeter = flag.String("delimeter", "=", "Prefix used to match lines") flag.Parse() if util.IsTerminal(os.Stdin) { fmt.Println("We only support streams from unix pipes at the moment, please pipe something into pfdb") return } err := termbox.Init() if err != nil { panic(err) } defer termbox.Close() termbox.SetInputMode(termbox.InputEsc | termbox.InputMouse) state := ProgramState{ 0, []string{}, []int64{}, map[string]*HistoricalVariable{}, []string{}, 1, 0, 0, int64(0), true, time.Now().UnixNano(), } stdin_channel := make(chan string) input_channel := make(chan termbox.Event) go func() { bio := bufio.NewReader(os.Stdin) for { line, _, err := bio.ReadLine() if err != nil { break } stdin_channel <- string(line) } }() go func() { for { input_channel <- termbox.PollEvent() } }() draw_all(&state) loop: for { select { case line := <-stdin_channel: fmt.Println("A LINE") timestamp := time.Now().UnixNano() if len(line) > 0 && strings.HasPrefix(line, *prefix) { slices := strings.SplitN(strings.TrimPrefix(string(line), *prefix), *delimeter, 2) if len(slices) == 2 { existing := state.log[slices[0]] if existing == nil { existing = &HistoricalVariable{slices[0], []string{}, []int64{}, 0} state.log[slices[0]] = existing state.log_keys = append(state.log_keys, slices[0]) } existing.Values = append(existing.Values, slices[1]) existing.Timestamp = append(existing.Timestamp, timestamp) // Update the existing thing to realtime if state.realtime { existing.Focused = len(existing.Values) - 1 } } } state.buffer = append(state.buffer, string(line)) state.timestamps = append(state.timestamps, timestamp) if state.realtime { _, height := termbox.Size() state.offset = max(len(state.buffer)-height+1, 0) state.selected_buffer_line = len(state.buffer) - 1 } draw_all(&state) case ev := <-input_channel: switch ev.Type { case termbox.EventKey: switch ev.Key { case termbox.KeyEsc: break loop case 
termbox.KeyArrowDown: switch state.focused_pane { case 0: state.realtime = false state.selected_buffer_line = min(state.selected_buffer_line+1, len(state.buffer)-1) _, height := termbox.Size() state.timecursor = state.timestamps[state.selected_buffer_line] state.offset = max(state.selected_buffer_line-height+2, 0) seek_to_time(state.timecursor, &state.log) case 1: state.selected_index = min(state.selected_index+1, len(state.log)-1) } case termbox.KeyArrowUp: switch state.focused_pane { case 0: state.realtime = false state.selected_buffer_line = max(0, state.selected_buffer_line-1) _, height := termbox.Size() state.timecursor = state.timestamps[state.selected_buffer_line] state.offset = max(state.selected_buffer_line-height+2, 0) seek_to_time(state.timecursor, &state.log) case 1: state.selected_index = max(0, state.selected_index-1) } case termbox.KeyArrowLeft: if state.focused_pane == 1 { state.realtime = false // Figure out where the new timecursor should be v := state.log[state.log_keys[state.selected_index]] v.Focused = max(0, min(len(v.Values)-1, v.Focused-1)) state.timecursor = v.Timestamp[v.Focused] // Update _, height := termbox.Size() state.selected_buffer_line = scroll_to_time(state.timecursor, state.selected_buffer_line, &state.timestamps) state.offset = max(state.selected_buffer_line-height+1, 0) seek_to_time(state.timecursor, &state.log) } case termbox.KeyArrowRight: if state.focused_pane == 1 { state.realtime = false // Figure out where the new timecursor should be v := state.log[state.log_keys[state.selected_index]] v.Focused = max(0, min(len(v.Values)-1, v.Focused+1)) state.timecursor = v.Timestamp[v.Focused] // Update _, height := termbox.Size() state.selected_buffer_line = scroll_to_time(state.timecursor, state.selected_buffer_line, &state.timestamps) state.offset = max(state.selected_buffer_line-height+1, 0) seek_to_time(state.timecursor, &state.log) } case termbox.KeySpace: state.focused_pane = (state.focused_pane + 1) % 2 case termbox.KeyCtrlR: 
state.realtime = true for _, v := range state.log { v.Focused = len(v.Values) - 1 } state.selected_buffer_line = len(state.buffer) - 1 } draw_all(&state) case termbox.EventMouse: case termbox.EventResize: draw_all(&state) } } } }
// main loads a corpus into the database.
//
// Command line: [-a] [-w] [-i] [-s] [-p regexp] [-d] id description owner public,
// with the names of the input files on stdin (one per line).
//
// Outline, in order:
//  1. Parse options and the four positional arguments; read setup.toml from
//     the directory given by $PAQU, DefaultPaquDir or $HOME/.paqu.
//  2. Connect to the database; detect an existing corpus and, with -a, fetch
//     the current top ids and drop the indexes of the existing tables.
//  3. Register the corpus in the info table with status WORKING and, unless
//     appending, (re)create the corpus tables.
//  4. Read file names from stdin and hand each document to do_data/do_dact
//     (defined elsewhere), then flush the buffers and commit.
//  5. Unless -i was given: add indexes, build the word table (word -> lemmas)
//     and the metadata tables mval/minf, and fill in the idx column of meta.
//  6. Store line count and status in the info table and add the corpus to
//     the corpora table.
//
// Errors are passed to util.CheckErr (defined elsewhere; presumably it aborts
// the program on a non-nil error -- confirm).
func main() {

	now := time.Now()

	// Reserve space in each of the insert buffers up front.
	buffer[DPRL].Grow(50000)
	buffer[SENT].Grow(50000)
	buffer[WORD].Grow(50000)
	buffer[FILE].Grow(50000)
	buffer[ARCH].Grow(50000)
	buffer[META].Grow(50000)
	buffer[MIDX].Grow(50000)

	// Option parsing: each recognised option is removed from os.Args, so that
	// afterwards os.Args[1:] holds only the positional arguments.
	db_makeindex = true
	db_updatestatus = true
	for len(os.Args) > 1 {
		if os.Args[1] == "-a" {
			db_append = true
			db_overwrite = false
		} else if os.Args[1] == "-w" {
			db_append = false
			db_overwrite = true
		} else if os.Args[1] == "-i" {
			db_makeindex = false
		} else if os.Args[1] == "-s" {
			db_updatestatus = false
		} else if os.Args[1] == "-p" && len(os.Args) > 2 {
			db_strippath = true
			// Shift off "-p" so that os.Args[1] is its value; the shift at the
			// end of the loop body then removes the value as well.
			os.Args = append(os.Args[:1], os.Args[2:]...)
			rePath = regexp.MustCompile(os.Args[1])
		} else if os.Args[1] == "-d" {
			db_decode = true
		} else {
			break
		}
		os.Args = append(os.Args[:1], os.Args[2:]...)
	}

	// Exactly four positional arguments are required, and the file names must
	// be piped in on stdin.
	if len(os.Args) != 5 || util.IsTerminal(os.Stdin) {
		fmt.Printf(` Syntax: %s [-a] [-w] [-i] [-s] [-p regexp] [-d] id description owner public < bestandnamen Opties: -a : toevoegen aan bestaande database -w : bestaande database overschrijven -i : geen tabel van woord naar lemmas aanmaken -s : status niet bijwerken als klaar -p : prefix die van bestandnaam wordt gestript voor label -d : bestandnaam decoderen voor label id: description: owner: 'none' of een e-mailadres public: 0 (private) of 1 (public) `, os.Args[0])
		return
	}

	prefix = strings.TrimSpace(os.Args[1])
	desc = strings.TrimSpace(os.Args[2])
	owner = strings.TrimSpace(os.Args[3])
	public = strings.TrimSpace(os.Args[4])

	// Locate the configuration directory: $PAQU, else the compiled-in default,
	// else $HOME/.paqu.
	paqudir := os.Getenv("PAQU")
	if paqudir == "" {
		if DefaultPaquDir != "" {
			paqudir = DefaultPaquDir
		} else {
			paqudir = filepath.Join(os.Getenv("HOME"), ".paqu")
		}
	}
	_, err := TomlDecodeFile(filepath.Join(paqudir, "setup.toml"), &Cfg)
	util.CheckErr(err)

	// Validate the positional arguments.
	if desc == "" {
		util.CheckErr(fmt.Errorf("De omschrijving mag niet leeg zijn"))
	}
	if owner != "none" && strings.Index(owner, "@") < 0 {
		util.CheckErr(fmt.Errorf("De eigenaar moet 'none' zijn of een e-mailadres"))
	}
	if prefix == "" {
		util.CheckErr(fmt.Errorf("De id mag niet leeg zijn"))
	}
	// The id is spliced into table names below, so only a-z is accepted.
	for _, c := range prefix {
		if c < 'a' || c > 'z' {
			util.CheckErr(fmt.Errorf("Ongeldige tekens in '%s'. Alleen kleine letters a tot z mogen.", prefix))
		}
	}

	db = connect()
	defer func() {
		fmt.Println("Verbinding met database wordt gesloten...")
		util.CheckErr(db.Close())
	}()

	//
	// check whether the database already exists
	//

	rows, err := db.Query("SELECT `begin` FROM `" + Cfg.Prefix + "_c_" + prefix + "_deprel` LIMIT 0, 1;")
	if err == nil && rows.Next() {
		rows.Close()
		if !(db_append || db_overwrite) {
			util.CheckErr(fmt.Errorf("De database bestaat al, en er is geen optie -a of -w"))
		}
		db_exists = true
		if db_append {
			// Fetch the current highest ids of arch, file and midx; a failed
			// Scan (e.g. NULL from an empty table) leaves the value at -1.
			rows, err := db.Query("SELECT MAX(id) FROM " + Cfg.Prefix + "_c_" + prefix + "_arch")
			util.CheckErr(err)
			if rows.Next() {
				if rows.Scan(&toparch) != nil {
					toparch = -1
				}
				rows.Close()
			}
			rows, err = db.Query("SELECT MAX(id) FROM " + Cfg.Prefix + "_c_" + prefix + "_file")
			util.CheckErr(err)
			if rows.Next() {
				if rows.Scan(&topfile) != nil {
					topfile = -1
				}
				rows.Close()
			}
			rows, err = db.Query("SELECT MAX(id) FROM " + Cfg.Prefix + "_c_" + prefix + "_midx")
			util.CheckErr(err)
			if rows.Next() {
				if rows.Scan(&topmidx) != nil {
					topmidx = -1
				}
				rows.Close()
			}

			// Drop the indexes of the existing tables; the results of these
			// statements are not checked. The indexes are added again after
			// loading (unless -i was given).
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_deprel ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_deprel DROP INDEX word, DROP INDEX lemma, DROP INDEX root, DROP INDEX postag, DROP INDEX rel, DROP INDEX hword, DROP INDEX hlemma, DROP INDEX hroot, DROP INDEX hpostag, DROP INDEX file, DROP INDEX arch;`)
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_sent ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_sent DROP INDEX file, DROP INDEX arch, DROP INDEX lbl;`)
			fmt.Println("Verwijderen index uit " + Cfg.Prefix + "_c_" + prefix + "_file ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_file DROP INDEX id`)
			fmt.Println("Verwijderen index uit " + Cfg.Prefix + "_c_" + prefix + "_arch ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_arch DROP INDEX id;`)
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_meta ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_meta DROP INDEX id, DROP INDEX file, DROP INDEX arch, DROP INDEX tval, DROP INDEX ival, DROP INDEX fval, DROP INDEX dval, DROP INDEX idx;`)
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_midx ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_midx DROP INDEX id, DROP INDEX name;`)
		}
	}

	share := "PRIVATE"
	if public == "1" {
		share = "PUBLIC"
	}

	// Make sure there is a row for this corpus in the info table, then mark it
	// as WORKING and remove it from the corpora table while it is being built.
	db.Exec(fmt.Sprintf("INSERT `%s_info` (`id`,`description`,`msg`,`params`) VALUES (%q,\"\",\"\",\"\");",
		Cfg.Prefix, prefix)) // ignore error
	_, err = db.Exec(fmt.Sprintf("UPDATE `%s_info` SET `description` = %q, `owner` = %q, `status` = \"WORKING\", `shared` = %q WHERE `id` = %q",
		Cfg.Prefix, desc, owner, share, prefix))
	util.CheckErr(err)

	_, err = db.Exec("DELETE FROM " + Cfg.Prefix + "_corpora WHERE `prefix` = \"" + prefix + "\";")
	util.CheckErr(err)

	// drop old tables
	if !db_exists || db_overwrite {
		_, err := db.Exec(fmt.Sprintf(
			"DROP TABLE IF EXISTS `%s_c_%s_deprel`, `%s_c_%s_sent`, `%s_c_%s_file`, `%s_c_%s_arch`, `%s_c_%s_meta`, `%s_c_%s_midx`;",
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix))
		util.CheckErr(err)

		// create new tables
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_deprel ( idd int NOT NULL AUTO_INCREMENT PRIMARY KEY, word varchar(128) NOT NULL, lemma varchar(128) NOT NULL, root varchar(128) NOT NULL, postag varchar(64) NOT NULL, rel varchar(64) NOT NULL, hword varchar(128) NOT NULL, hlemma varchar(128) NOT NULL, hroot varchar(128) NOT NULL, hpostag varchar(64) NOT NULL, arch int NOT NULL, file int NOT NULL, begin int NOT NULL, end int NOT NULL, hbegin int NOT NULL, hend int NOT NULL, mark varchar(128) NOT NULL) DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_sent ( arch int NOT NULL, file int NOT NULL, sent text NOT NULL, lbl varchar(190) NOT NULL) DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_file ( id int NOT NULL, file varchar(260) NOT NULL) DEFAULT CHARACTER SET utf8;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_arch ( id int NOT NULL, arch varchar(260) NOT NULL) DEFAULT CHARACTER SET utf8;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_midx ( id int NOT NULL, type enum('TEXT','INT','FLOAT','DATE','DATETIME') NOT NULL DEFAULT 'TEXT', name varchar(128) NOT NULL) DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_meta ( id int NOT NULL, arch int NOT NULL, file int NOT NULL, tval varchar(128) NOT NULL DEFAULT "", ival int NOT NULL DEFAULT 0, fval float NOT NULL DEFAULT 0.0, dval datetime NOT NULL DEFAULT "1000-01-01 00:00:00", idx int NOT NULL DEFAULT -1) DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
	}

	//
	// Read file names from stdin and process them.
	//

	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		filename := strings.TrimSpace(scanner.Text())
		if filename == "" {
			continue
		}
		var err error
		filename, err = filepath.Abs(filename)
		util.CheckErr(err)
		filename = filepath.Clean(filename)
		// The extension check is case-insensitive; the original file name is
		// what gets opened.
		lowername := strings.ToLower(filename)
		if strings.HasSuffix(lowername, ".xml") {
			data, err := ioutil.ReadFile(filename)
			util.CheckErr(err)
			do_data("", filename, data)
		} else if strings.HasSuffix(lowername, ".xml.gz") {
			fp, err := os.Open(filename)
			util.CheckErr(err)
			r, err := gzip.NewReader(fp)
			util.CheckErr(err)
			data, err := ioutil.ReadAll(r)
			r.Close()
			fp.Close()
			util.CheckErr(err)
			// Pass the name without the trailing ".gz".
			do_data("", filename[:len(filename)-3], data)
		} else if has_dbxml && strings.HasSuffix(lowername, ".dact") {
			do_dact(filename)
		} else if strings.HasSuffix(lowername, ".data.dz") {
			reader, err := compactcorpus.Open(filename)
			util.CheckErr(err)
			fmt.Println(">>>", filename)
			docs, err := reader.NewRange()
			util.CheckErr(err)
			for docs.HasNext() {
				name, xml := docs.Next()
				do_data(filename, name, xml)
			}
			showmemstats()
		} else {
			util.CheckErr(fmt.Errorf("Ongeldige extensie voor bestand '%s'", filename))
		}
	}
	util.CheckErr(scanner.Err())

	// send the last data from the buffers to the database
	buf_flush(DPRL)
	buf_flush(SENT)
	buf_flush(FILE)
	buf_flush(ARCH)
	buf_flush(META)
	buf_flush(MIDX)

	_, err = db.Exec("COMMIT;")
	util.CheckErr(err)

	fmt.Println("Tijd:", time.Now().Sub(now))

	// With -i, stop here: no indexes, no word table, no metadata ranges, and
	// the info/corpora tables are not updated.
	if !db_makeindex {
		sizes()
		return
	}

	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_deprel ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_deprel ADD INDEX (word), ADD INDEX (lemma), ADD INDEX (root), ADD INDEX (postag), ADD INDEX (rel), ADD INDEX (hword), ADD INDEX (hlemma), ADD INDEX (hroot), ADD INDEX (hpostag), ADD INDEX (file), ADD INDEX (arch);`)
	util.CheckErr(err)
	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_sent ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_sent ADD INDEX (file), ADD INDEX (arch), ADD INDEX (lbl);`)
	util.CheckErr(err)
	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_file ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_file ADD UNIQUE INDEX (id)`)
	util.CheckErr(err)
	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_arch ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_arch ADD UNIQUE INDEX (id);`)
	util.CheckErr(err)
	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_midx ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_midx ADD INDEX (name), ADD UNIQUE INDEX (id);`)
	util.CheckErr(err)
	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_meta ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_meta ADD INDEX (id), ADD INDEX (file), ADD INDEX (arch), ADD INDEX (tval), ADD INDEX (ival), ADD INDEX (fval), ADD INDEX (dval), ADD INDEX (idx);`)
	util.CheckErr(err)

	// time taken to create the tables <prefix>_deprel and <prefix>_sent
	fmt.Println("Tijd:", time.Now().Sub(now))

	showmemstats()

	//
	// create table <prefix>_word
	//

	_, err = db.Exec(fmt.Sprintf(
		"DROP TABLE IF EXISTS `%s_c_%s_word`;",
		Cfg.Prefix, prefix))
	util.CheckErr(err)
	_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_word ( word varchar(128) NOT NULL, lemma varchar(1024) NOT NULL) DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci;`)
	util.CheckErr(err)

	/*
		Read the words that are candidates for searching via a lemma.
		The list has to be read in one go, because otherwise a time-out
		causes only a small part of the words to be processed.
		Let's hope the complete word list fits in memory.
	*/
	fmt.Println("Tellingen van woorden opvragen ...")
	rows, err = db.Query("SELECT count(*), `word` FROM `" + Cfg.Prefix + "_c_" + prefix +
		"_deprel` WHERE `postag` IN (\"adj\", \"n\", \"ww\") GROUP BY `word` HAVING count(*) >= 10 ORDER BY `word`")
	util.CheckErr(err)
	woorden := make([]string, 0)
	var woord string
	for rows.Next() {
		var i int
		util.CheckErr(rows.Scan(&i, &woord))
		woorden = append(woorden, woord)
	}
	util.CheckErr(rows.Err())

	// find the lemmas for each word
	fmt.Println("Zoeken naar lemma's bij woorden ...")
	for idx, woord := range woorden {
		var s, p string
		// Progress indicator: the number of words still to do, every 100 words.
		if n := len(woorden) - idx; n%100 == 0 {
			fmt.Printf(" %d \r", n)
		}
		lemmas := make([]string, 0)

		/*
			word -> lemma

			This step is simple: for each word, look up the lemmas it
			occurs with. This works fine for LassyDevelop.
		*/
		rows, err := db.Query(fmt.Sprintf(
			"SELECT `lemma` FROM `"+Cfg.Prefix+"_c_"+prefix+"_deprel` WHERE `word` = %q GROUP BY `lemma`;", woord))
		util.CheckErr(err)
		for rows.Next() {
			util.CheckErr(rows.Scan(&s))
			lemmas = append(lemmas, s)
		}
		util.CheckErr(rows.Err())

		/*
			word -> root+postag -> lemma

			LassyLarge often contains no good lemmas. The word 'mannen'
			gives lemma 'mannen'. The solution here is to search via the
			root. The word 'mannen' gives root 'man'. The root 'man' gives
			the lemmas 'man' and 'mannen'.

			Roots that are found are only used if they also have the same
			postag. This prevents getting the lemma 'fiets' (n) for the
			word 'fietst' (ww).
		*/
		roots := make([][2]string, 0)
		rows, err = db.Query(fmt.Sprintf(
			"SELECT `root`,`postag` FROM `"+Cfg.Prefix+"_c_"+prefix+"_deprel` WHERE `word` = %q GROUP BY `root`,`postag`;", woord))
		util.CheckErr(err)
		for rows.Next() {
			util.CheckErr(rows.Scan(&s, &p))
			roots = append(roots, [2]string{s, p})
		}
		util.CheckErr(rows.Err())
		for _, root := range roots {
			rows, err := db.Query(fmt.Sprintf(
				"SELECT `lemma` FROM `"+Cfg.Prefix+"_c_"+prefix+"_deprel` WHERE `root` = %q AND `postag` = %q GROUP BY `lemma`;",
				root[0], root[1]))
			util.CheckErr(err)
			for rows.Next() {
				util.CheckErr(rows.Scan(&s))
				if !has(lemmas, s) {
					lemmas = append(lemmas, s)
				}
			}
			util.CheckErr(rows.Err())
		}

		/*
			send the word with its lemmas to the database buffer
		*/
		sort.Strings(lemmas)
		word_buf_put(woord, lemmas)
	}

	// send the last data from the buffer to the database
	buf_flush(WORD)

	_, err = db.Exec("COMMIT;")
	util.CheckErr(err)

	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_word ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_word ADD UNIQUE INDEX (word);`)
	util.CheckErr(err)

	fmt.Println("Tijd:", time.Now().Sub(now))

	//
	// ranges
	//

	fmt.Println("Ranges bepalen voor " + Cfg.Prefix + "_c_" + prefix + "_meta ...")

	_, err = db.Exec(fmt.Sprintf(
		"DROP TABLE IF EXISTS `%s_c_%s_mval`, %s_c_%s_minf; ",
		Cfg.Prefix, prefix,
		Cfg.Prefix, prefix))
	util.CheckErr(err)
	_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_mval ( id int NOT NULL DEFAULT 0, idx int NOT NULL DEFAULT 0, text varchar(260) NOT NULL DEFAULT 0, n int NOT NULL DEFAULT 0) DEFAULT CHARACTER SET utf8 DEFAULT COLLATE utf8_unicode_ci;`)
	util.CheckErr(err)
	_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_minf ( id int NOT NULL DEFAULT 0, indexed boolean NOT NULL DEFAULT 1, size int NOT NULL DEFAULT 0, dmin datetime NOT NULL DEFAULT "1000-01-01 00:00:00", dmax datetime NOT NULL DEFAULT "1000-01-01 00:00:00", dtype int NOT NULL DEFAULT 0, fmin float NOT NULL DEFAULT 0.0, fstep float NOT NULL DEFAULT 0.0, imin int NOT NULL DEFAULT 0, istep int NOT NULL DEFAULT 0);`)
	util.CheckErr(err)

	// Collect the metadata fields from midx: names (ordered by name), and per
	// name its type and id.
	metas := make([]string, 0)
	metat := make(map[string]string)
	metai := make(map[string]int)
	rows, err = db.Query(fmt.Sprintf("SELECT `id`,`name`,`type` FROM `%s_c_%s_midx` ORDER BY 2", Cfg.Prefix, prefix))
	util.CheckErr(err)
	for rows.Next() {
		var i int
		var n, t string
		util.CheckErr(rows.Scan(&i, &n, &t))
		metas = append(metas, n)
		metat[n] = t
		metai[n] = i
	}
	util.CheckErr(rows.Err())

	// For each metadata field: assign an idx to every row in meta, depending
	// on the field type, and remember the display text for each idx.
	for _, meta := range metas {
		// idx maps an index value to the text stored for it in mval.
		idx := make(map[int]string)
		switch metat[meta] {
		case "TEXT":
			// Distinct values, in sorted order, are numbered 0, 1, 2, ...
			rows, err := db.Query(fmt.Sprintf(
				"SELECT DISTINCT `tval` FROM `%s_c_%s_meta` WHERE `id` = %d ORDER BY 1",
				Cfg.Prefix, prefix, metai[meta]))
			util.CheckErr(err)
			ix := 0
			for rows.Next() {
				var s string
				util.CheckErr(rows.Scan(&s))
				idx[ix] = s
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = %d WHERE `id` = %d AND `tval` = %q",
					Cfg.Prefix, prefix, ix, metai[meta], s))
				util.CheckErr(err)
				ix++
			}
			util.CheckErr(rows.Err())
		case "INT":
			// Build an integer range from min, max and the number of distinct
			// values; the result of Scan is not checked here.
			rows, err := db.Query(fmt.Sprintf(
				"SELECT MIN(`ival`), MAX(`ival`), COUNT(DISTINCT `ival`) FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix, metai[meta]))
			util.CheckErr(err)
			var v1, v2, vx int
			for rows.Next() {
				rows.Scan(&v1, &v2, &vx)
			}
			ir := newIrange(v1, v2, vx)
			indexed := 0
			if ir.indexed {
				indexed = 1
			}
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_minf` (`id`,`imin`,`istep`,`indexed`,`size`) VALUES (%d,%d,%d,%d,%d)",
				Cfg.Prefix, prefix, metai[meta], ir.min, ir.step, indexed, len(ir.s)))
			util.CheckErr(err)
			rows, err = db.Query(fmt.Sprintf(
				"SELECT DISTINCT `ival` FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix, metai[meta]))
			util.CheckErr(err)
			var v int
			// Collect (idx, value) pairs first; the updates are executed after
			// the result set has been read completely.
			iis := make([][2]int, 0)
			for rows.Next() {
				util.CheckErr(rows.Scan(&v))
				s, ix := ir.value(v)
				idx[ix] = s
				iis = append(iis, [2]int{ix, v})
			}
			util.CheckErr(rows.Err())
			for _, ii := range iis {
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = %d WHERE `id` = %d AND `ival` = %d",
					Cfg.Prefix, prefix, ii[0], metai[meta], ii[1]))
				util.CheckErr(err)
			}
		case "FLOAT":
			// Build a float range from min and max; the result of Scan is not
			// checked here.
			rows, err := db.Query(fmt.Sprintf(
				"SELECT MIN(`fval`), MAX(`fval`) FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix, metai[meta]))
			util.CheckErr(err)
			var v1, v2 float64
			for rows.Next() {
				rows.Scan(&v1, &v2)
			}
			fr := newFrange(v1, v2)
			indexed := 0
			if fr.indexed {
				indexed = 1
			}
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_minf` (`id`,`fmin`,`fstep`,`indexed`,`size`) VALUES (%d,%g,%g,%d,%d)",
				Cfg.Prefix, prefix, metai[meta], fr.min, fr.step, indexed, len(fr.s)))
			util.CheckErr(err)
			if fr.indexed {
				// The database computes the bucket number itself.
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = FLOOR((`fval` - %g) / %g) WHERE `id` = %d",
					Cfg.Prefix, prefix, fr.min, fr.step, metai[meta]))
				util.CheckErr(err)
			} else {
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = 0 WHERE `id` = %d",
					Cfg.Prefix, prefix, metai[meta]))
				util.CheckErr(err)
			}
			// Look up the text for every idx value that is actually in use.
			rows, err = db.Query(fmt.Sprintf(
				"SELECT DISTINCT `idx` FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix, metai[meta]))
			util.CheckErr(err)
			for rows.Next() {
				var i int
				util.CheckErr(rows.Scan(&i))
				idx[i] = fr.s[i]
			}
			util.CheckErr(rows.Err())
		case "DATE", "DATETIME":
			// The number of distinct values is only queried for DATE; for
			// DATETIME the constant 0 is selected instead. The result of Scan
			// is not checked here.
			dis := "0"
			if metat[meta] == "DATE" {
				dis = "COUNT(DISTINCT `dval`)"
			}
			rows, err := db.Query(fmt.Sprintf(
				"SELECT MIN(`dval`), MAX(`dval`), %s FROM `%s_c_%s_meta` WHERE `id` = %d",
				dis, Cfg.Prefix, prefix, metai[meta]))
			util.CheckErr(err)
			var v1, v2 time.Time
			var i int
			for rows.Next() {
				rows.Scan(&v1, &v2, &i)
			}
			dr := newDrange(v1, v2, i, metat[meta] == "DATETIME")
			indexed := 0
			if dr.indexed {
				indexed = 1
			}
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_minf` (`id`,`dmin`,`dmax`,`dtype`,`indexed`,`size`) VALUES (%d,\"%04d-%02d-%02d %02d:%02d:%02d\",\"%04d-%02d-%02d %02d:%02d:%02d\",%d,%d,%d)",
				Cfg.Prefix, prefix, metai[meta],
				dr.min.Year(), dr.min.Month(), dr.min.Day(), dr.min.Hour(), dr.min.Minute(), dr.min.Second(),
				dr.max.Year(), dr.max.Month(), dr.max.Day(), dr.max.Hour(), dr.max.Minute(), dr.max.Second(),
				dr.r, indexed, len(dr.s)))
			util.CheckErr(err)
			rows, err = db.Query(fmt.Sprintf(
				"SELECT `dval` FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix, metai[meta]))
			util.CheckErr(err)
			var v time.Time
			for rows.Next() {
				util.CheckErr(rows.Scan(&v))
				s, ix := dr.value(v)
				idx[ix] = s
				// NOTE(review): the time part is written with '-' separators
				// here, unlike the ':' used in the INSERT above -- confirm the
				// database accepts this form.
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = %d WHERE `id` = %d AND `dval` = \"%04d-%02d-%02d %02d-%02d-%02d\"",
					Cfg.Prefix, prefix, ix, metai[meta],
					v.Year(), v.Month(), v.Day(), v.Hour(), v.Minute(), v.Second()))
				util.CheckErr(err)
			}
			util.CheckErr(rows.Err())
		}
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)

		// add the sentences for which there is no metadata; they get the
		// sentinel idx 2147483647
		_, err = db.Exec(fmt.Sprintf(
			"INSERT `%s_c_%s_meta` (`id`,`arch`,`file`,`idx`)"+
				"SELECT DISTINCT %d, `arch`, `file`, 2147483647 FROM `%s_c_%s_sent` `s` WHERE NOT EXISTS ( "+
				"SELECT `arch`, `file` FROM `%s_c_%s_meta` `m` WHERE `s`.`arch`=`m`.`arch` AND `s`.`file`=`m`.`file` AND `id`=%d )",
			Cfg.Prefix, prefix,
			metai[meta],
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix,
			metai[meta]))
		util.CheckErr(err)
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)

		// check whether metadata was really added
		rows, err = db.Query(fmt.Sprintf(
			"SELECT DISTINCT 1 FROM `%s_c_%s_meta` WHERE `id`=%d AND `idx`=2147483647",
			Cfg.Prefix, prefix, metai[meta]))
		util.CheckErr(err)
		for rows.Next() {
			idx[2147483647] = ""
		}
		util.CheckErr(rows.Err())

		// Store the text for every idx of this field in mval.
		for ix := range idx {
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_mval` (`id`,`idx`,`text`) VALUES (%d,%d,%q)",
				Cfg.Prefix, prefix, metai[meta], ix, idx[ix]))
			util.CheckErr(err)
		}
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)
	}

	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_mval ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_mval ADD INDEX (id), ADD INDEX (idx);`)
	util.CheckErr(err)

	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_minf ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_minf ADD INDEX (id);`)
	util.CheckErr(err)

	// Count, per metadata field and idx, the number of rows in meta, and store
	// the count in column n of mval.
	fmt.Println("Telling van ranges ...")
	for _, meta := range metas {
		sums := make(map[int]int)
		rows, err := db.Query(fmt.Sprintf(
			"SELECT COUNT(`idx`),`idx` FROM `%s_c_%s_meta` WHERE `id` = %d GROUP BY `idx`",
			Cfg.Prefix, prefix, metai[meta]))
		util.CheckErr(err)
		for rows.Next() {
			var c, i int
			util.CheckErr(rows.Scan(&c, &i))
			sums[i] = c
		}
		util.CheckErr(rows.Err())
		for s := range sums {
			_, err = db.Exec(fmt.Sprintf(
				"UPDATE `%s_c_%s_mval` SET `n` = %d WHERE `id` = %d AND `idx` = %d",
				Cfg.Prefix, prefix, sums[s], metai[meta], s))
			util.CheckErr(err)
		}
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)
	}

	//
	// put info about the corpus into the database
	//

	lines := 0
	rows, err = db.Query("SELECT COUNT(*) FROM " + Cfg.Prefix + "_c_" + prefix + "_sent")
	util.CheckErr(err)
	if rows.Next() {
		util.CheckErr(rows.Scan(&lines))
		rows.Close()
	}

	// Without any metadata fields the four metadata tables are dropped again;
	// the result of that statement is not checked.
	hasmeta := 0
	if len(metas) > 0 {
		hasmeta = 1
	} else {
		db.Exec(fmt.Sprintf(
			"DROP TABLE IF EXISTS `%s_c_%s_meta`, `%s_c_%s_midx`, `%s_c_%s_minf`, `%s_c_%s_mval`;",
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix,
			Cfg.Prefix, prefix))
	}

	// With -s the status column is left untouched.
	if db_updatestatus {
		_, err = db.Exec(fmt.Sprintf("UPDATE `%s_info` SET `status` = \"FINISHED\", `nline` = %d, `active` = NOW(), `hasmeta` = %d WHERE `id` = %q",
			Cfg.Prefix, lines, hasmeta, prefix))
	} else {
		_, err = db.Exec(fmt.Sprintf("UPDATE `%s_info` SET `nline` = %d, `active` = NOW(), `hasmeta` = %d WHERE `id` = %q",
			Cfg.Prefix, lines, hasmeta, prefix))
	}
	util.CheckErr(err)

	// A public corpus is registered under a fixed user name instead of its owner.
	user := owner
	if public == "1" {
		user = "******"
	}
	_, err = db.Exec(fmt.Sprintf("INSERT `%s_corpora` (`user`, `prefix`) VALUES (%q, %q);", Cfg.Prefix, user, prefix))
	util.CheckErr(err)

	_, err = db.Exec("COMMIT;")
	util.CheckErr(err)

	//fmt.Println("Bijwerken menu's voor postag, rel en hpostag ...")
	//tags()

	// total time
	fmt.Println("Tijd:", time.Now().Sub(now))

	showmemstats()

	sizes()
}
func main() { if runVersion { fmt.Fprintf(os.Stdout, "%s (go-gherkin %s)\n", VERSION, gherkin.VERSION) return } if inputPath != "" { inputReader, err = os.Open(inputPath) if err != nil { usageErr(err) return } } else { if !util.IsTerminal(os.Stdin) { inputReader = os.Stdin } } if inputReader == nil { usageErr(fmt.Errorf("Missing input (stdin OR -in flag)")) return } if outputPath != "" { outputWriter, err = os.Create(outputPath) if err != nil { usageErr(err) return } } else { outputWriter = os.Stdout } if colorsYes { colors = true } else if colorsNo { colors = false } else if outputWriter == os.Stdout { colors = util.IsTerminal(os.Stdout) && runtime.GOOS != "windows" } fmtr := &formater.GherkinPrettyFormater{ AnsiColors: colors, CenterSteps: centerSteps, SkipSteps: skipSteps, SkipComments: skipComments, NoAlignComments: noCommentAlign, AlignCommentsMinIndent: commentAlignMinIndent, } log.Printf("Formater Settings: %+v", fmtr) bytes, _ := ioutil.ReadAll(inputReader) content := string(bytes) gp := gherkin.NewGherkinDOMParser(content) gp.Init() err = gp.Parse() if err != nil { usageErrWithVerboseHint(fmt.Errorf("Parsing failed. invalid gherkin")) if verbose { fmt.Fprintln(os.Stderr, err) } return } fmtr.Format(gp, outputWriter) }
func main() { flag.Parse() if *opt_f == "" && flag.NArg() == 0 && util.IsTerminal(os.Stdin) && !*opt_a { fmt.Fprintf(os.Stderr, "\nUsage: %s [args] [text]\n\nargs with default values are:\n\n", os.Args[0]) flag.PrintDefaults() fmt.Fprintf(os.Stderr, "\nIf both -f and text are missing, read from stdin\n\n") return } extras := make([]string, 0) tc := textcat.NewTextCat() if *opt_p != "" { for _, i := range strings.Split(*opt_p, ",") { name := strings.Split(path.Base(i), ".")[0] extras = append(extras, name) e := tc.AddLanguage(name, i) util.CheckErr(e) } } if *opt_z { if *opt_r || *opt_b { for _, extra := range extras { tc.EnableLanguages(extra + ".raw") } } if *opt_b || !*opt_r { for _, extra := range extras { tc.EnableLanguages(extra + ".utf8") } } } else { if *opt_r || *opt_b { tc.EnableAllRawLanguages() } if *opt_b || !*opt_r { tc.EnableAllUtf8Languages() } } if *opt_i != "" { tc.DisableLanguages(strings.Split(*opt_i, ",")...) } if *opt_a { for _, i := range tc.ActiveLanguages() { fmt.Println(i) } return } if *opt_l { var r *util.Reader if *opt_f != "" { fp, err := os.Open(*opt_f) util.CheckErr(err) defer fp.Close() r = util.NewReader(fp) } else if flag.NArg() > 0 { b := bytes.NewBufferString(strings.Join(flag.Args(), " ")) r = util.NewReader(b) } else { r = util.NewReader(os.Stdin) } for { line, err := r.ReadLineString() if err == io.EOF { break } util.CheckErr(err) l, err := tc.Classify(line) if err != nil { fmt.Print(err) } else { fmt.Print(strings.Join(l, ",")) } fmt.Println("\t" + line) } return } var text string if *opt_f != "" { t, err := ioutil.ReadFile(*opt_f) util.CheckErr(err) text = string(t) } else if flag.NArg() > 0 { text = strings.Join(flag.Args(), " ") } else { t, err := ioutil.ReadAll(os.Stdin) util.CheckErr(err) text = string(t) } l, e := tc.Classify(text) if e != nil { fmt.Println(e) } else { fmt.Println(strings.Join(l, "\n")) } }
func main() { fmt.Println("stdin: ", util.IsTerminal(os.Stdin)) fmt.Println("stdout:", util.IsTerminal(os.Stdout)) fmt.Println("stderr:", util.IsTerminal(os.Stderr)) }