Ejemplo n.º 1
0
// main reads all of standard input and prints the textcat patterns found in
// it, first the raw set and then the UTF-8 set.
//
// Both sets are printed by default. The argument "-r" switches the UTF-8 set
// off and "-u" switches the raw set off. syntax() is called, and nothing else
// is done, when stdin is a terminal, when an unknown argument is given, or
// when both sets end up switched off.
func main() {
	if util.IsTerminal(os.Stdin) {
		syntax()
		return
	}

	wantRaw, wantUtf8 := true, true
	for _, opt := range os.Args[1:] {
		if opt == "-r" {
			wantUtf8 = false
		} else if opt == "-u" {
			wantRaw = false
		} else {
			syntax()
			return
		}
	}
	if !(wantRaw || wantUtf8) {
		syntax()
		return
	}

	input, err := ioutil.ReadAll(os.Stdin)
	util.CheckErr(err)
	text := string(input)

	// The two output sections only differ in the flag passed to GetPatterns
	// and in the texts that are printed.
	sections := []struct {
		enabled bool
		utf8    bool
		header  string
		warning string
	}{
		{wantRaw, false, "[[[RAW]]]", "Warning: there are less than %d raw patterns\n"},
		{wantUtf8, true, "[[[UTF8]]]", "Warning: there are less than %d utf8 patterns\n"},
	}

	for _, sec := range sections {
		if !sec.enabled {
			continue
		}
		fmt.Println(sec.header)
		printed := 0
		for idx, pat := range textcat.GetPatterns(text, sec.utf8) {
			// Never print more than MaxPatterns entries.
			if idx == textcat.MaxPatterns {
				break
			}
			printed++
			fmt.Printf("%s\t%d\n", pat.S, pat.I)
		}
		if printed < textcat.MaxPatterns {
			fmt.Fprintf(os.Stderr, sec.warning, textcat.MaxPatterns)
		}
	}
}
Ejemplo n.º 2
0
// main is the entry point of the pt command.
//
// It parses the command line into opts, switches colour and grouping off
// when stdout is not a terminal, and then runs a search for the pattern in
// args[0]. The search starts at "." unless exactly two positional arguments
// are given, in which case the second one is the root. Without positional
// arguments the help text is written to stdout and the process exits with
// status 1; a parse error also exits with status 1.
func main() {
	parser := flags.NewParser(&opts, flags.Default)

	// Set the program name and usage line before parsing, so that help
	// generated while Parse runs (e.g. for the help flag) already shows
	// them. Previously they were assigned only after Parse had returned.
	parser.Name = "pt"
	parser.Usage = "[OPTIONS] PATTERN [PATH]"

	args, err := parser.Parse()
	if err != nil {
		os.Exit(1)
	}

	// Output that is piped or redirected gets neither colours nor grouping.
	if !terminal.IsTerminal(os.Stdout) {
		opts.NoColor = true
		opts.NoGroup = true
	}

	if len(args) == 0 {
		parser.WriteHelp(os.Stdout)
		os.Exit(1)
	}

	var root = "."
	if len(args) == 2 {
		root = args[1]
	}

	searcher := search.Searcher{root, args[0], &opts}
	searcher.Search()
}
Ejemplo n.º 3
0
// main reads a log from adb, from stdin or from a file, depending on the
// -input flag value.
//
// When the output-file flag is non-empty the file is created and stored in
// outputFile. An input of "auto" resolves to "stdin" when stdin is not a
// terminal and to "adb" otherwise. Any other value that is not "adb" or
// "stdin" is treated as the path of a file to read.
//
// All failures are fatal: log.Fatal/log.Fatalf print the message and exit,
// so no explicit return is needed after them.
func main() {
	termcols = getTermWidth()

	flag.Parse()
	buildPatterns()

	if len(*file) > 0 {
		f, err := os.Create(*file)
		if err != nil {
			// Fatalf keeps the file name and the error apart; log.Fatal would
			// print them glued together because the first operand is a string.
			log.Fatalf("Error opening output file: %s: %v", *file, err)
		}
		outputFile = f
	}

	if *input == "auto" {
		if !util.IsTerminal(os.Stdin) {
			*input = "stdin"
		} else {
			*input = "adb"
		}
	}

	switch *input {
	case "adb":
		testEnv()
		deviceID, err := getDeviceId()
		if err != nil {
			log.Fatal("Error: ", err)
		}

		// A device id made up of question marks is reported as a permission
		// problem.
		if deviceID == "????????????" {
			log.Fatal("No permissions for device")
		}

		fmt.Printf("Selected device: %s\n\n", deviceID)

		getPids()

		adbReadlog(deviceID)

	case "stdin":
		fileReadlog(os.Stdin)

	default:
		// Named "in" so that the package-level file flag is not shadowed.
		in, err := os.Open(*input)
		if err != nil {
			log.Fatal("Error: ", err)
		}

		fileReadlog(in)
	}
}
Ejemplo n.º 4
0
// main passes every line of standard input through untok for the language
// named by the single command line argument and prints the result.
//
// The usage text is printed instead when the argument count is wrong, when
// the language is neither "en" nor "nl", or when stdin is a terminal.
func main() {
	// The order of the tests matters: os.Args[1] is only read once the
	// argument count is known to be exactly two.
	usable := len(os.Args) == 2 &&
		(os.Args[1] == "en" || os.Args[1] == "nl") &&
		!util.IsTerminal(os.Stdin)
	if !usable {
		fmt.Printf(`
Usage: %s language < text

   ... where language is one of: en nl

`, os.Args[0])
		return
	}

	lang := os.Args[1]
	lines := bufio.NewScanner(os.Stdin)
	for lines.Scan() {
		fmt.Println(untok(lines.Text(), lang))
	}

	// A scanner stops silently on a read error, so check for one afterwards.
	err := lines.Err()
	if err != nil {
		log.Fatal(err)
	}
}
Ejemplo n.º 5
0
// useStdin reports whether standard input is something other than a
// terminal, i.e. it returns true exactly when util.IsTerminal(os.Stdin)
// is false.
func useStdin() bool {
	interactive := util.IsTerminal(os.Stdin)
	return !interactive
}
Ejemplo n.º 6
0
// UseStdin reports whether standard input is something other than a
// terminal. The result depends only on os.Stdin; no field of the receiver
// is read.
func (c *ShuttleConfig) UseStdin() bool {
	interactive := util.IsTerminal(os.Stdin)
	return !interactive
}
Ejemplo n.º 7
0
// main runs the pfdb terminal UI on top of a stream piped into stdin.
//
// Every line read from stdin is appended to the line buffer together with a
// timestamp. Lines that start with the -prefix value are additionally split
// at the first -delimeter into a name and a value, and the value is recorded
// in the history of the variable with that name.
//
// Keys:
//
//	Esc          quit
//	Space        switch between the two panes
//	Up / Down    pane 0: move the selected buffer line; pane 1: move the
//	             selected variable
//	Left / Right pane 1: step through the recorded values of the selected
//	             variable
//	Ctrl-R       return to realtime mode (follow the newest data)
//
// Navigation keys are ignored while there is nothing to navigate, so they
// cannot index into empty slices.
func main() {
	var prefix = flag.String("prefix", ".", "Prefix used to match lines")
	var delimeter = flag.String("delimeter", "=", "Delimiter separating a variable name from its value")
	flag.Parse()

	if util.IsTerminal(os.Stdin) {
		fmt.Println("We only support streams from unix pipes at the moment, please pipe something into pfdb")
		return
	}

	err := termbox.Init()
	if err != nil {
		panic(err)
	}
	defer termbox.Close()
	termbox.SetInputMode(termbox.InputEsc | termbox.InputMouse)

	state := ProgramState{
		0,
		[]string{},
		[]int64{},
		map[string]*HistoricalVariable{},
		[]string{},
		1,
		0,
		0,
		int64(0),
		true,
		time.Now().UnixNano(),
	}

	lines := make(chan string)
	events := make(chan termbox.Event)

	// Feed stdin into the main loop line by line. The goroutine ends on the
	// first read error, which includes end of input.
	go func() {
		bio := bufio.NewReader(os.Stdin)
		for {
			line, _, err := bio.ReadLine()
			if err != nil {
				break
			}
			lines <- string(line)
		}
	}()

	// Feed terminal events into the main loop.
	go func() {
		for {
			events <- termbox.PollEvent()
		}
	}()

	draw_all(&state)

loop:
	for {
		select {
		case line := <-lines:
			// Nothing is printed here: writing to stdout would corrupt the
			// screen that termbox is drawing.
			timestamp := time.Now().UnixNano()
			if len(line) > 0 && strings.HasPrefix(line, *prefix) {
				parts := strings.SplitN(strings.TrimPrefix(line, *prefix), *delimeter, 2)
				if len(parts) == 2 {
					existing := state.log[parts[0]]
					if existing == nil {
						existing = &HistoricalVariable{parts[0],
							[]string{},
							[]int64{},
							0}
						state.log[parts[0]] = existing
						state.log_keys = append(state.log_keys, parts[0])
					}
					existing.Values = append(existing.Values, parts[1])
					existing.Timestamp = append(existing.Timestamp, timestamp)
					// In realtime mode the newest value is the one in focus.
					if state.realtime {
						existing.Focused = len(existing.Values) - 1
					}
				}
			}
			state.buffer = append(state.buffer, line)
			state.timestamps = append(state.timestamps, timestamp)
			if state.realtime {
				_, height := termbox.Size()
				state.offset = max(len(state.buffer)-height+1, 0)
				state.selected_buffer_line = len(state.buffer) - 1
			}
			draw_all(&state)

		case ev := <-events:
			switch ev.Type {
			case termbox.EventKey:
				switch ev.Key {
				case termbox.KeyEsc:
					break loop
				case termbox.KeyArrowDown:
					switch state.focused_pane {
					case 0:
						// With an empty buffer there is no line to select and
						// state.timestamps has no element to read.
						if len(state.buffer) > 0 {
							state.realtime = false
							state.selected_buffer_line = min(state.selected_buffer_line+1, len(state.buffer)-1)

							_, height := termbox.Size()
							state.timecursor = state.timestamps[state.selected_buffer_line]
							state.offset = max(state.selected_buffer_line-height+2, 0)
							seek_to_time(state.timecursor, &state.log)
						}
					case 1:
						// Clamp at 0 so the index does not become -1 while no
						// variable has been recorded yet.
						state.selected_index = max(0, min(state.selected_index+1, len(state.log)-1))
					}
				case termbox.KeyArrowUp:
					switch state.focused_pane {
					case 0:
						if len(state.buffer) > 0 {
							state.realtime = false
							state.selected_buffer_line = max(0, state.selected_buffer_line-1)

							_, height := termbox.Size()
							state.timecursor = state.timestamps[state.selected_buffer_line]
							state.offset = max(state.selected_buffer_line-height+2, 0)
							seek_to_time(state.timecursor, &state.log)
						}
					case 1:
						state.selected_index = max(0, state.selected_index-1)
					}
				case termbox.KeyArrowLeft, termbox.KeyArrowRight:
					// Left steps to the previous recorded value, right to the
					// next one.
					step := 1
					if ev.Key == termbox.KeyArrowLeft {
						step = -1
					}
					// Only act when the selection refers to an existing
					// variable; otherwise the lookup below would panic.
					if idx := state.selected_index; state.focused_pane == 1 && idx >= 0 && idx < len(state.log_keys) {
						state.realtime = false

						// Figure out where the new timecursor should be
						v := state.log[state.log_keys[idx]]
						v.Focused = max(0, min(len(v.Values)-1, v.Focused+step))
						state.timecursor = v.Timestamp[v.Focused]

						// Update
						_, height := termbox.Size()
						state.selected_buffer_line = scroll_to_time(state.timecursor, state.selected_buffer_line, &state.timestamps)
						state.offset = max(state.selected_buffer_line-height+1, 0)
						seek_to_time(state.timecursor, &state.log)
					}
				case termbox.KeySpace:
					state.focused_pane = (state.focused_pane + 1) % 2
				case termbox.KeyCtrlR:
					// Back to realtime: focus the newest value of every
					// variable and select the newest buffer line.
					state.realtime = true
					for _, v := range state.log {
						v.Focused = len(v.Values) - 1
					}
					state.selected_buffer_line = len(state.buffer) - 1
				}
				draw_all(&state)
			case termbox.EventMouse:
			case termbox.EventResize:
				draw_all(&state)
			}
		}
	}
}
Ejemplo n.º 8
0
// main builds the database tables of one corpus from the files whose names
// are read from stdin.
//
// Steps, in order:
//
//  1. Parse the option flags (-a, -w, -i, -s, -p regexp, -d) and the four
//     positional arguments id, description, owner and public.
//  2. Load setup.toml from the PAQU directory and validate the arguments.
//  3. Connect to the database and check whether the corpus already exists;
//     when appending, read the current maximum ids and drop the indexes.
//  4. Register the corpus in the <prefix>_info table with status WORKING and
//     (re)create the corpus tables unless appending.
//  5. Read file names from stdin and process each file according to its
//     extension (.xml, .xml.gz, .dact, .data.dz).
//  6. Flush the buffers, and, unless -i was given, create the indexes, the
//     word -> lemmas table and the metadata range tables.
//  7. Store line count and status in <prefix>_info and register the corpus
//     in <prefix>_corpora.
//
// Errors are passed to util.CheckErr throughout (presumably this aborts the
// program on a non-nil error — confirm in package util).
func main() {

	now := time.Now()

	// Grow every output buffer up front.
	buffer[DPRL].Grow(50000)
	buffer[SENT].Grow(50000)
	buffer[WORD].Grow(50000)
	buffer[FILE].Grow(50000)
	buffer[ARCH].Grow(50000)
	buffer[META].Grow(50000)
	buffer[MIDX].Grow(50000)

	// Consume leading option flags by repeatedly removing os.Args[1]; the
	// loop stops at the first argument that is not a known flag.
	db_makeindex = true
	db_updatestatus = true
	for len(os.Args) > 1 {
		if os.Args[1] == "-a" {
			db_append = true
			db_overwrite = false
		} else if os.Args[1] == "-w" {
			db_append = false
			db_overwrite = true
		} else if os.Args[1] == "-i" {
			db_makeindex = false
		} else if os.Args[1] == "-s" {
			db_updatestatus = false
		} else if os.Args[1] == "-p" && len(os.Args) > 2 {
			db_strippath = true
			// Drop "-p" itself so that os.Args[1] is now its regexp value;
			// the value is dropped by the shared removal at the loop end.
			os.Args = append(os.Args[:1], os.Args[2:]...)
			rePath = regexp.MustCompile(os.Args[1])
		} else if os.Args[1] == "-d" {
			db_decode = true
		} else {
			break
		}
		os.Args = append(os.Args[:1], os.Args[2:]...)
	}

	// Exactly four positional arguments are required, and the file names
	// must be piped in rather than typed on a terminal.
	if len(os.Args) != 5 || util.IsTerminal(os.Stdin) {
		fmt.Printf(`
Syntax: %s [-a] [-w] [-i] [-s] [-p regexp] [-d] id description owner public < bestandnamen

Opties:

 -a : toevoegen aan bestaande database
 -w : bestaande database overschrijven
 -i : geen tabel van woord naar lemmas aanmaken
 -s : status niet bijwerken als klaar
 -p : prefix die van bestandnaam wordt gestript voor label
 -d : bestandnaam decoderen voor label

  id:
  description:
  owner:       'none' of een e-mailadres
  public:      0 (private) of 1 (public)


`, os.Args[0])
		return
	}

	prefix = strings.TrimSpace(os.Args[1])
	desc = strings.TrimSpace(os.Args[2])
	owner = strings.TrimSpace(os.Args[3])
	public = strings.TrimSpace(os.Args[4])

	// Configuration directory: $PAQU, else DefaultPaquDir, else $HOME/.paqu.
	paqudir := os.Getenv("PAQU")
	if paqudir == "" {
		if DefaultPaquDir != "" {
			paqudir = DefaultPaquDir
		} else {
			paqudir = filepath.Join(os.Getenv("HOME"), ".paqu")
		}
	}
	_, err := TomlDecodeFile(filepath.Join(paqudir, "setup.toml"), &Cfg)
	util.CheckErr(err)

	if desc == "" {
		util.CheckErr(fmt.Errorf("De omschrijving mag niet leeg zijn"))
	}

	if owner != "none" && strings.Index(owner, "@") < 0 {
		util.CheckErr(fmt.Errorf("De eigenaar moet 'none' zijn of een e-mailadres"))
	}

	if prefix == "" {
		util.CheckErr(fmt.Errorf("De id mag niet leeg zijn"))
	}

	// The id is concatenated into table names below, so only the lower case
	// letters a-z are accepted.
	for _, c := range prefix {
		if c < 'a' || c > 'z' {
			util.CheckErr(fmt.Errorf("Ongeldige tekens in '%s'. Alleen kleine letters a tot z mogen.", prefix))
		}
	}

	db = connect()
	defer func() {
		fmt.Println("Verbinding met database wordt gesloten...")
		util.CheckErr(db.Close())
	}()

	//
	// check whether the database already exists
	//

	// NOTE(review): when the query succeeds but returns no row, rows is not
	// closed here.
	rows, err := db.Query("SELECT `begin` FROM `" + Cfg.Prefix + "_c_" + prefix + "_deprel` LIMIT 0, 1;")
	if err == nil && rows.Next() {
		rows.Close()
		if !(db_append || db_overwrite) {
			util.CheckErr(fmt.Errorf("De database bestaat al, en er is geen optie -a of -w"))
		}
		db_exists = true

		if db_append {
			// Read the highest ids currently in use; a failed Scan stores -1.
			// Note that rows and err are shadowed inside this block.
			rows, err := db.Query("SELECT MAX(id) FROM " + Cfg.Prefix + "_c_" + prefix + "_arch")
			util.CheckErr(err)
			if rows.Next() {
				if rows.Scan(&toparch) != nil {
					toparch = -1
				}
				rows.Close()
			}
			rows, err = db.Query("SELECT MAX(id) FROM " + Cfg.Prefix + "_c_" + prefix + "_file")
			util.CheckErr(err)
			if rows.Next() {
				if rows.Scan(&topfile) != nil {
					topfile = -1
				}
				rows.Close()
			}
			rows, err = db.Query("SELECT MAX(id) FROM " + Cfg.Prefix + "_c_" + prefix + "_midx")
			util.CheckErr(err)
			if rows.Next() {
				if rows.Scan(&topmidx) != nil {
					topmidx = -1
				}
				rows.Close()
			}
			// Drop the indexes of the existing tables. The results of these
			// Exec calls are not checked.
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_deprel ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_deprel
				DROP INDEX word,
				DROP INDEX lemma,
				DROP INDEX root,
				DROP INDEX postag,
				DROP INDEX rel,
				DROP INDEX hword,
				DROP INDEX hlemma,
				DROP INDEX hroot,
				DROP INDEX hpostag,
				DROP INDEX file,
				DROP INDEX arch;`)
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_sent ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_sent
				DROP INDEX file,
				DROP INDEX arch,
				DROP INDEX lbl;`)
			fmt.Println("Verwijderen index uit " + Cfg.Prefix + "_c_" + prefix + "_file ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_file
				DROP INDEX id`)
			fmt.Println("Verwijderen index uit " + Cfg.Prefix + "_c_" + prefix + "_arch ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_arch
				DROP INDEX id;`)
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_meta ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_meta
				DROP INDEX id,
				DROP INDEX file,
				DROP INDEX arch,
				DROP INDEX tval,
				DROP INDEX ival,
				DROP INDEX fval,
				DROP INDEX dval,
				DROP INDEX idx;`)
			fmt.Println("Verwijderen indexen uit " + Cfg.Prefix + "_c_" + prefix + "_midx ...")
			db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_midx
				DROP INDEX id,
				DROP INDEX name;`)
		}
	}

	share := "PRIVATE"
	if public == "1" {
		share = "PUBLIC"
	}

	// NOTE(review): values are placed into the SQL text with %q, which
	// applies Go string quoting, not SQL escaping; desc and owner come
	// straight from the command line.
	db.Exec(fmt.Sprintf("INSERT `%s_info` (`id`,`description`,`msg`,`params`) VALUES (%q,\"\",\"\",\"\");", Cfg.Prefix, prefix)) // ignore error
	_, err = db.Exec(fmt.Sprintf("UPDATE `%s_info` SET `description` = %q, `owner` = %q, `status` = \"WORKING\", `shared` = %q WHERE `id` = %q",
		Cfg.Prefix, desc, owner, share, prefix))
	util.CheckErr(err)

	_, err = db.Exec("DELETE FROM " + Cfg.Prefix + "_corpora WHERE `prefix` = \"" + prefix + "\";")
	util.CheckErr(err)

	// drop old tables
	if !db_exists || db_overwrite {
		_, err := db.Exec(fmt.Sprintf(
			"DROP TABLE IF EXISTS `%s_c_%s_deprel`, `%s_c_%s_sent`, `%s_c_%s_file`, `%s_c_%s_arch`, `%s_c_%s_meta`, `%s_c_%s_midx`;",
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix))
		util.CheckErr(err)
		// create new tables
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_deprel (
            idd     int          NOT NULL AUTO_INCREMENT PRIMARY KEY,
			word    varchar(128) NOT NULL,
			lemma   varchar(128) NOT NULL,
			root    varchar(128) NOT NULL,
			postag  varchar(64)  NOT NULL,
			rel     varchar(64)  NOT NULL,
			hword   varchar(128) NOT NULL,
			hlemma  varchar(128) NOT NULL,
			hroot   varchar(128) NOT NULL,
			hpostag varchar(64)  NOT NULL,
			arch    int          NOT NULL,
			file    int          NOT NULL,
			begin   int          NOT NULL,
			end     int          NOT NULL,
			hbegin  int          NOT NULL,
			hend    int          NOT NULL,
			mark    varchar(128) NOT NULL)
			DEFAULT CHARACTER SET utf8
			DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_sent (
			arch int          NOT NULL,
			file int          NOT NULL,
			sent text         NOT NULL,
			lbl  varchar(190) NOT NULL)
			DEFAULT CHARACTER SET utf8
			DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_file (
			id   int          NOT NULL,
			file varchar(260) NOT NULL)
			DEFAULT CHARACTER SET utf8;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_arch (
			id   int          NOT NULL,
			arch varchar(260) NOT NULL)
			DEFAULT CHARACTER SET utf8;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_midx (
			id   int          NOT NULL,
			type enum('TEXT','INT','FLOAT','DATE','DATETIME') NOT NULL DEFAULT 'TEXT',
			name varchar(128) NOT NULL)
			DEFAULT CHARACTER SET utf8
			DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
		_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_meta (
			id   int          NOT NULL,
			arch int          NOT NULL,
			file int          NOT NULL,
			tval varchar(128) NOT NULL DEFAULT "",
			ival int          NOT NULL DEFAULT 0,
			fval float        NOT NULL DEFAULT 0.0,
			dval datetime     NOT NULL DEFAULT "1000-01-01 00:00:00",
			idx  int          NOT NULL DEFAULT -1)
			DEFAULT CHARACTER SET utf8
			DEFAULT COLLATE utf8_unicode_ci;`)
		util.CheckErr(err)
	}

	//
	// Read file names from stdin and process them.
	//

	scanner := bufio.NewScanner(os.Stdin)
	for scanner.Scan() {
		filename := strings.TrimSpace(scanner.Text())
		if filename == "" {
			continue
		}
		var err error
		filename, err = filepath.Abs(filename)
		util.CheckErr(err)
		filename = filepath.Clean(filename)
		// The extension is matched case-insensitively.
		lowername := strings.ToLower(filename)
		if strings.HasSuffix(lowername, ".xml") {
			data, err := ioutil.ReadFile(filename)
			util.CheckErr(err)
			do_data("", filename, data)
		} else if strings.HasSuffix(lowername, ".xml.gz") {
			fp, err := os.Open(filename)
			util.CheckErr(err)
			r, err := gzip.NewReader(fp)
			util.CheckErr(err)
			data, err := ioutil.ReadAll(r)
			r.Close()
			fp.Close()
			util.CheckErr(err)
			// Pass the name without the trailing ".gz".
			do_data("", filename[:len(filename)-3], data)
		} else if has_dbxml && strings.HasSuffix(lowername, ".dact") {
			do_dact(filename)
		} else if strings.HasSuffix(lowername, ".data.dz") {
			reader, err := compactcorpus.Open(filename)
			util.CheckErr(err)
			fmt.Println(">>>", filename)
			docs, err := reader.NewRange()
			util.CheckErr(err)
			for docs.HasNext() {
				name, xml := docs.Next()
				do_data(filename, name, xml)
			}
			showmemstats()
		} else {
			util.CheckErr(fmt.Errorf("Ongeldige extensie voor bestand '%s'", filename))
		}
	}
	util.CheckErr(scanner.Err())

	// send the last data from the buffers to the database
	buf_flush(DPRL)
	buf_flush(SENT)
	buf_flush(FILE)
	buf_flush(ARCH)
	buf_flush(META)
	buf_flush(MIDX)

	_, err = db.Exec("COMMIT;")
	util.CheckErr(err)

	fmt.Println("Tijd:", time.Now().Sub(now))

	// With -i everything below (indexes, word table, metadata ranges and
	// the final status update) is skipped.
	if !db_makeindex {
		sizes()
		return
	}

	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_deprel ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_deprel
		ADD INDEX (word),
		ADD INDEX (lemma),
		ADD INDEX (root),
		ADD INDEX (postag),
		ADD INDEX (rel),
		ADD INDEX (hword),
		ADD INDEX (hlemma),
		ADD INDEX (hroot),
		ADD INDEX (hpostag),
		ADD INDEX (file),
		ADD INDEX (arch);`)
	util.CheckErr(err)

	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_sent ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_sent
		ADD INDEX (file),
		ADD INDEX (arch),
		ADD INDEX (lbl);`)
	util.CheckErr(err)

	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_file ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_file
		ADD UNIQUE INDEX (id)`)
	util.CheckErr(err)

	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_arch ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_arch
		ADD UNIQUE INDEX (id);`)
	util.CheckErr(err)

	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_midx ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_midx
		ADD INDEX (name),
		ADD UNIQUE INDEX (id);`)
	util.CheckErr(err)

	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_meta ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_meta
		ADD INDEX (id),
		ADD INDEX (file),
		ADD INDEX (arch),
		ADD INDEX (tval),
		ADD INDEX (ival),
		ADD INDEX (fval),
		ADD INDEX (dval),
		ADD INDEX (idx);`)
	util.CheckErr(err)

	// time taken to create the tables <prefix>_deprel and <prefix>_sent
	fmt.Println("Tijd:", time.Now().Sub(now))

	showmemstats()

	//
	// create table <prefix>_word
	//

	_, err = db.Exec(fmt.Sprintf(
		"DROP TABLE IF EXISTS `%s_c_%s_word`;",
		Cfg.Prefix,
		prefix))
	util.CheckErr(err)
	_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_word (
		word  varchar(128) NOT NULL,
		lemma varchar(1024) NOT NULL)
		DEFAULT CHARACTER SET utf8
		DEFAULT COLLATE utf8_unicode_ci;`)
	util.CheckErr(err)

	/*
		Read the words that are candidates for searching by lemma.
		The list has to be read in one go, because otherwise a time-out
		causes only a small part of the words to be processed.
		Let's hope the complete word list fits in memory.
	*/
	fmt.Println("Tellingen van woorden opvragen ...")
	rows, err = db.Query("SELECT count(*), `word` FROM `" + Cfg.Prefix + "_c_" + prefix +
		"_deprel` WHERE `postag` IN (\"adj\", \"n\", \"ww\") GROUP BY `word` HAVING count(*) >= 10 ORDER BY `word`")
	util.CheckErr(err)
	woorden := make([]string, 0)
	var woord string
	for rows.Next() {
		var i int
		util.CheckErr(rows.Scan(&i, &woord))
		woorden = append(woorden, woord)
	}
	util.CheckErr(rows.Err())

	// find the lemmas for each word
	fmt.Println("Zoeken naar lemma's bij woorden ...")
	for idx, woord := range woorden {
		var s, p string

		// Progress: print the number of words still to do, every 100 words.
		if n := len(woorden) - idx; n%100 == 0 {
			fmt.Printf(" %d   \r", n)
		}

		lemmas := make([]string, 0)

		/*
			word -> lemma
			This step is simple: for each word, look up the lemmas it occurs with.
			This works fine for LassyDevelop
		*/
		rows, err := db.Query(fmt.Sprintf(
			"SELECT `lemma` FROM `"+Cfg.Prefix+"_c_"+prefix+"_deprel` WHERE `word` = %q GROUP BY `lemma`;",
			woord))
		util.CheckErr(err)
		for rows.Next() {
			util.CheckErr(rows.Scan(&s))
			lemmas = append(lemmas, s)
		}
		util.CheckErr(rows.Err())

		/*
			word -> root+postag -> lemma
			LassyLarge often does not contain good lemmas. The word 'mannen' gives lemma 'mannen'.
			The solution here is to search via the root. The word 'mannen' gives root 'man'. The root 'man' gives
			the lemmas 'man' and 'mannen'.
			Roots that are found are only used if they also have the same postag. This prevents the
			word 'fietst' (ww) from getting the lemma 'fiets' (n).
		*/
		roots := make([][2]string, 0)
		rows, err = db.Query(fmt.Sprintf(
			"SELECT `root`,`postag` FROM `"+Cfg.Prefix+"_c_"+prefix+"_deprel` WHERE `word` = %q GROUP BY `root`,`postag`;",
			woord))
		util.CheckErr(err)
		for rows.Next() {
			util.CheckErr(rows.Scan(&s, &p))
			roots = append(roots, [2]string{s, p})
		}
		util.CheckErr(rows.Err())
		for _, root := range roots {
			rows, err := db.Query(fmt.Sprintf(
				"SELECT `lemma` FROM `"+Cfg.Prefix+"_c_"+prefix+"_deprel` WHERE `root` = %q AND `postag` = %q GROUP BY `lemma`;",
				root[0], root[1]))
			util.CheckErr(err)
			for rows.Next() {
				util.CheckErr(rows.Scan(&s))
				// Skip lemmas already found in the word -> lemma step.
				if !has(lemmas, s) {
					lemmas = append(lemmas, s)
				}
			}
			util.CheckErr(rows.Err())
		}

		/* send the word with its lemmas to the database buffer */
		sort.Strings(lemmas)
		word_buf_put(woord, lemmas)
	}

	// send the last data from the buffer to the database
	buf_flush(WORD)
	_, err = db.Exec("COMMIT;")
	util.CheckErr(err)

	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_word ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_word
		ADD UNIQUE INDEX (word);`)
	util.CheckErr(err)

	fmt.Println("Tijd:", time.Now().Sub(now))

	//
	// ranges
	//

	fmt.Println("Ranges bepalen voor " + Cfg.Prefix + "_c_" + prefix + "_meta ...")

	_, err = db.Exec(fmt.Sprintf(
		"DROP TABLE IF EXISTS `%s_c_%s_mval`, %s_c_%s_minf; ",
		Cfg.Prefix,
		prefix,
		Cfg.Prefix,
		prefix))
	util.CheckErr(err)

	_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_mval (
			id    int          NOT NULL DEFAULT 0,
			idx   int          NOT NULL DEFAULT 0,
			text  varchar(260) NOT NULL DEFAULT 0,
			n     int          NOT NULL DEFAULT 0)
			DEFAULT CHARACTER SET utf8
			DEFAULT COLLATE utf8_unicode_ci;`)
	util.CheckErr(err)

	_, err = db.Exec(`CREATE TABLE ` + Cfg.Prefix + "_c_" + prefix + `_minf (
			id      int      NOT NULL DEFAULT 0,
			indexed boolean  NOT NULL DEFAULT 1,
			size    int      NOT NULL DEFAULT 0,
			dmin    datetime NOT NULL DEFAULT "1000-01-01 00:00:00",
			dmax    datetime NOT NULL DEFAULT "1000-01-01 00:00:00",
			dtype   int      NOT NULL DEFAULT 0,
			fmin    float    NOT NULL DEFAULT 0.0,
			fstep   float    NOT NULL DEFAULT 0.0,
			imin    int      NOT NULL DEFAULT 0,
			istep   int      NOT NULL DEFAULT 0);`)
	util.CheckErr(err)

	// Load the metadata definitions: metas holds the names ordered by name,
	// metat maps a name to its type and metai maps a name to its id.
	metas := make([]string, 0)
	metat := make(map[string]string)
	metai := make(map[string]int)
	rows, err = db.Query(fmt.Sprintf("SELECT `id`,`name`,`type` FROM `%s_c_%s_midx` ORDER BY 2", Cfg.Prefix, prefix))
	util.CheckErr(err)
	for rows.Next() {
		var i int
		var n, t string
		util.CheckErr(rows.Scan(&i, &n, &t))
		metas = append(metas, n)
		metat[n] = t
		metai[n] = i
	}
	util.CheckErr(rows.Err())
	for _, meta := range metas {
		// idx collects, per index number, the text stored in <prefix>_mval
		// at the end of this iteration.
		idx := make(map[int]string)
		switch metat[meta] {
		case "TEXT":
			// Number the distinct text values in sorted order.
			rows, err := db.Query(fmt.Sprintf(
				"SELECT DISTINCT `tval` FROM `%s_c_%s_meta` WHERE `id` = %d ORDER BY 1",
				Cfg.Prefix, prefix,
				metai[meta]))
			util.CheckErr(err)
			ix := 0
			for rows.Next() {
				var s string
				util.CheckErr(rows.Scan(&s))
				idx[ix] = s
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = %d WHERE `id` = %d AND `tval` = %q",
					Cfg.Prefix, prefix, ix, metai[meta], s))
				util.CheckErr(err)
				ix++
			}
			util.CheckErr(rows.Err())
		case "INT":
			rows, err := db.Query(fmt.Sprintf(
				"SELECT MIN(`ival`), MAX(`ival`), COUNT(DISTINCT `ival`) FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix,
				metai[meta]))
			util.CheckErr(err)
			var v1, v2, vx int
			for rows.Next() {
				// NOTE(review): the error returned by Scan is discarded here.
				rows.Scan(&v1, &v2, &vx)
			}
			ir := newIrange(v1, v2, vx)
			indexed := 0
			if ir.indexed {
				indexed = 1
			}
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_minf` (`id`,`imin`,`istep`,`indexed`,`size`) VALUES (%d,%d,%d,%d,%d)",
				Cfg.Prefix, prefix,
				metai[meta], ir.min, ir.step, indexed, len(ir.s)))
			util.CheckErr(err)
			rows, err = db.Query(fmt.Sprintf(
				"SELECT DISTINCT `ival` FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix,
				metai[meta]))
			util.CheckErr(err)
			var v int
			// Collect the (index, value) pairs first and run the updates
			// only after the result set has been read completely.
			iis := make([][2]int, 0)
			for rows.Next() {
				util.CheckErr(rows.Scan(&v))
				s, ix := ir.value(v)
				idx[ix] = s
				iis = append(iis, [2]int{ix, v})
			}
			util.CheckErr(rows.Err())
			for _, ii := range iis {
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = %d WHERE `id` = %d AND `ival` = %d",
					Cfg.Prefix, prefix,
					ii[0],
					metai[meta],
					ii[1]))
				util.CheckErr(err)
			}
		case "FLOAT":
			rows, err := db.Query(fmt.Sprintf(
				"SELECT MIN(`fval`), MAX(`fval`) FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix,
				metai[meta]))
			util.CheckErr(err)
			var v1, v2 float64
			for rows.Next() {
				// NOTE(review): the error returned by Scan is discarded here.
				rows.Scan(&v1, &v2)
			}
			fr := newFrange(v1, v2)
			indexed := 0
			if fr.indexed {
				indexed = 1
			}
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_minf` (`id`,`fmin`,`fstep`,`indexed`,`size`) VALUES (%d,%g,%g,%d,%d)",
				Cfg.Prefix, prefix,
				metai[meta], fr.min, fr.step, indexed, len(fr.s)))
			util.CheckErr(err)
			// For an indexed range the index is computed inside the
			// database; otherwise every row gets index 0.
			if fr.indexed {
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = FLOOR((`fval` - %g) / %g) WHERE `id` = %d",
					Cfg.Prefix, prefix,
					fr.min,
					fr.step,
					metai[meta]))
				util.CheckErr(err)
			} else {
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = 0 WHERE `id` = %d",
					Cfg.Prefix, prefix,
					metai[meta]))
				util.CheckErr(err)
			}
			rows, err = db.Query(fmt.Sprintf(
				"SELECT DISTINCT `idx` FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix,
				metai[meta]))
			util.CheckErr(err)
			for rows.Next() {
				var i int
				util.CheckErr(rows.Scan(&i))
				idx[i] = fr.s[i]
			}
			util.CheckErr(rows.Err())
		case "DATE", "DATETIME":
			// The distinct-value count is only queried for DATE; for
			// DATETIME a constant 0 is selected instead.
			dis := "0"
			if metat[meta] == "DATE" {
				dis = "COUNT(DISTINCT `dval`)"
			}
			rows, err := db.Query(fmt.Sprintf(
				"SELECT MIN(`dval`), MAX(`dval`), %s FROM `%s_c_%s_meta` WHERE `id` = %d",
				dis,
				Cfg.Prefix, prefix,
				metai[meta]))
			util.CheckErr(err)
			var v1, v2 time.Time
			var i int
			for rows.Next() {
				// NOTE(review): the error returned by Scan is discarded here.
				rows.Scan(&v1, &v2, &i)
			}
			dr := newDrange(v1, v2, i, metat[meta] == "DATETIME")
			indexed := 0
			if dr.indexed {
				indexed = 1
			}
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_minf` (`id`,`dmin`,`dmax`,`dtype`,`indexed`,`size`) VALUES (%d,\"%04d-%02d-%02d %02d:%02d:%02d\",\"%04d-%02d-%02d %02d:%02d:%02d\",%d,%d,%d)",
				Cfg.Prefix, prefix,
				metai[meta],
				dr.min.Year(), dr.min.Month(), dr.min.Day(), dr.min.Hour(), dr.min.Minute(), dr.min.Second(),
				dr.max.Year(), dr.max.Month(), dr.max.Day(), dr.max.Hour(), dr.max.Minute(), dr.max.Second(),
				dr.r, indexed, len(dr.s)))
			util.CheckErr(err)
			rows, err = db.Query(fmt.Sprintf(
				"SELECT `dval` FROM `%s_c_%s_meta` WHERE `id` = %d",
				Cfg.Prefix, prefix,
				metai[meta]))
			util.CheckErr(err)
			var v time.Time
			for rows.Next() {
				util.CheckErr(rows.Scan(&v))
				s, ix := dr.value(v)
				idx[ix] = s
				// NOTE(review): the time part below is written with '-'
				// separators, unlike the ':' used in the INSERT above —
				// confirm that the database accepts this form.
				_, err = db.Exec(fmt.Sprintf(
					"UPDATE `%s_c_%s_meta` SET `idx` = %d WHERE `id` = %d AND `dval` = \"%04d-%02d-%02d %02d-%02d-%02d\"",
					Cfg.Prefix, prefix,
					ix,
					metai[meta],
					v.Year(), v.Month(), v.Day(),
					v.Hour(), v.Minute(), v.Second()))
				util.CheckErr(err)
			}
			util.CheckErr(rows.Err())
		}
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)

		// add rows for sentences that have no metadata; they get the
		// index 2147483647
		_, err = db.Exec(fmt.Sprintf(
			"INSERT `%s_c_%s_meta` (`id`,`arch`,`file`,`idx`)"+
				"SELECT DISTINCT %d, `arch`, `file`, 2147483647 FROM `%s_c_%s_sent` `s` WHERE NOT EXISTS ( "+
				"SELECT `arch`, `file` FROM `%s_c_%s_meta` `m` WHERE `s`.`arch`=`m`.`arch` AND `s`.`file`=`m`.`file` AND `id`=%d )",
			Cfg.Prefix, prefix,
			metai[meta], Cfg.Prefix, prefix,
			Cfg.Prefix, prefix, metai[meta]))
		util.CheckErr(err)
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)
		// check whether metadata was actually added
		rows, err = db.Query(fmt.Sprintf(
			"SELECT DISTINCT 1 FROM `%s_c_%s_meta` WHERE `id`=%d AND `idx`=2147483647",
			Cfg.Prefix, prefix,
			metai[meta]))
		util.CheckErr(err)
		for rows.Next() {
			idx[2147483647] = ""
		}
		util.CheckErr(rows.Err())

		// Store the text belonging to every index number that is in use.
		for ix := range idx {
			_, err = db.Exec(fmt.Sprintf(
				"INSERT `%s_c_%s_mval` (`id`,`idx`,`text`) VALUES (%d,%d,%q)",
				Cfg.Prefix, prefix,
				metai[meta],
				ix,
				idx[ix]))
			util.CheckErr(err)
		}
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)
	}

	fmt.Println("Aanmaken indexen op " + Cfg.Prefix + "_c_" + prefix + "_mval ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_mval
		ADD INDEX (id),
		ADD INDEX (idx);`)
	util.CheckErr(err)

	fmt.Println("Aanmaken index op " + Cfg.Prefix + "_c_" + prefix + "_minf ...")
	_, err = db.Exec(`ALTER TABLE ` + Cfg.Prefix + "_c_" + prefix + `_minf
		ADD INDEX (id);`)
	util.CheckErr(err)

	// Count the rows per index number and store the counts in column n of
	// <prefix>_mval.
	fmt.Println("Telling van ranges ...")
	for _, meta := range metas {
		sums := make(map[int]int)
		rows, err := db.Query(fmt.Sprintf(
			"SELECT COUNT(`idx`),`idx` FROM `%s_c_%s_meta` WHERE `id` = %d GROUP BY `idx`",
			Cfg.Prefix, prefix,
			metai[meta]))
		util.CheckErr(err)
		for rows.Next() {
			var c, i int
			util.CheckErr(rows.Scan(&c, &i))
			sums[i] = c
		}
		util.CheckErr(rows.Err())

		for s := range sums {
			_, err = db.Exec(fmt.Sprintf(
				"UPDATE `%s_c_%s_mval` SET `n` = %d WHERE `id` = %d AND `idx` = %d",
				Cfg.Prefix, prefix,
				sums[s],
				metai[meta], s))
			util.CheckErr(err)
		}
		_, err = db.Exec("COMMIT;")
		util.CheckErr(err)
	}

	//
	// store info about the corpus in the database
	//

	lines := 0
	rows, err = db.Query("SELECT COUNT(*) FROM " + Cfg.Prefix + "_c_" + prefix + "_sent")
	util.CheckErr(err)
	if rows.Next() {
		util.CheckErr(rows.Scan(&lines))
		rows.Close()
	}
	hasmeta := 0
	if len(metas) > 0 {
		hasmeta = 1
	} else {
		// Without metadata definitions the metadata tables are removed
		// again; the result of this Exec is not checked.
		db.Exec(fmt.Sprintf(
			"DROP TABLE IF EXISTS `%s_c_%s_meta`, `%s_c_%s_midx`, `%s_c_%s_minf`, `%s_c_%s_mval`;",
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix,
			Cfg.Prefix,
			prefix))
	}
	// With -s the status column is left untouched.
	if db_updatestatus {
		_, err = db.Exec(fmt.Sprintf("UPDATE `%s_info` SET `status` = \"FINISHED\", `nline` = %d, `active` = NOW(), `hasmeta` = %d WHERE `id` = %q",
			Cfg.Prefix, lines, hasmeta, prefix))
	} else {
		_, err = db.Exec(fmt.Sprintf("UPDATE `%s_info` SET `nline` = %d, `active` = NOW(), `hasmeta` = %d WHERE `id` = %q",
			Cfg.Prefix, lines, hasmeta, prefix))
	}
	util.CheckErr(err)

	// A public corpus is registered under the user "******" instead of
	// under its owner.
	user := owner
	if public == "1" {
		user = "******"
	}
	_, err = db.Exec(fmt.Sprintf("INSERT `%s_corpora` (`user`, `prefix`) VALUES (%q, %q);", Cfg.Prefix, user, prefix))
	util.CheckErr(err)

	_, err = db.Exec("COMMIT;")
	util.CheckErr(err)

	//fmt.Println("Bijwerken menu's voor postag, rel en hpostag ...")
	//tags()

	// total time
	fmt.Println("Tijd:", time.Now().Sub(now))
	showmemstats()

	sizes()
}
Ejemplo n.º 9
0
// main pretty-prints a Gherkin document.
//
// Input is read from the file named by inputPath or, when no path is set,
// from stdin provided stdin is not a terminal. Output is written to the file
// named by outputPath, or to stdout when no path is set. Failures to open,
// create or read a stream are reported through usageErr and end the run.
func main() {
	if runVersion {
		fmt.Fprintf(os.Stdout, "%s (go-gherkin %s)\n", VERSION, gherkin.VERSION)
		return
	}

	// Pick the input stream: explicit file first, piped stdin second.
	if inputPath != "" {
		inputReader, err = os.Open(inputPath)
		if err != nil {
			usageErr(err)
			return
		}
	} else if !util.IsTerminal(os.Stdin) {
		inputReader = os.Stdin
	}
	if inputReader == nil {
		usageErr(fmt.Errorf("Missing input (stdin OR -in flag)"))
		return
	}

	// Pick the output stream: explicit file, otherwise stdout.
	if outputPath != "" {
		outputWriter, err = os.Create(outputPath)
		if err != nil {
			usageErr(err)
			return
		}
	} else {
		outputWriter = os.Stdout
	}

	// An explicit yes/no wins; otherwise colors are auto-detected, but only
	// when writing to stdout (terminal and not Windows). When writing to a
	// file without an explicit choice, colors keeps its current value.
	if colorsYes {
		colors = true
	} else if colorsNo {
		colors = false
	} else if outputWriter == os.Stdout {
		colors = util.IsTerminal(os.Stdout) && runtime.GOOS != "windows"
	}

	fmtr := &formater.GherkinPrettyFormater{
		AnsiColors:             colors,
		CenterSteps:            centerSteps,
		SkipSteps:              skipSteps,
		SkipComments:           skipComments,
		NoAlignComments:        noCommentAlign,
		AlignCommentsMinIndent: commentAlignMinIndent,
	}

	log.Printf("Formater Settings: %+v", fmtr)

	// A failed read must not be silently treated as (partial) input, so the
	// error is checked and reported the same way as the open/create errors.
	var data []byte
	data, err = ioutil.ReadAll(inputReader)
	if err != nil {
		usageErr(err)
		return
	}

	gp := gherkin.NewGherkinDOMParser(string(data))
	gp.Init()
	err = gp.Parse()
	if err != nil {
		usageErrWithVerboseHint(fmt.Errorf("Parsing failed. invalid gherkin"))
		// The parser's own error is only shown in verbose mode.
		if verbose {
			fmt.Fprintln(os.Stderr, err)
		}
		return
	}
	fmtr.Format(gp, outputWriter)
}
Ejemplo n.º 10
0
// main guesses the language of a text with textcat and prints the result.
//
// The text is taken from the file named by *opt_f, otherwise from the
// remaining command-line arguments joined with spaces, otherwise from stdin.
// When *opt_l is set the input is classified line by line; when *opt_a is
// set only the list of active languages is printed.
func main() {
	flag.Parse()

	// No file, no text arguments and an interactive stdin means there is
	// nothing to classify; print the usage unless the language list was
	// requested.
	nothingToRead := *opt_f == "" && flag.NArg() == 0 && util.IsTerminal(os.Stdin)
	if nothingToRead && !*opt_a {
		fmt.Fprintf(os.Stderr, "\nUsage: %s [args] [text]\n\nargs with default values are:\n\n", os.Args[0])
		flag.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nIf both -f and text are missing, read from stdin\n\n")
		return
	}

	// Register the extra pattern files from *opt_p (comma-separated); each
	// language is named after the file's base name up to its first dot.
	var extras []string
	tc := textcat.NewTextCat()
	if *opt_p != "" {
		for _, file := range strings.Split(*opt_p, ",") {
			lang := strings.Split(path.Base(file), ".")[0]
			extras = append(extras, lang)
			util.CheckErr(tc.AddLanguage(lang, file))
		}
	}

	// Raw patterns are enabled when *opt_r or *opt_b is set; utf8 patterns
	// when *opt_b is set or *opt_r is not.
	rawWanted := *opt_r || *opt_b
	utf8Wanted := *opt_b || !*opt_r
	if !*opt_z {
		if rawWanted {
			tc.EnableAllRawLanguages()
		}
		if utf8Wanted {
			tc.EnableAllUtf8Languages()
		}
	} else {
		// With *opt_z only the extra languages added above are enabled.
		if rawWanted {
			for _, lang := range extras {
				tc.EnableLanguages(lang + ".raw")
			}
		}
		if utf8Wanted {
			for _, lang := range extras {
				tc.EnableLanguages(lang + ".utf8")
			}
		}
	}
	if *opt_i != "" {
		tc.DisableLanguages(strings.Split(*opt_i, ",")...)
	}

	if *opt_a {
		for _, lang := range tc.ActiveLanguages() {
			fmt.Println(lang)
		}
		return
	}

	if *opt_l {
		// Line mode: classify every input line separately.
		var r *util.Reader
		switch {
		case *opt_f != "":
			fp, err := os.Open(*opt_f)
			util.CheckErr(err)
			defer fp.Close()
			r = util.NewReader(fp)
		case flag.NArg() > 0:
			r = util.NewReader(bytes.NewBufferString(strings.Join(flag.Args(), " ")))
		default:
			r = util.NewReader(os.Stdin)
		}
		for {
			line, err := r.ReadLineString()
			if err == io.EOF {
				break
			}
			util.CheckErr(err)
			// Each output line is the result (or the classification
			// error), a tab, and the original line.
			langs, cerr := tc.Classify(line)
			if cerr == nil {
				fmt.Print(strings.Join(langs, ","))
			} else {
				fmt.Print(cerr)
			}
			fmt.Println("\t" + line)
		}
		return
	}

	// Whole-text mode: gather the complete input and classify it once.
	var text string
	switch {
	case *opt_f != "":
		content, err := ioutil.ReadFile(*opt_f)
		util.CheckErr(err)
		text = string(content)
	case flag.NArg() > 0:
		text = strings.Join(flag.Args(), " ")
	default:
		content, err := ioutil.ReadAll(os.Stdin)
		util.CheckErr(err)
		text = string(content)
	}

	langs, cerr := tc.Classify(text)
	if cerr == nil {
		fmt.Println(strings.Join(langs, "\n"))
	} else {
		fmt.Println(cerr)
	}
}
Ejemplo n.º 11
0
// main prints, for each of the three standard streams, the value
// util.IsTerminal returns for it.
func main() {
	streams := []struct {
		label string
		file  *os.File
	}{
		{"stdin: ", os.Stdin},
		{"stdout:", os.Stdout},
		{"stderr:", os.Stderr},
	}
	for _, s := range streams {
		fmt.Println(s.label, util.IsTerminal(s.file))
	}
}