Example #1
0
File: load.go Project: gwenn/yacr
// main copies records from standard input to standard output using yacr with
// a tab separator. When *cpuprofile is non-empty, a CPU profile is written to
// that file for the duration of the run.
//
// Sample data: http://download.geonames.org/export/dump/allCountries.zip
func main() {
	flag.Parse()
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			log.Fatal(err)
		}
		// Do not ignore a failed profile start: otherwise the run silently
		// produces an empty profile file.
		if err = pprof.StartCPUProfile(f); err != nil {
			log.Fatal(err)
		}
		defer pprof.StopCPUProfile()
	}

	r := yacr.NewReader(os.Stdin, '\t', false, false)
	w := yacr.NewWriter(os.Stdout, '\t', false)
	// Copy field by field; stop as soon as either side reports failure.
	for r.Scan() && w.Write(r.Bytes()) {
		if r.EndOfRecord() {
			w.EndOfRecord()
		}
	}
	w.Flush()
	// Read and write errors are both reported on standard error.
	if err := r.Err(); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
	if err := w.Err(); err != nil {
		fmt.Fprintln(os.Stderr, err)
	}
}
Example #2
0
// makeReader opens filepath with yacr.Zopen and wraps the resulting stream in
// a yacr.Reader configured from c (separator, quoted and guess settings).
// If the file cannot be opened the process is terminated through log.Fatalf.
// The underlying stream is returned alongside the reader.
func makeReader(filepath string, c *config) (*yacr.Reader, io.ReadCloser) {
	src, openErr := yacr.Zopen(filepath)
	if openErr != nil {
		log.Fatalf("Error while opening file: '%s' (%s)\n", filepath, openErr)
	}
	return yacr.NewReader(src, c.sep, c.quoted, c.guess), src
}
Example #3
0
// Filter repositions the cursor on the first row of the file and loads it.
// It returns the seek error, if any, or whatever Next returns.
func (vc *csvTabCursor) Filter() error {
	tab := vc.vTab
	tab.eof = false
	// Rewind the file to the recorded offset of the first row.
	_, seekErr := vc.f.Seek(tab.offsetFirstRow, os.SEEK_SET)
	if seekErr != nil {
		return seekErr
	}
	vc.rowNumber = 0
	// The seek invalidated the previous reader's internal buffer/state, which
	// cannot be reset, so a brand new reader is required.
	vc.r = yacr.NewReader(vc.f, tab.sep, tab.quoted, false)
	// Read and parse the next line.
	return vc.Next()
}
Example #4
0
// Next reads the following row through the table's readRow and, on success,
// increments the cursor's row number. It returns io.EOF once the table has
// been flagged as exhausted, or the error reported by readRow.
func (vc *csvTabCursor) Next() error {
	tab := vc.vTab
	if tab.eof {
		return io.EOF
	}
	// Create the reader lazily on first use.
	if vc.r == nil {
		vc.r = yacr.NewReader(vc.f, tab.sep, tab.quoted, false)
	}
	if err := tab.readRow(vc.r); err != nil {
		return err
	}
	vc.rowNumber++
	return nil
}
Example #5
0
// Example copies records from standard input to standard output, using a tab
// as separator on both sides, and reports any read or write error on standard
// error.
func Example() {
	in := yacr.NewReader(os.Stdin, '\t', false, false)
	out := yacr.NewWriter(os.Stdout, '\t', false)

	for in.Scan() {
		// A failed write ends the copy; the error is reported below.
		if !out.Write(in.Bytes()) {
			break
		}
		if in.EndOfRecord() {
			out.EndOfRecord()
		}
	}
	out.Flush()

	if readErr := in.Err(); readErr != nil {
		fmt.Fprintln(os.Stderr, readErr)
	}
	if writeErr := out.Err(); writeErr != nil {
		fmt.Fprintln(os.Stderr, writeErr)
	}
}
Example #6
0
// grep scans the file f and prints, on standard output, every record for
// which match(config.fields, pattern, record) reports true. Each field is
// printed on its own line with its index (offset by config.start) and, unless
// config.noHeader is set, the matching header name ("???" when the record has
// more fields than the header line).
//
// In config.descMode only the header fields are listed and found stays false.
//
// found reports whether at least one record matched; err is the open or read
// error, if any.
func grep(pattern *regexp.Regexp, f string, config *config) (found bool, err error) {
	in, err := yacr.Zopen(f)
	if err != nil {
		return false, err
	}
	defer in.Close()
	reader := yacr.NewReader(in, config.sep, config.quoted, config.guess)

	// The first record is consumed as headers unless the file has none; in
	// description mode it is always read because it is what gets printed.
	var headers []string
	if !config.noHeader || config.descMode {
		for reader.Scan() {
			headers = append(headers, reader.Text())
			if reader.EndOfRecord() {
				break
			}
		}
		// TODO Try to guess/fix the separator if an error occurs (or if only one column is found)
		if err = reader.Err(); err != nil {
			return found, err
		}
	}

	tw := tabwriter.NewWriter(os.Stdout, 0, 4, 2, ' ', 0)

	if config.descMode {
		fmt.Println(f, ":")
		for idx, name := range headers {
			fmt.Fprintf(tw, "%d\t%s\n", idx+config.start, name)
		}
		tw.Flush()
		return found, err
	}

	// record holds the fields of the record being assembled. previous is the
	// last completed record: its byte slices are reused as storage so that a
	// steady stream of similar records does not allocate.
	record := make([][]byte, 0, 10)
	previous := record
	col := 0
	for reader.Scan() {
		raw := reader.Bytes() // must be copied
		var field []byte
		if col < len(previous) {
			field = append(previous[col][:0], raw...)
		} else {
			field = make([]byte, len(raw))
			copy(field, raw)
		}
		record = append(record, field)
		if !reader.EndOfRecord() {
			col++
			continue
		}

		if match(config.fields, pattern, record) {
			if !found {
				// Print the file name once, before the first hit.
				fmt.Println(f, ":")
				found = true
			}
			fmt.Println("-")
			for idx, value := range record {
				switch {
				case config.noHeader:
					fmt.Fprintf(tw, "%d\t%s\n", idx+config.start, value)
				case idx < len(headers):
					fmt.Fprintf(tw, "%d\t%s\t%s\n", idx+config.start, headers[idx], value)
				default:
					fmt.Fprintf(tw, "%d\t%s\t%s\n", idx+config.start, "???", value)
				}
			}
			tw.Flush()
		}

		// Start the next record, keeping this one's buffers for reuse.
		previous = record
		record = record[:0]
		col = 0
	}
	err = reader.Err()
	return found, err
}
Example #7
0
// ImportCSV imports CSV data into the specified table (which may not exist yet).
// Code is adapted from .import command implementation in SQLite3 shell sources.
//
// When db.Columns reports no column for the table, the table is created from
// the first non-empty record of the input: each field becomes a column name
// whose type is given by ic.getType. When the table already has columns and
// ic.Headers is set, the first record is skipped.
//
// Every remaining record is inserted as one row. A record with fewer fields
// than columns is completed with NULL and extra fields are ignored; both
// situations are reported on ic.Log when it is not nil.
//
// If the connection is in autocommit mode, the inserts run inside a
// transaction that is committed on success and rolled back on error.
func (db *Conn) ImportCSV(in io.Reader, ic ImportConfig, dbName, table string) error {
	columns, err := db.Columns(dbName, table)
	if err != nil {
		return err
	}
	r := yacr.NewReader(in, ic.Separator, ic.Quoted, ic.Guess)
	r.Trim = ic.Trim
	r.Comment = ic.Comment
	nCol := len(columns)
	if nCol == 0 { // table does not exist, let's create it
		var sql string
		if len(dbName) == 0 {
			sql = fmt.Sprintf(`CREATE TABLE "%s" `, escapeQuote(table))
		} else {
			sql = fmt.Sprintf(`CREATE TABLE %s."%s" `, doubleQuote(dbName), escapeQuote(table))
		}
		sep := '('
		// TODO if headers flag is false...
		for i := 0; r.Scan(); i++ {
			if i == 0 && r.EndOfRecord() && len(r.Bytes()) == 0 { // empty line
				i = -1 // the loop increment brings it back to 0
				continue
			}
			// Column names come from the input: escape them like the table
			// name so that an embedded quote cannot break the statement.
			sql += fmt.Sprintf("%c\n  \"%s\" %s", sep, escapeQuote(r.Text()), ic.getType(i))
			sep = ','
			nCol++
			if r.EndOfRecord() {
				break
			}
		}
		if err = r.Err(); err != nil {
			return err
		}
		if sep == '(' { // no column has been appended
			return errors.New("empty file/input")
		}
		sql += "\n)"
		if err = db.FastExec(sql); err != nil {
			return err
		}
	} else if ic.Headers { // skip headers line
		for r.Scan() {
			if r.EndOfRecord() {
				break
			}
		}
		if err = r.Err(); err != nil {
			return err
		}
	}

	// One positional parameter per column.
	var sql string
	if len(dbName) == 0 {
		sql = fmt.Sprintf(`INSERT INTO "%s" VALUES (?%s)`, escapeQuote(table), strings.Repeat(", ?", nCol-1))
	} else {
		sql = fmt.Sprintf(`INSERT INTO %s."%s" VALUES (?%s)`, doubleQuote(dbName), escapeQuote(table), strings.Repeat(", ?", nCol-1))
	}
	s, err := db.prepare(sql)
	if err != nil {
		return err
	}
	defer s.Finalize()
	ac := db.GetAutocommit()
	if ac {
		if err = db.Begin(); err != nil {
			return err
		}
	}
	// err is shared with every return below, so any failure after Begin
	// triggers the rollback.
	defer func() {
		if err != nil && ac {
			_ = db.Rollback()
		}
	}()
	startLine := r.LineNumber()
	// i is the 1-based index of the field (and bind parameter) just read.
	for i := 1; r.Scan(); i++ {
		if i == 1 && r.EndOfRecord() && len(r.Bytes()) == 0 { // empty line
			i = 0 // the loop increment brings it back to 1
			startLine = r.LineNumber()
			continue
		}
		if i <= nCol {
			if err = s.BindByIndex(i, r.Text()); err != nil {
				return err
			}
		}
		if r.EndOfRecord() {
			if i < nCol {
				if ic.Log != nil {
					fmt.Fprintf(ic.Log, "%s:%d: expected %d columns but found %d - filling the rest with NULL\n", ic.Name, startLine, nCol, i)
				}
				// Parameters 1..i hold the fields found on this line: only
				// the missing ones, starting at i+1, must be set to NULL.
				for j := i + 1; j <= nCol; j++ {
					if err = s.BindByIndex(j, nil); err != nil {
						return err
					}
				}
			} else if i > nCol && ic.Log != nil {
				fmt.Fprintf(ic.Log, "%s:%d: expected %d columns but found %d - extras ignored\n", ic.Name, startLine, nCol, i)
			}
			if _, err = s.Next(); err != nil {
				return err
			}
			i = 0 // the loop increment brings it back to 1
			startLine = r.LineNumber()
		}
	}
	if err = r.Err(); err != nil {
		return err
	}
	if ac {
		if err = db.Commit(); err != nil {
			return err
		}
	}
	return nil
}
Example #8
0
// Create declares a virtual table backed by a CSV file.
//
// args[0] => module name
// args[1] => db name
// args[2] => table name
// args[3] => filename (maybe quoted: '...')
// args[i>3] :
//  - contains HEADER ignoring case => use first line in file as column names or skip first line if NAMES are specified
//  - contains NO_QUOTE ignoring case => no double quoted field expected in file
//  - single char (;) or quoted char (';') => values separator in file
//  - contains NAMES ignoring case => use args[i+1], ... as column names (until _TYPES_)
//  - contains TYPES ignoring case => use args[I+1], ... as column types
// Beware, empty args are skipped (..., ,...), use '' empty SQL string instead (..., '', ...).
//
// The separator is guessed by the reader unless one is given explicitly.
// An error is returned when no file is specified, when the file cannot be
// opened or its first row cannot be read, when no column name/type can be
// determined, or when the schema declaration fails.
//
// Adapted from:
//  - https://github.com/gwenn/sqlite-csv-ext
//  - http://www.ch-werner.de/sqliteodbc/html/csvtable_8c.html
func (m csvModule) Create(c *Conn, args []string) (VTab, error) {
	if len(args) < 4 {
		return nil, errors.New("no CSV file specified")
	}
	// unquote removes one pair of enclosing single quotes. Anything that is
	// not fully quoted (including the empty string or a lone quote) is
	// returned unchanged instead of being indexed blindly.
	unquote := func(s string) string {
		if n := len(s); n >= 2 && s[0] == '\'' && s[n-1] == '\'' {
			return s[1 : n-1]
		}
		return s
	}
	/* pull out name of csv file (remove quotes) */
	filename := unquote(args[3])
	/* if a custom delimiter specified, pull it out */
	var separator byte = ','
	/* should the header zRow be used */
	header := false
	quoted := true
	guess := true
	// cols and types switch from nil to non-nil when the NAMES / TYPES
	// keyword is met: from then on, every argument is collected as a value.
	var cols, types []string
	for i := 4; i < len(args); i++ {
		arg := args[i]
		upper := strings.ToUpper(arg)
		switch {
		case types != nil:
			types = append(types, unquote(arg))
		case cols != nil:
			if strings.EqualFold(arg, "_TYPES_") {
				types = make([]string, 0, len(cols))
			} else {
				cols = append(cols, arg)
			}
		case len(arg) == 1:
			separator = arg[0]
			guess = false
		case len(arg) == 3 && arg[0] == '\'':
			separator = arg[1]
			guess = false
		case strings.Contains(upper, "HEADER"):
			header = true
		case strings.Contains(upper, "NO_QUOTE"):
			quoted = false
		case strings.Contains(upper, "NAMES"):
			cols = make([]string, 0, 10)
		case strings.Contains(upper, "TYPES"):
			types = make([]string, 0, 10)
		}
	}
	/* open the source csv file */
	file, err := os.Open(filename)
	if err != nil {
		// Keep the cause (missing file, permission, ...) in the message.
		return nil, fmt.Errorf("error opening CSV file: '%s': %w", filename, err)
	}
	defer file.Close()
	/* Read first zRow to obtain column names/number */
	vTab := &csvTab{f: filename, sep: separator, quoted: quoted, cols: make([]string, 0, 10)}
	vTab.maxLength = int(c.Limit(LimitLength))
	vTab.maxColumn = int(c.Limit(LimitColumn))

	reader := yacr.NewReader(file, separator, quoted, guess)
	if header {
		reader.Split(vTab.split(reader.ScanField))
	}
	if err = vTab.readRow(reader); err != nil {
		return nil, err
	}
	named := header
	if len(cols) > 0 { // headers ignored
		// TODO check len(cols) == len(vTab.cols) ?
		vTab.cols = cols
		named = true
	}
	if len(vTab.cols) == 0 {
		if len(types) == 0 {
			return nil, errors.New("no column name/type specified")
		}
		// Only the number of columns matters here: they are not named, so
		// they are declared as col1, col2, ... below.
		vTab.cols = types
	}

	if guess {
		vTab.sep = reader.Sep()
	}
	/* Create the underlying relational database schema. If
	 * that is successful, call sqlite3_declare_vtab() to configure
	 * the csv table schema.
	 */
	sql := "CREATE TABLE x("
	tail := ", "
	for i, col := range vTab.cols {
		if i == len(vTab.cols)-1 {
			tail = ");"
		}
		colType := ""
		if len(types) > i {
			colType = " " + types[i]
		}
		if named {
			if len(col) == 0 {
				return nil, errors.New("no column name found")
			}
			sql = fmt.Sprintf("%s\"%s\"%s%s", sql, col, colType, tail)
		} else {
			sql = fmt.Sprintf("%scol%d%s%s", sql, i+1, colType, tail)
		}
	}
	if err = c.DeclareVTab(sql); err != nil {
		return nil, err
	}

	// Columns without a declared type keep the zero Affinity.
	vTab.affinities = make([]Affinity, len(vTab.cols))
	if len(types) > 0 {
		for i, typ := range types {
			if i >= len(vTab.affinities) {
				break
			}
			vTab.affinities[i] = typeAffinity(typ)
		}
	}
	return vTab, nil
}