// http://download.geonames.org/export/dump/allCountries.zip func main() { flag.Parse() if *cpuprofile != "" { f, err := os.Create(*cpuprofile) if err != nil { log.Fatal(err) } pprof.StartCPUProfile(f) defer pprof.StopCPUProfile() } r := yacr.NewReader(os.Stdin, '\t', false, false) w := yacr.NewWriter(os.Stdout, '\t', false) for r.Scan() && w.Write(r.Bytes()) { if r.EndOfRecord() { w.EndOfRecord() } } w.Flush() if err := r.Err(); err != nil { fmt.Fprintln(os.Stderr, err) } if err := w.Err(); err != nil { fmt.Fprintln(os.Stderr, err) } }
func makeReader(filepath string, c *config) (*yacr.Reader, io.ReadCloser) { in, err := yacr.Zopen(filepath) if err != nil { log.Fatalf("Error while opening file: '%s' (%s)\n", filepath, err) } reader := yacr.NewReader(in, c.sep, c.quoted, c.guess) return reader, in }
func (vc *csvTabCursor) Filter() error { v := vc.vTab /* seek back to start of first zRow */ v.eof = false if _, err := vc.f.Seek(v.offsetFirstRow, os.SEEK_SET); err != nil { return err } vc.rowNumber = 0 /* a new reader/scanner must be created because there is no way to reset its internal buffer/state (which has been invalidated by the SEEK_SET)*/ vc.r = yacr.NewReader(vc.f, v.sep, v.quoted, false) /* read and parse next line */ return vc.Next() }
func (vc *csvTabCursor) Next() error { v := vc.vTab if v.eof { return io.EOF } if vc.r == nil { vc.r = yacr.NewReader(vc.f, v.sep, v.quoted, false) } /* read the next row of data */ err := v.readRow(vc.r) if err == nil { vc.rowNumber++ } return err }
func Example() { r := yacr.NewReader(os.Stdin, '\t', false, false) w := yacr.NewWriter(os.Stdout, '\t', false) for r.Scan() && w.Write(r.Bytes()) { if r.EndOfRecord() { w.EndOfRecord() } } w.Flush() if err := r.Err(); err != nil { fmt.Fprintln(os.Stderr, err) } if err := w.Err(); err != nil { fmt.Fprintln(os.Stderr, err) } }
func grep(pattern *regexp.Regexp, f string, config *config) (found bool, err error) { //fmt.Println(f, config) in, err := yacr.Zopen(f) if err != nil { return } defer in.Close() reader := yacr.NewReader(in, config.sep, config.quoted, config.guess) var headers []string if config.noHeader && !config.descMode { } else { for reader.Scan() { headers = append(headers, reader.Text()) if reader.EndOfRecord() { break } } // TODO Try to guess/fix the separator if an error occurs (or if only one column is found) if err = reader.Err(); err != nil { return } //fmt.Printf("Headers: %v (%d)\n", headers) } //tw := tabwriter.NewWriter(os.Stdout, 8, 1, 8, '\t', tabwriter.Debug) tw := tabwriter.NewWriter(os.Stdout, 0, 4, 2, ' ', 0) if config.descMode { fmt.Println(f, ":") for i, value := range headers { tw.Write([]byte(fmt.Sprintf("%d\t%s\n", i+config.start, value))) } tw.Flush() return } var values = make([][]byte, 0, 10) var v, cv []byte orig := values i := 0 for reader.Scan() { v = reader.Bytes() // must be copied if i < len(orig) { cv = orig[i] cv = append(cv[:0], v...) } else { cv = make([]byte, len(v)) copy(cv, v) } values = append(values, cv) if !reader.EndOfRecord() { i++ continue } if match(config.fields, pattern, values) { if !found { fmt.Println(f, ":") found = true } fmt.Println("-") for i, value := range values { if config.noHeader { tw.Write([]byte(fmt.Sprintf("%d\t%s\n", i+config.start, value))) } else if i < len(headers) { tw.Write([]byte(fmt.Sprintf("%d\t%s\t%s\n", i+config.start, headers[i], value))) } else { tw.Write([]byte(fmt.Sprintf("%d\t%s\t%s\n", i+config.start, "???", value))) } } tw.Flush() } orig = values values = values[:0] i = 0 } err = reader.Err() return }
// ImportCSV imports CSV data into the specified table (which may not exist yet). // Code is adapted from .import command implementation in SQLite3 shell sources. func (db *Conn) ImportCSV(in io.Reader, ic ImportConfig, dbName, table string) error { columns, err := db.Columns(dbName, table) if err != nil { return err } r := yacr.NewReader(in, ic.Separator, ic.Quoted, ic.Guess) r.Trim = ic.Trim r.Comment = ic.Comment nCol := len(columns) if nCol == 0 { // table does not exist, let's create it var sql string if len(dbName) == 0 { sql = fmt.Sprintf(`CREATE TABLE "%s" `, escapeQuote(table)) } else { sql = fmt.Sprintf(`CREATE TABLE %s."%s" `, doubleQuote(dbName), escapeQuote(table)) } sep := '(' // TODO if headers flag is false... for i := 0; r.Scan(); i++ { if i == 0 && r.EndOfRecord() && len(r.Bytes()) == 0 { // empty line i = -1 continue } sql += fmt.Sprintf("%c\n \"%s\" %s", sep, r.Text(), ic.getType(i)) sep = ',' nCol++ if r.EndOfRecord() { break } } if err = r.Err(); err != nil { return err } if sep == '(' { return errors.New("empty file/input") } sql += "\n)" if err = db.FastExec(sql); err != nil { return err } } else if ic.Headers { // skip headers line for r.Scan() { if r.EndOfRecord() { break } } if err = r.Err(); err != nil { return err } } var sql string if len(dbName) == 0 { sql = fmt.Sprintf(`INSERT INTO "%s" VALUES (?%s)`, escapeQuote(table), strings.Repeat(", ?", nCol-1)) } else { sql = fmt.Sprintf(`INSERT INTO %s."%s" VALUES (?%s)`, doubleQuote(dbName), escapeQuote(table), strings.Repeat(", ?", nCol-1)) } s, err := db.prepare(sql) if err != nil { return err } defer s.Finalize() ac := db.GetAutocommit() if ac { if err = db.Begin(); err != nil { return err } } defer func() { if err != nil && ac { _ = db.Rollback() } }() startLine := r.LineNumber() for i := 1; r.Scan(); i++ { if i == 1 && r.EndOfRecord() && len(r.Bytes()) == 0 { // empty line i = 0 startLine = r.LineNumber() continue } if i <= nCol { if err = s.BindByIndex(i, r.Text()); err != nil { return err } } if r.EndOfRecord() { if i < nCol { if ic.Log != nil { fmt.Fprintf(ic.Log, "%s:%d: expected %d columns but found %d - filling the rest with NULL\n", ic.Name, startLine, nCol, i) } for ; i <= nCol; i++ { if err = s.BindByIndex(i, nil); err != nil { return err } } } else if i > nCol && ic.Log != nil { fmt.Fprintf(ic.Log, "%s:%d: expected %d columns but found %d - extras ignored\n", ic.Name, startLine, nCol, i) } if _, err = s.Next(); err != nil { return err } i = 0 startLine = r.LineNumber() } } if err = r.Err(); err != nil { return err } if ac { if err = db.Commit(); err != nil { return err } } return nil }
// args[0] => module name // args[1] => db name // args[2] => table name // args[3] => filename (maybe quoted: '...') // args[i>3] : // - contains HEADER ignoring case => use first line in file as column names or skip first line if NAMES are specified // - contains NO_QUOTE ignoring case => no double quoted field expected in file // - single char (;) or quoted char (';') => values separator in file // - contains NAMES ignoring case => use args[i+1], ... as column names (until _TYPES_) // - contains TYPES ignoring case => use args[I+1], ... as column types // Beware, empty args are skipped (..., ,...), use '' empty SQL string instead (..., '', ...). // Adapted from: // - https://github.com/gwenn/sqlite-csv-ext // - http://www.ch-werner.de/sqliteodbc/html/csvtable_8c.html func (m csvModule) Create(c *Conn, args []string) (VTab, error) { if len(args) < 4 { return nil, errors.New("no CSV file specified") } /* pull out name of csv file (remove quotes) */ filename := args[3] if filename[0] == '\'' { filename = filename[1 : len(filename)-1] } /* if a custom delimiter specified, pull it out */ var separator byte = ',' /* should the header zRow be used */ header := false quoted := true guess := true var cols, types []string for i := 4; i < len(args); i++ { arg := args[i] switch { case types != nil: if arg[0] == '\'' { arg = arg[1 : len(arg)-1] } types = append(types, arg) case cols != nil: if strings.ToUpper(arg) == "_TYPES_" { types = make([]string, 0, len(cols)) } else { cols = append(cols, arg) } case len(arg) == 1: separator = arg[0] guess = false case len(arg) == 3 && arg[0] == '\'': separator = arg[1] guess = false case strings.Contains(strings.ToUpper(arg), "HEADER"): header = true case strings.Contains(strings.ToUpper(arg), "NO_QUOTE"): quoted = false case strings.Contains(strings.ToUpper(arg), "NAMES"): cols = make([]string, 0, 10) case strings.Contains(strings.ToUpper(arg), "TYPES"): types = make([]string, 0, 10) } } /* open the source csv file */ file, err := os.Open(filename) if err != nil { return nil, fmt.Errorf("error opening CSV file: '%s'", filename) } defer file.Close() /* Read first zRow to obtain column names/number */ vTab := &csvTab{f: filename, sep: separator, quoted: quoted, cols: make([]string, 0, 10)} vTab.maxLength = int(c.Limit(LimitLength)) vTab.maxColumn = int(c.Limit(LimitColumn)) reader := yacr.NewReader(file, separator, quoted, guess) if header { reader.Split(vTab.split(reader.ScanField)) } if err = vTab.readRow(reader); err != nil { return nil, err } named := header if len(cols) > 0 { // headers ignored // TODO check len(cols) == len(vTab.cols) ? vTab.cols = cols named = true } if len(vTab.cols) == 0 { if len(types) == 0 { return nil, errors.New("no column name/type specified") } vTab.cols = types } if guess { vTab.sep = reader.Sep() } /* Create the underlying relational database schema. If * that is successful, call sqlite3_declare_vtab() to configure * the csv table schema. */ sql := "CREATE TABLE x(" tail := ", " for i, col := range vTab.cols { if i == len(vTab.cols)-1 { tail = ");" } colType := "" if len(types) > i { colType = " " + types[i] } if named { if len(col) == 0 { return nil, errors.New("no column name found") } sql = fmt.Sprintf("%s\"%s\"%s%s", sql, col, colType, tail) } else { sql = fmt.Sprintf("%scol%d%s%s", sql, i+1, colType, tail) } } if err = c.DeclareVTab(sql); err != nil { return nil, err } vTab.affinities = make([]Affinity, len(vTab.cols)) if len(types) > 0 { for i, typ := range types { if i >= len(vTab.affinities) { break } vTab.affinities[i] = typeAffinity(typ) } } return vTab, nil }