func readCsv(ch chan []string) {
	var reader *csv.Reader
	if inputFn == "" {
		// no input file given: read from stdin
		reader = csv.NewReader(os.Stdin)
	} else {
		file, err := os.Open(inputFn)
		if err != nil {
			fmt.Println("Error:", err)
			os.Exit(1)
		}
		defer file.Close()
		reader = csv.NewReader(file)
	}
	if !strictLen {
		// -1 disables the length check: records may have a variable number of fields
		reader.FieldsPerRecord = -1
	}
	// use the first rune of the separator string as the delimiter
	r, _ := utf8.DecodeRuneInString(inputSep)
	reader.Comma = r
	reader.LazyQuotes = lazyQuotes
	for {
		record, err := reader.Read()
		if err == io.EOF {
			close(ch)
			break
		} else if err != nil {
			fmt.Println("Error:", err)
			close(ch)
			break
		}
		ch <- record
	}
}
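// A minimal usage sketch for readCsv, assuming the package-level inputFn,
// inputSep, strictLen, and lazyQuotes variables have been set elsewhere
// (e.g. by flag parsing); exampleReadCsv is not part of the original source.
// readCsv closes the channel on EOF, so a plain range loop drains it.
func exampleReadCsv() {
	ch := make(chan []string)
	go readCsv(ch)
	for record := range ch {
		fmt.Println(record) // each record is one CSV row
	}
}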
func (t *table) start(reader *csv.Reader) {
	defer t.Stop()
	defer close(t.rows)
	headers, err := reader.Read()
	if err != nil {
		if perr, ok := err.(*csv.ParseError); ok {
			// Modifies the underlying err
			perr.Err = fmt.Errorf("%s. %s", perr.Err, "This can happen when the CSV is malformed, or when the wrong delimiter is used")
		}
		t.handleErr(err)
		return
	}
	reader.FieldsPerRecord = len(headers)
	for {
		if t.stopped {
			break
		}
		line, err := reader.Read()
		if err != nil {
			t.handleErr(err)
			return
		}
		t.rows <- convertLineToRow(line, headers)
	}
}
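// convertLineToRow is referenced above but not defined in this excerpt. A
// plausible sketch, assuming rows are maps keyed by header name; both the
// row type and this implementation are assumptions, not the original code.
type row map[string]string

func convertLineToRow(line []string, headers []string) row {
	r := make(row, len(headers))
	for i, h := range headers {
		if i < len(line) {
			r[h] = line[i]
		}
	}
	return r
}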
/* Given a CSV reader, populate a histogram table of the field counts */
func countFields(csvReader *csv.Reader, histogram []int64) {
	histogramLen := len(histogram)
	csvReader.FieldsPerRecord = -1 // Tell the CSV reader to expect a variable field count
	for {
		strs, err := csvReader.Read()
		if nil != err {
			break
		}
		f := len(strs)
		if f < histogramLen {
			if 0 < f {
				histogram[f]++
			} // There is no such thing as a record with zero fields.
		} else {
			fmt.Printf("\nWARNING: field count %d exceeds histogram length %d.", f, histogramLen)
		}
	}
}
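// Example use of countFields on an in-memory CSV with mixed record widths;
// index i of the histogram counts the records that have exactly i fields.
// This driver is illustrative and not part of the original source.
func exampleCountFields() {
	data := "a,b,c\nd,e\nf,g,h\n"
	r := csv.NewReader(strings.NewReader(data))
	histogram := make([]int64, 10)
	countFields(r, histogram)
	fmt.Println(histogram[2], histogram[3]) // prints: 1 2
}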
// Run is the block's main loop. Here we listen on the different channels we set up.
func (b *ParseCSV) Run() {
	var tree *jee.TokenTree
	var path string
	var err error
	var headers []string
	var csvReader *csv.Reader
	for {
		select {
		case ruleI := <-b.inrule:
			// set a parameter of the block
			path, err = util.ParseString(ruleI, "Path")
			if err != nil {
				b.Error(err)
				continue
			}
			token, err := jee.Lexer(path)
			if err != nil {
				b.Error(err)
				continue
			}
			tree, err = jee.Parser(token)
			if err != nil {
				b.Error(err)
				continue
			}
			headers, err = util.ParseArrayString(ruleI, "Headers")
			if err != nil {
				b.Error(err)
				continue
			}
		case <-b.quit:
			// quit the block
			return
		case msg := <-b.in:
			// deal with inbound data
			if tree == nil {
				continue
			}
			var data string
			dataI, err := jee.Eval(tree, msg)
			if err != nil {
				b.Error(err)
				continue
			}
			switch value := dataI.(type) {
			case []byte:
				data = string(value[:])
			case string:
				data = value
			default:
				b.Error("data should be a string or a []byte")
				continue
			}
			csvReader = csv.NewReader(strings.NewReader(data))
			csvReader.TrimLeadingSpace = true
			// allow records to have variable numbers of fields
			csvReader.FieldsPerRecord = -1
		case <-b.inpoll:
			if csvReader == nil {
				b.Error("this block needs data to be pollable")
				break
			}
			record, err := csvReader.Read()
			if err != nil && err != io.EOF {
				b.Error(err)
				continue
			}
			row := make(map[string]interface{})
			for fieldIndex, field := range record {
				if fieldIndex >= len(headers) {
					row[strconv.Itoa(fieldIndex)] = field
				} else {
					header := headers[fieldIndex]
					row[header] = field
				}
			}
			b.out <- row
		case MsgChan := <-b.queryrule:
			// deal with a query request
			MsgChan <- map[string]interface{}{
				"Path":    path,
				"Headers": headers,
			}
		}
	}
}
func importCSV(filename string, connStr string, schema string, tableName string, ignoreErrors bool, skipHeader bool, fields string, delimiter string) error {
	db, err := connect(connStr, schema)
	if err != nil {
		return err
	}
	defer db.Close()
	var reader *csv.Reader
	var bar *pb.ProgressBar
	if filename != "" {
		file, err := os.Open(filename)
		if err != nil {
			return err
		}
		defer file.Close()
		bar = NewProgressBar(file)
		reader = csv.NewReader(io.TeeReader(file, bar))
	} else {
		reader = csv.NewReader(os.Stdin)
	}
	reader.Comma, _ = utf8.DecodeRuneInString(delimiter)
	reader.LazyQuotes = true
	columns, err := parseColumns(reader, skipHeader, fields)
	if err != nil {
		return err
	}
	reader.FieldsPerRecord = len(columns)
	i, err := NewCSVImport(db, schema, tableName, columns)
	if err != nil {
		return err
	}
	var success, failed int
	if filename != "" {
		bar.Start()
		err, success, failed = copyCSVRows(i, reader, ignoreErrors, delimiter, columns)
		bar.Finish()
	} else {
		err, success, failed = copyCSVRows(i, reader, ignoreErrors, delimiter, columns)
	}
	if err != nil {
		lineNumber := success + failed
		if !skipHeader {
			lineNumber++
		}
		return fmt.Errorf("line %d: %s", lineNumber, err)
	}
	fmt.Printf("%d rows imported into %s.%s\n", success, schema, tableName)
	if ignoreErrors && failed > 0 {
		fmt.Printf("%d rows could not be imported into %s.%s and have been written to stderr.\n", failed, schema, tableName)
	}
	return i.Commit()
}
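// A hypothetical invocation of importCSV; the connection string, schema,
// table name, and file path below are placeholders, not values from the
// original source, and parseColumns is assumed to derive columns from the
// header row when fields is empty.
func exampleImportCSV() {
	err := importCSV("data.csv", "postgres://localhost/mydb", "public",
		"mytable", false, false, "", ",")
	if err != nil {
		log.Fatal(err)
	}
}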
func StreamCSV(csvChannel chan<- map[string]string, filename string) {
	var err error
	// open the (possibly compressed) csv file
	var file *os.File
	if file, err = os.Open(filename); err != nil {
		log.Fatal(err)
	}
	defer file.Close()
	var ioReader io.Reader
	filename = strings.ToLower(filename)
	var csvReader *csv.Reader
	if strings.HasSuffix(filename, ".bz2") {
		ioReader = bzip2.NewReader(file)
		csvReader = csv.NewReader(ioReader)
	} else if strings.HasSuffix(filename, ".xz") {
		if ioReader, err = xz.NewReader(file, 0); err != nil {
			log.Fatal(err)
		}
		csvReader = csv.NewReader(ioReader)
	} else {
		// input must be compressed; fall back to reading the file as plain CSV
		csvReader = csv.NewReader(file)
	}
	// 0 means the first record fixes the expected field count for all that follow
	csvReader.FieldsPerRecord = 0
	// map column indices to column names from the header record
	colMapping := make(map[int]string)
	rec, err := csvReader.Read()
	if err != nil {
		log.Fatalf("ERROR: %v\n", err)
	}
	for key, value := range rec {
		colMapping[key] = value
	}
	for {
		rec, err := csvReader.Read()
		if err != nil {
			break
		}
		line := make(map[string]string)
		for key, value := range rec {
			if value == "" {
				continue
			}
			line[colMapping[key]] = value
		}
		csvChannel <- line
	}
	close(csvChannel)
}
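// Example consumer for StreamCSV; StreamCSV closes the channel when the
// file is exhausted, so a plain range loop suffices. The file path and the
// "id" column name are placeholders, not values from the original source.
func exampleStreamCSV() {
	rows := make(chan map[string]string)
	go StreamCSV(rows, "data.csv.bz2")
	for r := range rows {
		fmt.Println(r["id"]) // keys come from the CSV header record
	}
}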
func ImportDictionaries() map[string][]*models.SuggestItem {
	var itemMap = make(map[string][]*models.SuggestItem)
	fileInfo, err := ioutil.ReadDir(DataDirectory)
	if err != nil {
		log.Print(err)
		return itemMap
	}
	numberOfDictionaries := 0
	for _, file := range fileInfo {
		if !file.IsDir() && (strings.HasSuffix(file.Name(), ".txt") || strings.HasSuffix(file.Name(), ".txt.gz")) {
			dictionaryFile := fmt.Sprintf("%s%s%s", DataDirectory, string(os.PathSeparator), file.Name())
			dictionaryName := strings.TrimSuffix(strings.TrimSuffix(file.Name(), ".gz"), ".txt")
			log.Printf("Importing dictionary %s from file %s", dictionaryName, dictionaryFile)
			csvFile, err := os.Open(dictionaryFile)
			if err != nil {
				log.Print(err)
				continue
			}
			var csvReader *csv.Reader
			var gzipReader *gzip.Reader
			if strings.HasSuffix(file.Name(), ".txt.gz") {
				gzipReader, err = gzip.NewReader(csvFile)
				if err != nil {
					log.Print(err)
					csvFile.Close()
					continue
				}
				csvReader = csv.NewReader(gzipReader)
			} else {
				csvReader = csv.NewReader(csvFile)
			}
			// every record must be exactly two fields: term|weight
			csvReader.FieldsPerRecord = 2
			csvReader.Comma = '|'
			csvReader.LazyQuotes = true
			csvReader.TrimLeadingSpace = true
			rawCSVdata, err := csvReader.ReadAll()
			// close readers per iteration; deferring inside the loop would keep
			// every file open until the function returns
			if gzipReader != nil {
				gzipReader.Close()
			}
			csvFile.Close()
			if err != nil {
				log.Print(err)
				continue
			}
			for _, each := range rawCSVdata {
				var suggestItem = new(models.SuggestItem)
				suggestItem.Term = each[0]
				weight, err := strconv.Atoi(each[1])
				if err == nil {
					suggestItem.Weight = weight
					itemMap[dictionaryName] = append(itemMap[dictionaryName], suggestItem)
				}
			}
			numberOfDictionaries++
		}
	}
	log.Printf("Imported %d dictionaries", numberOfDictionaries)
	return itemMap
}
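// A minimal usage sketch, not part of the original source: ImportDictionaries
// reads every .txt/.txt.gz file in DataDirectory, where each line holds a
// pipe-separated "term|weight" pair (e.g. `apple|10`), and returns the items
// grouped by dictionary name.
func exampleImportDictionaries() {
	for name, items := range ImportDictionaries() {
		log.Printf("%s: %d items", name, len(items))
	}
}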
func load(db *sql.DB, tbl string, cr *csv.Reader) error {
	head, err := cr.Read()
	if err != nil {
		return err
	}
	// every data record must match the header width
	cr.FieldsPerRecord = len(head)
	marks := make([]string, len(head))
	for i := range marks {
		marks[i] = "?"
	}
	qry := "INSERT INTO " + tbl + " (" + strings.Join(head, ",") + ") VALUES (" + strings.Join(marks, ",") + ")"
	Log.Info("insert", "qry", qry)

	var wg sync.WaitGroup
	conc := runtime.GOMAXPROCS(-1)
	blocks := make(chan [][]string, conc)
	errs := make(chan error, conc)
	rowCount := new(int32)
	R := func(f func() error) {
		defer wg.Done()
		errs <- f()
	}
	for i := 0; i < conc; i++ {
		wg.Add(1)
		go R(func() error {
			var (
				tx  *sql.Tx
				st  *sql.Stmt
				err error // local to this goroutine; sharing the outer err across goroutines would be a data race
			)
			n := 0
			values := make([]interface{}, len(marks))
			for block := range blocks {
				for _, row := range block {
					for i, v := range row {
						values[i] = v
					}
					if tx == nil {
						if st != nil {
							st.Close()
							st = nil
						}
						if tx, err = db.Begin(); err != nil {
							return err
						}
						if st, err = tx.Prepare(qry); err != nil {
							return err
						}
					}
					if _, err = st.Exec(values...); err != nil {
						return fmt.Errorf("error inserting %q with %q: %v", row, qry, err)
					}
					n++
					atomic.AddInt32(rowCount, 1)
					// commit in batches of 1000 rows
					if n%1000 == 0 {
						if err = tx.Commit(); err != nil {
							return err
						}
						tx = nil
						Log.Info("commit", "n", n, "rowCount", atomic.LoadInt32(rowCount))
					}
				}
			}
			Log.Info("commit", "n", n, "rowCount", atomic.LoadInt32(rowCount))
			if st != nil {
				st.Close()
			}
			if tx != nil {
				return tx.Commit()
			}
			return nil
		})
	}

	var block [][]string
	t := time.Now()
	for {
		row, err := cr.Read()
		if err != nil {
			if err == io.EOF {
				break
			}
			Log.Error("read row", "error", err)
			continue
		}
		if block == nil {
			block = make([][]string, 0, batchLen)
		}
		block = append(block, row)
		if len(block) == batchLen {
			blocks <- block
			block = nil
		}
	}
	if len(block) > 0 {
		blocks <- block
	}
	close(blocks)
	wg.Wait()
	n, d := atomic.LoadInt32(rowCount), time.Since(t)
	fmt.Fprintf(os.Stderr, "Written %d rows under %s: %.3f rows/sec\n", n, d, float64(n)/d.Seconds())
	close(errs)
	for err := range errs {
		if err == nil {
			continue
		}
		return err
	}
	return nil
}
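// A sketch of calling load, assuming the package-level batchLen and Log are
// defined as in the original file; the driver name, DSN, table name, and file
// path are placeholders, not values from the original source.
func exampleLoad() {
	db, err := sql.Open("mysql", "user:pass@/mydb")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
	f, err := os.Open("data.csv")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()
	if err := load(db, "mytable", csv.NewReader(f)); err != nil {
		log.Fatal(err)
	}
}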