// ProcessData defers to util.SQLInsertData
func (s *SQLWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	// handle panics a bit more gracefully
	defer func() {
		if err := recover(); err != nil {
			util.KillPipelineIfErr(err.(error), killChan)
		}
	}()

	// First check for SQLWriterData
	var wd SQLWriterData
	err := data.ParseJSONSilent(d, &wd)
	logger.Info("SQLWriter: Writing data...")
	if err == nil && wd.TableName != "" && wd.InsertData != nil {
		logger.Debug("SQLWriter: SQLWriterData scenario")
		dd, err := data.NewJSON(wd.InsertData)
		util.KillPipelineIfErr(err, killChan)
		err = util.SQLInsertData(s.writeDB, dd, wd.TableName, s.OnDupKeyUpdate, s.OnDupKeyFields, s.BatchSize)
		util.KillPipelineIfErr(err, killChan)
	} else {
		logger.Debug("SQLWriter: normal data scenario")
		err = util.SQLInsertData(s.writeDB, d, s.TableName, s.OnDupKeyUpdate, s.OnDupKeyFields, s.BatchSize)
		util.KillPipelineIfErr(err, killChan)
	}
	logger.Info("SQLWriter: Write complete")
}
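The branch above hinges on whether the incoming JSON unmarshals into the SQLWriterData wrapper (a table name plus the rows to insert) or is just a plain batch of rows. A minimal, standalone sketch of that detection logic using only encoding/json; the struct name and the table_name/insert_data field tags are assumptions for illustration, not necessarily ratchet's exact definitions:

package main

import (
	"encoding/json"
	"fmt"
)

// sqlWriterData mirrors the idea of ratchet's SQLWriterData wrapper.
// The JSON tags here are assumptions, used only for this sketch.
type sqlWriterData struct {
	TableName  string      `json:"table_name"`
	InsertData interface{} `json:"insert_data"`
}

func main() {
	wrapped := []byte(`{"table_name":"users","insert_data":[{"id":1,"name":"a"}]}`)
	plain := []byte(`[{"id":1,"name":"a"}]`)

	for _, payload := range [][]byte{wrapped, plain} {
		var wd sqlWriterData
		err := json.Unmarshal(payload, &wd)
		if err == nil && wd.TableName != "" && wd.InsertData != nil {
			fmt.Println("SQLWriterData scenario: insert into", wd.TableName)
		} else {
			fmt.Println("normal data scenario: insert into the writer's configured table")
		}
	}
}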
func (p *Pipeline) runStages(killChan chan error) {
	for n, stage := range p.layout.stages {
		for _, dp := range stage.processors {
			p.wg.Add(1)
			// Each DataProcessor runs in a separate goroutine.
			go func(n int, dp *dataProcessor) {
				// This is where the main DataProcessor interface
				// functions are called.
				logger.Info(p.Name, "- stage", n+1, dp, "waiting to receive data")
				for d := range dp.inputChan {
					logger.Info(p.Name, "- stage", n+1, dp, "received data")
					if p.PrintData {
						logger.Debug(p.Name, "- stage", n+1, dp, "data =", string(d))
					}
					dp.recordDataReceived(d)
					dp.processData(d, killChan)
				}
				logger.Info(p.Name, "- stage", n+1, dp, "input closed, calling Finish")
				dp.Finish(dp.outputChan, killChan)
				if dp.outputChan != nil {
					logger.Info(p.Name, "- stage", n+1, dp, "closing output")
					close(dp.outputChan)
				}
				p.wg.Done()
			}(n, dp)
		}
	}
}
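runStages is internal; from the caller's side you only build a pipeline and run it. A minimal sketch of that, assuming the README-style entry points (ratchet.NewPipeline, Pipeline.Run returning an error channel, and the stock IoReader/IoWriter processors); treat the exact package paths and constructors as assumptions rather than a verified snippet:

package main

import (
	"fmt"
	"os"
	"strings"

	"github.com/dailyburn/ratchet"
	"github.com/dailyburn/ratchet/processors"
)

func main() {
	// A two-stage pipeline: read a string, write it to stdout.
	// runStages (above) is what starts one goroutine per processor
	// and shuttles data.JSON between the stages' channels.
	read := processors.NewIoReader(strings.NewReader("Hello, ratchet!\n"))
	write := processors.NewIoWriter(os.Stdout)

	pipeline := ratchet.NewPipeline(read, write)
	if err := <-pipeline.Run(); err != nil {
		fmt.Println("pipeline failed:", err)
	}
}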
func insertObjects(db *sql.DB, objects []map[string]interface{}, tableName string, onDupKeyUpdate bool, onDupKeyFields []string) error {
	logger.Info("SQLInsertData: building INSERT for len(objects) =", len(objects))
	insertSQL, vals := buildInsertSQL(objects, tableName, onDupKeyUpdate, onDupKeyFields)
	logger.Debug("SQLInsertData:", insertSQL)
	logger.Debug("SQLInsertData: values", vals)

	stmt, err := db.Prepare(insertSQL)
	if err != nil {
		logger.Debug("SQLInsertData: error preparing SQL")
		return err
	}
	defer stmt.Close()

	res, err := stmt.Exec(vals...)
	if err != nil {
		return err
	}
	lastID, err := res.LastInsertId()
	if err != nil {
		return err
	}
	rowCnt, err := res.RowsAffected()
	if err != nil {
		return err
	}
	logger.Info(fmt.Sprintf("SQLInsertData: rows affected = %d, last insert ID = %d", rowCnt, lastID))
	return nil
}
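buildInsertSQL (not shown here) turns the slice of maps into one multi-row INSERT plus a flat slice of bind values. A simplified, self-contained sketch of that idea; the column ordering and the MySQL-style ON DUPLICATE KEY UPDATE clause are assumptions about what the real helper emits:

package main

import (
	"fmt"
	"sort"
	"strings"
)

// buildInsertSQLSketch is a simplified stand-in for ratchet's buildInsertSQL.
// It assumes at least one object, sorts column names for a stable order,
// emits one "(?,?,...)" group per object, and returns the statement plus
// the flattened bind values in matching order.
func buildInsertSQLSketch(objects []map[string]interface{}, table string, onDupKeyUpdate bool, dupFields []string) (string, []interface{}) {
	cols := make([]string, 0, len(objects[0]))
	for c := range objects[0] {
		cols = append(cols, c)
	}
	sort.Strings(cols)

	group := "(" + strings.TrimRight(strings.Repeat("?,", len(cols)), ",") + ")"
	groups := make([]string, len(objects))
	vals := make([]interface{}, 0, len(objects)*len(cols))
	for i, obj := range objects {
		groups[i] = group
		for _, c := range cols {
			vals = append(vals, obj[c])
		}
	}

	sql := fmt.Sprintf("INSERT INTO %s(%s) VALUES %s", table, strings.Join(cols, ","), strings.Join(groups, ","))
	if onDupKeyUpdate {
		updates := make([]string, len(dupFields))
		for i, f := range dupFields {
			updates[i] = f + "=VALUES(" + f + ")"
		}
		sql += " ON DUPLICATE KEY UPDATE " + strings.Join(updates, ",")
	}
	return sql, vals
}

func main() {
	sql, vals := buildInsertSQLSketch(
		[]map[string]interface{}{{"id": 1, "name": "a"}, {"id": 2, "name": "b"}},
		"users", true, []string{"name"},
	)
	fmt.Println(sql)  // INSERT INTO users(id,name) VALUES (?,?),(?,?) ON DUPLICATE KEY UPDATE name=VALUES(name)
	fmt.Println(vals) // [1 a 2 b]
}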
// ProcessData defers to WriteBatch
func (w *BigQueryWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	queuedRows, err := data.ObjectsFromJSON(d)
	util.KillPipelineIfErr(err, killChan)

	logger.Info("BigQueryWriter: Writing -", len(queuedRows))
	err = w.WriteBatch(queuedRows)
	if err != nil {
		util.KillPipelineIfErr(err, killChan)
	}
	logger.Info("BigQueryWriter: Write complete")
}
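data.ObjectsFromJSON converts the raw JSON payload into the []map[string]interface{} rows that WriteBatch expects. A rough equivalent using only encoding/json; the single-object fallback is an assumption about how the real helper behaves, not a copy of it:

package main

import (
	"encoding/json"
	"fmt"
)

// objectsFromJSONSketch decodes d into a slice of generic objects.
// If d is a single JSON object rather than an array, it is wrapped in a
// one-element slice (an assumed convenience so "one row" and "many rows"
// look the same to a writer).
func objectsFromJSONSketch(d []byte) ([]map[string]interface{}, error) {
	var objects []map[string]interface{}
	if err := json.Unmarshal(d, &objects); err != nil {
		var single map[string]interface{}
		if err2 := json.Unmarshal(d, &single); err2 != nil {
			return nil, err
		}
		objects = append(objects, single)
	}
	return objects, nil
}

func main() {
	rows, err := objectsFromJSONSketch([]byte(`[{"id":1},{"id":2}]`))
	fmt.Println(len(rows), err) // 2 <nil>
}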
// ProcessData runs the SQL statements, deferring to util.ExecuteSQLQuery
func (s *SQLExecutor) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	// handle panics a bit more gracefully
	defer func() {
		if err := recover(); err != nil {
			util.KillPipelineIfErr(err.(error), killChan)
		}
	}()

	sql := ""
	var err error
	if s.query == "" && s.sqlGenerator != nil {
		sql, err = s.sqlGenerator(d)
		util.KillPipelineIfErr(err, killChan)
	} else if s.query != "" {
		sql = s.query
	} else {
		killChan <- errors.New("SQLExecutor: must have either static query or sqlGenerator func")
	}

	logger.Debug("SQLExecutor: Running - ", sql)

	// See sql.go
	err = util.ExecuteSQLQuery(s.readDB, sql)
	util.KillPipelineIfErr(err, killChan)

	logger.Info("SQLExecutor: Query complete")
}
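The sqlGenerator branch lets the statement be derived from whatever the previous stage produced. A standalone sketch of such a generator, assuming the func(data.JSON) (string, error) shape implied by the call above, with a local JSON type standing in for ratchet's data.JSON ([]byte of JSON) and a hypothetical events table:

package main

import (
	"encoding/json"
	"fmt"
)

// JSON stands in for ratchet's data.JSON, which is a []byte of JSON.
type JSON []byte

// deleteByIDs is a hypothetical sqlGenerator: it reads the ids produced
// by the previous stage and emits a DELETE statement covering them.
func deleteByIDs(d JSON) (string, error) {
	var rows []struct {
		ID int `json:"id"`
	}
	if err := json.Unmarshal(d, &rows); err != nil {
		return "", err
	}
	sql := "DELETE FROM events WHERE id IN ("
	for i, r := range rows {
		if i > 0 {
			sql += ","
		}
		sql += fmt.Sprint(r.ID)
	}
	return sql + ")", nil
}

func main() {
	sql, err := deleteByIDs(JSON(`[{"id":7},{"id":9}]`))
	fmt.Println(sql, err) // DELETE FROM events WHERE id IN (7,9) <nil>
}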
// ForEachQueryData handles generating the SQL (in case of dynamic mode),
// running the query and retrieving the data in data.JSON format, and then
// passing the results back with the function call to forEach.
func (r *BigQueryReader) ForEachQueryData(d data.JSON, killChan chan error, forEach func(d data.JSON)) {
	sql := ""
	var err error
	if r.query == "" && r.sqlGenerator != nil {
		sql, err = r.sqlGenerator(d)
		util.KillPipelineIfErr(err, killChan)
	} else if r.query != "" {
		sql = r.query
	} else {
		killChan <- errors.New("BigQueryReader: must have either static query or sqlGenerator func")
	}

	logger.Debug("BigQueryReader: Running -", sql)

	bqDataChan := make(chan bigquery.Data)
	go r.bqClient().AsyncQuery(r.PageSize, r.config.DatasetID, r.config.ProjectID, sql, bqDataChan)
	aggregatedData := bigquery.Data{}

	for bqd := range bqDataChan {
		util.KillPipelineIfErr(bqd.Err, killChan)
		logger.Info("BigQueryReader: received bqData: len(rows) =", len(bqd.Rows))
		// logger.Debug("  %+v", bqd)

		if bqd.Rows != nil && bqd.Headers != nil && len(bqd.Rows) > 0 {
			if r.AggregateResults {
				logger.Debug("BigQueryReader: aggregating results")
				aggregatedData.Headers = bqd.Headers
				aggregatedData.Rows = append(aggregatedData.Rows, bqd.Rows...)
			} else {
				// Send data as soon as we get it back
				logger.Debug("BigQueryReader: sending data without aggregation")
				d, err := data.JSONFromHeaderAndRows(bqd.Headers, bqd.Rows)
				util.KillPipelineIfErr(err, killChan)
				forEach(d) // pass back out via the forEach func
			}
		}
	}
	if r.AggregateResults {
		logger.Info("BigQueryReader: sending aggregated results: len(rows) =", len(aggregatedData.Rows))
		d, err := data.JSONFromHeaderAndRows(aggregatedData.Headers, aggregatedData.Rows)
		util.KillPipelineIfErr(err, killChan)
		forEach(d) // pass back out via the forEach func
	}
}
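The aggregate-or-stream decision above is independent of BigQuery itself; it is simply a choice between invoking the callback once per result page or once at the end with everything collected. A self-contained sketch of the same pattern over a plain channel (the page type and callback shape here are illustrative, not ratchet's):

package main

import "fmt"

// page is a stand-in for one bigquery.Data result page.
type page struct {
	Rows []string
}

// forEachPage mirrors the streaming/aggregating pattern: with aggregate set,
// rows from all pages are collected and the callback fires once; otherwise
// the callback fires as each non-empty page arrives.
func forEachPage(pages <-chan page, aggregate bool, forEach func(rows []string)) {
	var all []string
	for p := range pages {
		if len(p.Rows) == 0 {
			continue
		}
		if aggregate {
			all = append(all, p.Rows...)
		} else {
			forEach(p.Rows) // send data as soon as we get it back
		}
	}
	if aggregate {
		forEach(all) // one callback with everything at the end
	}
}

func main() {
	pages := make(chan page)
	go func() {
		pages <- page{Rows: []string{"r1", "r2"}}
		pages <- page{Rows: []string{"r3"}}
		close(pages)
	}()
	forEachPage(pages, true, func(rows []string) {
		fmt.Println("got", len(rows), "rows") // got 3 rows
	})
}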
// ensureInitialized connects to the SFTP server and creates the output file at the specified path
func (w *SftpWriter) ensureInitialized(killChan chan error) {
	if w.initialized {
		return
	}

	client, err := util.SftpClient(w.parameters.Server, w.parameters.Username, w.parameters.AuthMethods)
	util.KillPipelineIfErr(err, killChan)

	logger.Info("Path", w.parameters.Path)

	file, err := client.Create(w.parameters.Path)
	util.KillPipelineIfErr(err, killChan)

	w.client = client
	w.file = file
	w.initialized = true
}
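util.SftpClient itself isn't shown. A typical way to build such a client is with golang.org/x/crypto/ssh plus github.com/pkg/sftp; the helper below is a hypothetical stand-in rather than ratchet's implementation, and it skips host key verification purely for brevity:

package main

import (
	"fmt"

	"github.com/pkg/sftp"
	"golang.org/x/crypto/ssh"
)

// newSftpClient is a hypothetical stand-in for util.SftpClient: it dials the
// server over SSH and wraps the connection in an SFTP client.
func newSftpClient(server, username string, auth []ssh.AuthMethod) (*sftp.Client, error) {
	config := &ssh.ClientConfig{
		User:            username,
		Auth:            auth,
		HostKeyCallback: ssh.InsecureIgnoreHostKey(), // for brevity only; verify host keys in real use
	}
	conn, err := ssh.Dial("tcp", server, config)
	if err != nil {
		return nil, err
	}
	return sftp.NewClient(conn)
}

func main() {
	client, err := newSftpClient("example.com:22", "deploy", []ssh.AuthMethod{ssh.Password("secret")})
	if err != nil {
		fmt.Println("connect failed:", err)
		return
	}
	defer client.Close()

	file, err := client.Create("/tmp/output.json")
	if err != nil {
		fmt.Println("create failed:", err)
		return
	}
	defer file.Close()
}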