// ProcessData runs the SQL statements, deferring to util.ExecuteSQLQuery
func (s *SQLExecutor) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	// handle panics a bit more gracefully
	defer func() {
		if err := recover(); err != nil {
			util.KillPipelineIfErr(err.(error), killChan)
		}
	}()

	sql := ""
	var err error
	if s.query == "" && s.sqlGenerator != nil {
		sql, err = s.sqlGenerator(d)
		util.KillPipelineIfErr(err, killChan)
	} else if s.query != "" {
		sql = s.query
	} else {
		killChan <- errors.New("SQLExecutor: must have either static query or sqlGenerator func")
	}

	logger.Debug("SQLExecutor: Running - ", sql)

	// See sql.go
	err = util.ExecuteSQLQuery(s.readDB, sql)
	util.KillPipelineIfErr(err, killChan)

	logger.Info("SQLExecutor: Query complete")
}
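// Note that the err.(error) type assertion in the deferred recover above will
// itself panic if the recovered value is not an error (e.g. panic("boom")).
// A minimal defensive variant, sketched as a reusable helper; this is
// illustrative, not the library's implementation, and it assumes the same
// KillPipelineIfErr convention (requires importing "fmt"):
func recoverIntoKillChan(killChan chan error) {
	if r := recover(); r != nil {
		err, ok := r.(error)
		if !ok {
			// Wrap non-error panic values instead of panicking again.
			err = fmt.Errorf("recovered from panic: %v", r)
		}
		util.KillPipelineIfErr(err, killChan)
	}
}

// Usage: `defer recoverIntoKillChan(killChan)` at the top of ProcessData.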
// ProcessData defers to util.SQLInsertData
func (s *SQLWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	// handle panics a bit more gracefully
	defer func() {
		if err := recover(); err != nil {
			util.KillPipelineIfErr(err.(error), killChan)
		}
	}()

	// First check for SQLWriterData
	var wd SQLWriterData
	err := data.ParseJSONSilent(d, &wd)
	logger.Info("SQLWriter: Writing data...")
	if err == nil && wd.TableName != "" && wd.InsertData != nil {
		logger.Debug("SQLWriter: SQLWriterData scenario")
		dd, err := data.NewJSON(wd.InsertData)
		util.KillPipelineIfErr(err, killChan)
		err = util.SQLInsertData(s.writeDB, dd, wd.TableName, s.OnDupKeyUpdate, s.OnDupKeyFields, s.BatchSize)
		util.KillPipelineIfErr(err, killChan)
	} else {
		logger.Debug("SQLWriter: normal data scenario")
		err = util.SQLInsertData(s.writeDB, d, s.TableName, s.OnDupKeyUpdate, s.OnDupKeyFields, s.BatchSize)
		util.KillPipelineIfErr(err, killChan)
	}
	logger.Info("SQLWriter: Write complete")
}
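// The dynamic-table branch above expects the incoming JSON to unmarshal into
// a wrapper that carries its own destination table name. A plausible shape of
// that wrapper, sketched under assumed json tags (the exact definition lives
// in the library source; this type name is hypothetical):
type sqlWriterDataSketch struct {
	TableName  string      `json:"table_name"`
	InsertData interface{} `json:"insert_data"`
}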
// ForEachQueryData handles generating the SQL (in case of dynamic mode),
// running the query and retrieving the data in data.JSON format, and then
// passing the results back with the function call to forEach.
func (s *SQLReader) ForEachQueryData(d data.JSON, killChan chan error, forEach func(d data.JSON)) {
	sql := ""
	var err error
	if s.query == "" && s.sqlGenerator != nil {
		sql, err = s.sqlGenerator(d)
		util.KillPipelineIfErr(err, killChan)
	} else if s.query != "" {
		sql = s.query
	} else {
		killChan <- errors.New("SQLReader: must have either static query or sqlGenerator func")
	}

	logger.Debug("SQLReader: Running - ", sql)

	// See sql.go
	dataChan, err := util.GetDataFromSQLQuery(s.readDB, sql, s.BatchSize, s.StructDestination)
	util.KillPipelineIfErr(err, killChan)

	for d := range dataChan {
		// First check if an error was returned back from the SQL processing
		// helper, then if not call forEach with the received data.
		var derr dataErr
		if err := data.ParseJSONSilent(d, &derr); err == nil {
			util.KillPipelineIfErr(errors.New(derr.Error), killChan)
		} else {
			forEach(d)
		}
	}
}
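// The loop above relies on an error-sentinel convention: the SQL helper can
// push an error back through the same data channel as a JSON payload, and any
// batch that successfully parses into dataErr is treated as a failure rather
// than as row data. A minimal sketch of such a sentinel type (illustrative;
// the library defines its own dataErr, whose exact fields may differ):
type dataErrSketch struct {
	Error string // the error message carried back through the data channel
}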
// ProcessData sends data to outputChan if the response body is not nil
func (r *HTTPRequest) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	resp, err := r.Client.Do(r.Request)
	util.KillPipelineIfErr(err, killChan)
	if resp != nil && resp.Body != nil {
		dd, err := ioutil.ReadAll(resp.Body)
		resp.Body.Close()
		util.KillPipelineIfErr(err, killChan)
		outputChan <- dd
	}
}
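// The handler above forwards the response body regardless of HTTP status. A
// minimal sketch of a status guard that could run before the body is read,
// assuming the same kill-channel convention (illustrative, not part of the
// library; requires importing "fmt" and "net/http"):
func killOnBadStatus(resp *http.Response, killChan chan error) {
	if resp.StatusCode < 200 || resp.StatusCode >= 300 {
		util.KillPipelineIfErr(fmt.Errorf("HTTPRequest: unexpected status %s", resp.Status), killChan)
	}
}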
// ProcessData defers to WriteBatch
func (w *BigQueryWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	queuedRows, err := data.ObjectsFromJSON(d)
	util.KillPipelineIfErr(err, killChan)
	logger.Info("BigQueryWriter: Writing -", len(queuedRows))
	err = w.WriteBatch(queuedRows)
	util.KillPipelineIfErr(err, killChan) // no-op on a nil error
	logger.Info("BigQueryWriter: Write complete")
}
// sendFile streams the contents of the remote file at path to outputChan via
// the embedded IoReader, optionally deleting the file afterwards.
func (r *SftpReader) sendFile(path string, outputChan chan data.JSON, killChan chan error) {
	file, err := r.client.Open(path)
	util.KillPipelineIfErr(err, killChan)
	defer file.Close()
	r.IoReader.Reader = file
	r.IoReader.ProcessData(nil, outputChan, killChan)
	if r.DeleteObjects {
		err = r.client.Remove(path)
		util.KillPipelineIfErr(err, killChan)
	}
}
// scanLines reads r.Reader line by line, invoking forEach with each line as
// data.JSON.
func (r *IoReader) scanLines(killChan chan error, forEach func(d data.JSON)) {
	scanner := bufio.NewScanner(r.Reader)
	for scanner.Scan() {
		forEach(data.JSON(scanner.Text()))
	}
	err := scanner.Err()
	util.KillPipelineIfErr(err, killChan)
}
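// bufio.Scanner enforces a maximum token size (64KB by default), so very long
// lines will surface as bufio.ErrTooLong via scanner.Err() and kill the
// pipeline. A minimal sketch of raising that limit with Scanner.Buffer
// (illustrative; pick limits that suit your data):
func newLargeLineScanner(r io.Reader) *bufio.Scanner {
	scanner := bufio.NewScanner(r)
	// Start with a 64KB buffer, but allow tokens to grow up to 10MB.
	scanner.Buffer(make([]byte, 64*1024), 10*1024*1024)
	return scanner
}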
// ensureInitialized connects to the sftp server via util.SftpClient and
// creates the output file at the configured path, performing the setup only once.
func (w *SftpWriter) ensureInitialized(killChan chan error) {
	if w.initialized {
		return
	}
	client, err := util.SftpClient(w.parameters.Server, w.parameters.Username, w.parameters.AuthMethods)
	util.KillPipelineIfErr(err, killChan)
	logger.Info("Path", w.parameters.Path)
	file, err := client.Create(w.parameters.Path)
	util.KillPipelineIfErr(err, killChan)
	w.client = client
	w.file = file
	w.initialized = true
}
// walk traverses the remote directory tree rooted at the configured path,
// sending every non-directory entry downstream.
func (r *SftpReader) walk(outputChan chan data.JSON, killChan chan error) {
	walker := r.client.Walk(r.parameters.Path)
	for walker.Step() {
		util.KillPipelineIfErr(walker.Err(), killChan)
		if !walker.Stat().IsDir() {
			r.sendObject(walker.Path(), outputChan, killChan)
		}
	}
}
// connect opens a connection to the configured ftp host, authenticates with
// the configured username and password, and starts a streaming upload to the
// target path.
func (f *FtpWriter) connect(killChan chan error) {
	conn, err := ftp.Dial(f.host)
	util.KillPipelineIfErr(err, killChan)
	lerr := conn.Login(f.username, f.password)
	util.KillPipelineIfErr(lerr, killChan)

	r, w := io.Pipe()
	f.conn = conn
	go f.conn.Stor(f.path, r)
	f.fileWriter = w
	f.authenticated = true
}
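// The io.Pipe above turns the push-style Write calls made later in
// ProcessData into a pull-style io.Reader that the ftp Stor call can consume
// from its own goroutine. A self-contained sketch of the same pattern using
// only the standard library (io.Copy stands in for Stor):
func pipePattern() {
	r, w := io.Pipe()
	done := make(chan struct{})
	go func() {
		io.Copy(os.Stdout, r) // consumer end; ftp's Stor plays this role above
		close(done)
	}()
	w.Write([]byte("streamed bytes\n"))
	w.Close() // signals EOF to the consumer
	<-done
}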
// ForEachQueryData handles generating the SQL (in case of dynamic mode),
// running the query and retrieving the data in data.JSON format, and then
// passing the results back with the function call to forEach.
func (r *BigQueryReader) ForEachQueryData(d data.JSON, killChan chan error, forEach func(d data.JSON)) {
	sql := ""
	var err error
	if r.query == "" && r.sqlGenerator != nil {
		sql, err = r.sqlGenerator(d)
		util.KillPipelineIfErr(err, killChan)
	} else if r.query != "" {
		sql = r.query
	} else {
		killChan <- errors.New("BigQueryReader: must have either static query or sqlGenerator func")
	}

	logger.Debug("BigQueryReader: Running -", sql)

	bqDataChan := make(chan bigquery.Data)
	go r.bqClient().AsyncQuery(r.PageSize, r.config.DatasetID, r.config.ProjectID, sql, bqDataChan)
	aggregatedData := bigquery.Data{}

	for bqd := range bqDataChan {
		util.KillPipelineIfErr(bqd.Err, killChan)
		logger.Info("BigQueryReader: received bqData: len(rows) =", len(bqd.Rows))
		// logger.Debug(" %+v", bqd)

		if bqd.Rows != nil && bqd.Headers != nil && len(bqd.Rows) > 0 {
			if r.AggregateResults {
				logger.Debug("BigQueryReader: aggregating results")
				aggregatedData.Headers = bqd.Headers
				aggregatedData.Rows = append(aggregatedData.Rows, bqd.Rows...)
			} else {
				// Send data as soon as we get it back
				logger.Debug("BigQueryReader: sending data without aggregation")
				d, err := data.JSONFromHeaderAndRows(bqd.Headers, bqd.Rows)
				util.KillPipelineIfErr(err, killChan)
				forEach(d) // pass back out via the forEach func
			}
		}
	}

	if r.AggregateResults {
		logger.Info("BigQueryReader: sending aggregated results: len(rows) =", len(aggregatedData.Rows))
		d, err := data.JSONFromHeaderAndRows(aggregatedData.Headers, aggregatedData.Rows)
		util.KillPipelineIfErr(err, killChan)
		forEach(d) // pass back out via the forEach func
	}
}
// ProcessData wraps the reader in a gzip.Reader if the content is gzipped,
// then defers to ForEachData
func (r *IoReader) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	if r.Gzipped {
		gzReader, err := gzip.NewReader(r.Reader)
		util.KillPipelineIfErr(err, killChan)
		r.Reader = gzReader
	}
	r.ForEachData(killChan, func(d data.JSON) {
		outputChan <- d
	})
}
// ProcessData sends the data it receives to the outputChan only if it matches the supplied regex
func (r *RegexpMatcher) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	matches, err := regexp.Match(r.pattern, d)
	util.KillPipelineIfErr(err, killChan)
	if r.DebugLog {
		logger.Debug("RegexpMatcher: checking if", string(d), "matches pattern", r.pattern, ". MATCH=", matches)
	}
	if matches {
		outputChan <- d
	}
}
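// regexp.Match recompiles the pattern on every call, which is wasteful when
// ProcessData runs once per payload. A minimal sketch of precompiling the
// pattern once at construction time (illustrative; the type and field names
// here are hypothetical, not the library's):
type regexpMatcherSketch struct {
	compiled *regexp.Regexp
}

func newRegexpMatcherSketch(pattern string) (*regexpMatcherSketch, error) {
	re, err := regexp.Compile(pattern)
	if err != nil {
		return nil, err // surface bad patterns at construction, not per payload
	}
	return &regexpMatcherSketch{compiled: re}, nil
}

func (m *regexpMatcherSketch) matches(d []byte) bool {
	return m.compiled.Match(d)
}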
// ProcessData writes data as-is directly to the output file
func (f *FtpWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	logger.Debug("FTPWriter Process data:", string(d))
	if !f.authenticated {
		f.connect(killChan)
	}
	_, e := f.fileWriter.Write([]byte(d))
	util.KillPipelineIfErr(e, killChan)
}
// ensureInitialized opens the sftp client connection, performing the setup
// only once.
func (r *SftpReader) ensureInitialized(killChan chan error) {
	if r.initialized {
		return
	}
	client, err := util.SftpClient(r.parameters.Server, r.parameters.Username, r.parameters.AuthMethods)
	util.KillPipelineIfErr(err, killChan)
	r.client = client
	r.initialized = true
}
// ProcessData writes the data to w.Writer, optionally appending a trailing newline
func (w *IoWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	var bytesWritten int
	var err error
	if w.AddNewline {
		bytesWritten, err = fmt.Fprintln(w.Writer, string(d))
	} else {
		bytesWritten, err = w.Writer.Write(d)
	}
	util.KillPipelineIfErr(err, killChan)
	logger.Debug("IoWriter:", bytesWritten, "bytes written")
}
// Run executes the scp command built from the attributes of the SCP struct
func (s *SCP) Run(killChan chan error) {
	scpParams := []string{}
	if s.Port != "" {
		// Pass the flag and its value as separate argv elements; a single
		// "-P <port>" element would reach scp as one argument with an
		// embedded space.
		scpParams = append(scpParams, "-P", s.Port)
	}
	scpParams = append(scpParams, s.Object)
	scpParams = append(scpParams, s.Destination)

	cmd := exec.Command("scp", scpParams...)
	_, err := cmd.Output()
	util.KillPipelineIfErr(err, killChan)
}
// ProcessData reads an entire directory if a prefix is provided (sending each file in that
// directory to outputChan), or just sends the single file to outputChan if a complete
// file path is provided (not a prefix/directory).
//
// It optionally deletes all processed objects once the contents have been sent to outputChan
func (r *S3Reader) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	if r.prefix != "" {
		logger.Debug("S3Reader: process data for prefix", r.prefix)
		objects, err := util.ListS3Objects(r.client, r.bucket, r.prefix)
		logger.Debug("S3Reader: list =", objects)
		util.KillPipelineIfErr(err, killChan)
		for _, o := range objects {
			obj, err := util.GetS3Object(r.client, r.bucket, o)
			util.KillPipelineIfErr(err, killChan)
			r.processObject(obj, outputChan, killChan)
			r.processedObjectKeys = append(r.processedObjectKeys, o)
		}
	} else {
		logger.Debug("S3Reader: process data for object", r.object)
		obj, err := util.GetS3Object(r.client, r.bucket, r.object)
		util.KillPipelineIfErr(err, killChan)
		r.processObject(obj, outputChan, killChan)
		r.processedObjectKeys = append(r.processedObjectKeys, r.object)
	}
	if r.DeleteObjects {
		_, err := util.DeleteS3Objects(r.client, r.bucket, r.processedObjectKeys)
		util.KillPipelineIfErr(err, killChan)
	}
}
// ProcessData writes data as-is directly to the output file
func (w *SftpWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	logger.Debug("SftpWriter Process data:", string(d))
	w.ensureInitialized(killChan)
	_, e := w.file.Write([]byte(d))
	util.KillPipelineIfErr(e, killChan)
}
// sendFilePath sends just the file's path (rather than its contents) to
// outputChan, wrapped as a util.SftpPath JSON payload.
func (r *SftpReader) sendFilePath(path string, outputChan chan data.JSON, killChan chan error) {
	sftpPath := util.SftpPath{Path: path}
	d, err := data.NewJSON(sftpPath)
	util.KillPipelineIfErr(err, killChan)
	outputChan <- d
}
// ProcessData reads a file and sends its contents to outputChan
func (r *FileReader) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	d, err := ioutil.ReadFile(r.filename)
	util.KillPipelineIfErr(err, killChan)
	outputChan <- d
}
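// A minimal end-to-end sketch of how processors like these compose into a
// pipeline. The constructor names (processors.NewFileReader,
// processors.NewIoWriter) and ratchet.NewPipeline follow ratchet's documented
// API but are assumptions here; the input path is a placeholder. Adjust to
// the package versions you are importing.
func exampleFileToStdout() {
	read := processors.NewFileReader("/path/to/input.txt")
	write := processors.NewIoWriter(os.Stdout)
	pipeline := ratchet.NewPipeline(read, write)
	// Run returns a channel that yields the final error, if any.
	err := <-pipeline.Run()
	if err != nil {
		logger.Error("pipeline failed:", err)
	}
}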