Example #1
// ProcessData defers to util.SQLInsertData
func (s *SQLWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	// handle panics a bit more gracefully
	defer func() {
		if err := recover(); err != nil {
			util.KillPipelineIfErr(err.(error), killChan)
		}
	}()

	// First check for SQLWriterData
	var wd SQLWriterData
	err := data.ParseJSONSilent(d, &wd)
	logger.Info("SQLWriter: Writing data...")
	if err == nil && wd.TableName != "" && wd.InsertData != nil {
		logger.Debug("SQLWriter: SQLWriterData scenario")
		dd, err := data.NewJSON(wd.InsertData)
		util.KillPipelineIfErr(err, killChan)
		err = util.SQLInsertData(s.writeDB, dd, wd.TableName, s.OnDupKeyUpdate, s.OnDupKeyFields, s.BatchSize)
		util.KillPipelineIfErr(err, killChan)
	} else {
		logger.Debug("SQLWriter: normal data scenario")
		err = util.SQLInsertData(s.writeDB, d, s.TableName, s.OnDupKeyUpdate, s.OnDupKeyFields, s.BatchSize)
		util.KillPipelineIfErr(err, killChan)
	}
	logger.Info("SQLWriter: Write complete")
}
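The SQLWriterData branch above only fires when the incoming JSON parses into that wrapper type. A minimal sketch of what SQLWriterData and util.KillPipelineIfErr are assumed to look like; the field names, JSON tags, and error handling are illustrative, not confirmed from the library:

// SQLWriterData wraps a payload together with an explicit destination table.
type SQLWriterData struct {
	TableName  string      `json:"table_name"`
	InsertData interface{} `json:"insert_data"`
}

// KillPipelineIfErr forwards a non-nil error to killChan so the pipeline
// can shut down; it is a no-op when err is nil.
func KillPipelineIfErr(err error, killChan chan error) {
	if err != nil {
		killChan <- err
	}
}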
Example #2
func (p *Pipeline) runStages(killChan chan error) {
	for n, stage := range p.layout.stages {
		for _, dp := range stage.processors {
			p.wg.Add(1)
			// Each DataProcessor runs in a separate goroutine.
			go func(n int, dp *dataProcessor) {
				// This is where the main DataProcessor interface
				// functions are called.
				logger.Info(p.Name, "- stage", n+1, dp, "waiting to receive data")
				for d := range dp.inputChan {
					logger.Info(p.Name, "- stage", n+1, dp, "received data")
					if p.PrintData {
						logger.Debug(p.Name, "- stage", n+1, dp, "data =", string(d))
					}
					dp.recordDataReceived(d)
					dp.processData(d, killChan)
				}
				logger.Info(p.Name, "- stage", n+1, dp, "input closed, calling Finish")
				dp.Finish(dp.outputChan, killChan)
				if dp.outputChan != nil {
					logger.Info(p.Name, "- stage", n+1, dp, "closing output")
					close(dp.outputChan)
				}
				p.wg.Done()
			}(n, dp)
		}
	}
}
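Note why n and dp are passed to the goroutine as arguments rather than captured directly: before Go 1.22, a range loop reused its variables across iterations, so a direct capture would make every goroutine observe the last iteration's values. A deliberately broken sketch for contrast:

// BUGGY sketch (pre-Go 1.22 semantics): all goroutines share the same
// n and dp variables instead of getting per-iteration copies.
for n, stage := range p.layout.stages {
	for _, dp := range stage.processors {
		go func() {
			logger.Info(p.Name, "- stage", n+1, dp, "waiting to receive data")
			// ... by the time this runs, n and dp have likely moved on
		}()
	}
}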
Example #3
func insertObjects(db *sql.DB, objects []map[string]interface{}, tableName string, onDupKeyUpdate bool, onDupKeyFields []string) error {
	logger.Info("SQLInsertData: building INSERT for len(objects) =", len(objects))
	insertSQL, vals := buildInsertSQL(objects, tableName, onDupKeyUpdate, onDupKeyFields)

	logger.Debug("SQLInsertData:", insertSQL)
	logger.Debug("SQLInsertData: values", vals)

	stmt, err := db.Prepare(insertSQL)
	if err != nil {
		logger.Debug("SQLInsertData: error preparing SQL")
		return err
	}
	defer stmt.Close()

	res, err := stmt.Exec(vals...)
	if err != nil {
		return err
	}
	lastID, err := res.LastInsertId()
	if err != nil {
		return err
	}
	rowCnt, err := res.RowsAffected()
	if err != nil {
		return err
	}

	logger.Info(fmt.Sprintf("SQLInsertData: rows affected = %d, last insert ID = %d", rowCnt, lastID))
	return nil
}
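insertObjects leans on buildInsertSQL to produce a single multi-row statement plus a flattened argument list. A sketch of what that helper is assumed to do, using MySQL-style ? placeholders and ON DUPLICATE KEY UPDATE (needs fmt, sort, strings; assumes at least one object); the real helper may differ:

func buildInsertSQL(objects []map[string]interface{}, tableName string, onDupKeyUpdate bool, onDupKeyFields []string) (string, []interface{}) {
	// Use a sorted column list so vals line up with the placeholders.
	cols := []string{}
	for c := range objects[0] {
		cols = append(cols, c)
	}
	sort.Strings(cols)

	group := "(" + strings.Repeat("?,", len(cols)-1) + "?)"
	groups := []string{}
	vals := []interface{}{}
	for _, obj := range objects {
		groups = append(groups, group)
		for _, c := range cols {
			vals = append(vals, obj[c])
		}
	}

	insertSQL := fmt.Sprintf("INSERT INTO %s (%s) VALUES %s",
		tableName, strings.Join(cols, ","), strings.Join(groups, ","))
	if onDupKeyUpdate {
		updates := []string{}
		for _, f := range onDupKeyFields {
			updates = append(updates, f+"=VALUES("+f+")")
		}
		insertSQL += " ON DUPLICATE KEY UPDATE " + strings.Join(updates, ",")
	}
	return insertSQL, vals
}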
Example #4
// ProcessData defers to WriteBatch
func (w *BigQueryWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	queuedRows, err := data.ObjectsFromJSON(d)
	util.KillPipelineIfErr(err, killChan)

	logger.Info("BigQueryWriter: Writing -", len(queuedRows))
	err = w.WriteBatch(queuedRows)
	util.KillPipelineIfErr(err, killChan)
	logger.Info("BigQueryWriter: Write complete")
}
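WriteBatch receives the rows as []map[string]interface{}. A sketch of the assumed behavior of data.ObjectsFromJSON, treating data.JSON as a []byte alias and accepting either a JSON array or a single object; the real implementation may differ:

// ObjectsFromJSON sketch: unmarshal an array of objects, falling back to
// wrapping a lone object in a one-element slice.
func ObjectsFromJSON(d JSON) ([]map[string]interface{}, error) {
	var objects []map[string]interface{}
	if err := json.Unmarshal(d, &objects); err != nil {
		var single map[string]interface{}
		if err2 := json.Unmarshal(d, &single); err2 != nil {
			return nil, err
		}
		objects = []map[string]interface{}{single}
	}
	return objects, nil
}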
Example #5
// ProcessData runs the SQL statements, deferring to util.ExecuteSQLQuery
func (s *SQLExecutor) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	// handle panics a bit more gracefully
	defer func() {
		if err := recover(); err != nil {
			util.KillPipelineIfErr(err.(error), killChan)
		}
	}()

	sql := ""
	var err error
	if s.query == "" && s.sqlGenerator != nil {
		sql, err = s.sqlGenerator(d)
		util.KillPipelineIfErr(err, killChan)
	} else if s.query != "" {
		sql = s.query
	} else {
		killChan <- errors.New("SQLExecutor: must have either static query or sqlGenerator func")
	}

	logger.Debug("SQLExecutor: Running - ", sql)
	// See sql.go
	err = util.ExecuteSQLQuery(s.readDB, sql)
	util.KillPipelineIfErr(err, killChan)
	logger.Info("SQLExecutor: Query complete")
}
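The static-query vs sqlGenerator split above lets the executor derive its SQL from each incoming payload. A hypothetical usage sketch; the constructor name NewDynamicSQLExecutor and the users table are assumptions for illustration:

exec := NewDynamicSQLExecutor(db, func(d data.JSON) (string, error) {
	var row struct {
		ID int `json:"id"`
	}
	if err := json.Unmarshal(d, &row); err != nil {
		return "", err
	}
	// Interpolating an int is safe; string fields would need escaping
	// or a prepared statement.
	return fmt.Sprintf("UPDATE users SET synced = 1 WHERE id = %d", row.ID), nil
})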
Example #6
// ForEachQueryData handles generating the SQL (in the dynamic-query case),
// running the query and retrieving the data in data.JSON format, and then
// passing the results back with the function call to forEach.
func (r *BigQueryReader) ForEachQueryData(d data.JSON, killChan chan error, forEach func(d data.JSON)) {
	sql := ""
	var err error
	if r.query == "" && r.sqlGenerator != nil {
		sql, err = r.sqlGenerator(d)
		util.KillPipelineIfErr(err, killChan)
	} else if r.query != "" {
		sql = r.query
	} else {
		killChan <- errors.New("BigQueryReader: must have either static query or sqlGenerator func")
	}

	logger.Debug("BigQueryReader: Running -", sql)

	bqDataChan := make(chan bigquery.Data)
	go r.bqClient().AsyncQuery(r.PageSize, r.config.DatasetID, r.config.ProjectID, sql, bqDataChan)
	aggregatedData := bigquery.Data{}

	for bqd := range bqDataChan {
		util.KillPipelineIfErr(bqd.Err, killChan)
		logger.Info("BigQueryReader: received bqData: len(rows) =", len(bqd.Rows))
		// logger.Debug("   %+v", bqd)

		if bqd.Rows != nil && bqd.Headers != nil && len(bqd.Rows) > 0 {
			if r.AggregateResults {
				logger.Debug("BigQueryReader: aggregating results")
				aggregatedData.Headers = bqd.Headers
				aggregatedData.Rows = append(aggregatedData.Rows, bqd.Rows...)
			} else {
				// Send data as soon as we get it back
				logger.Debug("BigQueryReader: sending data without aggregation")
				d, err := data.JSONFromHeaderAndRows(bqd.Headers, bqd.Rows)
				util.KillPipelineIfErr(err, killChan)
				forEach(d) // pass back out via the forEach func
			}
		}
	}
	if r.AggregateResults {
		logger.Info("BigQueryReader: sending aggregated results: len(rows) =", len(aggregatedData.Rows))
		d, err := data.JSONFromHeaderAndRows(aggregatedData.Headers, aggregatedData.Rows)
		util.KillPipelineIfErr(err, killChan)
		forEach(d) // pass back out via the forEach func
	}
}
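Whether results stream chunk by chunk or arrive as one aggregated batch, the forEach callback is the single hand-off point. A sketch of how a caller is assumed to drive ForEachQueryData, simply forwarding each payload downstream:

// Sketch only: forward each chunk (or the aggregated result) to outputChan.
func (r *BigQueryReader) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	r.ForEachQueryData(d, killChan, func(d data.JSON) {
		outputChan <- d
	})
}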
Example #7
// ensureInitialized opens the SFTP connection (on the first call only) and creates the output file on the server at the specified path
func (w *SftpWriter) ensureInitialized(killChan chan error) {
	if w.initialized {
		return
	}

	client, err := util.SftpClient(w.parameters.Server, w.parameters.Username, w.parameters.AuthMethods)
	util.KillPipelineIfErr(err, killChan)

	logger.Info("Path", w.parameters.Path)

	file, err := client.Create(w.parameters.Path)
	util.KillPipelineIfErr(err, killChan)

	w.client = client
	w.file = file
	w.initialized = true
}
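ensureInitialized is lazy and idempotent, which implies it runs on the first data chunk rather than at construction time. A sketch (assumed, not confirmed from the library) of the matching ProcessData:

// Sketch: connect once on first use, then append each payload to the remote file.
func (w *SftpWriter) ProcessData(d data.JSON, outputChan chan data.JSON, killChan chan error) {
	w.ensureInitialized(killChan)
	_, err := w.file.Write(d)
	util.KillPipelineIfErr(err, killChan)
}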