func (ds *MongoDBDataSource) Run() {
	defer close(ds.OutputChannel)
	session, err := mgo.Dial(ds.Url)
	if err != nil {
		log.Printf("Error connecting to MongoDB: %v", err)
		return
	}
	defer session.Close()
	c := session.DB(ds.database).C(ds.collection)
	theq := map[string]interface{}{"$query": ds.filter, "$orderby": ds.sort}
	q := c.Find(theq)
	q.Batch(ds.batchSize)
	if ds.limit > 0 {
		q.Limit(ds.limit)
	}
	if ds.offset > 0 {
		q.Skip(ds.offset)
	}
	iter := q.Iter()
	result := make(map[string]interface{})
	for iter.Next(&result) {
		result = datasources.WrapDocWithDatasourceAs(ds.As, result)
		ds.OutputChannel <- result
		// allocate a fresh map so the document we just sent is not mutated
		// by the next call to iter.Next
		result = make(map[string]interface{})
	}
	if iter.Err() != nil {
		log.Printf("got error %v", iter.Err())
	}
}
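Every datasource here follows the same contract: Run() sends documents on OutputChannel and closes the channel when it is done, so a consumer just ranges over it. A minimal, runnable sketch of that producer/consumer pattern, with a hypothetical sliceDataSource standing in for the real implementations:

package main

import "fmt"

// sliceDataSource is a hypothetical stand-in for the real datasources;
// it shares their contract: Run sends documents, then closes the channel.
type sliceDataSource struct {
	docs          []map[string]interface{}
	OutputChannel chan map[string]interface{}
}

func (ds *sliceDataSource) Run() {
	defer close(ds.OutputChannel) // closing signals the consumer we are done
	for _, doc := range ds.docs {
		ds.OutputChannel <- doc
	}
}

func main() {
	ds := &sliceDataSource{
		docs:          []map[string]interface{}{{"name": "a"}, {"name": "b"}},
		OutputChannel: make(chan map[string]interface{}),
	}
	go ds.Run() // producer runs in its own goroutine
	for doc := range ds.OutputChannel {
		fmt.Println(doc) // consumer drains until the channel is closed
	}
}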
func (ds *DataSourcesDataSource) Run() {
	defer close(ds.OutputChannel)
	for k, v := range datasources.DataSources {
		doc := planner.Document{"name": k, "definition": v}
		doc = datasources.WrapDocWithDatasourceAs(ds.As, doc)
		ds.OutputChannel <- doc
	}
}
func (ds *JsonDirDataSource) Run() {
	defer close(ds.OutputChannel)
	files, err := ioutil.ReadDir(ds.dirname)
	if err != nil {
		log.Printf("Error reading files in directory: %v", err)
		return
	}
	for _, file := range files {
		if file.IsDir() {
			continue
		}
		data, err := ioutil.ReadFile(filepath.Join(ds.dirname, file.Name()))
		if err != nil {
			log.Printf("Error reading file: %v", err)
			continue
		}
		var parsedJson map[string]interface{}
		err = json.Unmarshal(data, &parsedJson)
		if err != nil {
			log.Printf("Error parsing json: %v", err)
			continue
		}
		// use the filename, minus any .json extension, as the document id
		id := strings.TrimSuffix(file.Name(), ".json")
		result := planner.Document{
			"doc": parsedJson,
			"meta": map[string]interface{}{
				"id": id,
			},
		}
		result = datasources.WrapDocWithDatasourceAs(ds.As, result)
		ds.OutputChannel <- result
	}
}
func (ds *ElasticSearchDataSource) DocsFromSearchResults(sr core.SearchResult) ([]planner.Document, error) {
	var docBodies []planner.Document
	var err error
	// if a body datasource is configured, fetch the full documents
	// for all of the hits in a single batch
	if ds.docBodyDataSource != nil {
		docIds := make([]string, 0, len(sr.Hits.Hits))
		for _, val := range sr.Hits.Hits {
			docIds = append(docIds, val.Id)
		}
		docBodies, err = ds.docBodyDataSource.DocsFromIds(docIds)
		if err != nil {
			return nil, err
		}
	}
	result := make([]planner.Document, 0, len(sr.Hits.Hits))
	for i, val := range sr.Hits.Hits {
		var source planner.Document
		// unmarshal the raw JSON source of the hit
		jsonErr := json.Unmarshal(val.Source, &source)
		if jsonErr != nil {
			return nil, jsonErr
		}
		// add bodies if present; this assumes DocsFromIds returns the
		// bodies in the same order as docIds, so index i lines up
		if docBodies != nil {
			source["doc"] = docBodies[i]
		}
		source = datasources.WrapDocWithDatasourceAs(ds.As, source)
		result = append(result, source)
	}
	return result, nil
}
func (ds *CSVDataSource) Run() {
	defer close(ds.OutputChannel)
	var sr io.Reader
	if strings.HasPrefix(ds.filename, "http") {
		resp, err := http.Get(ds.filename)
		if err != nil {
			log.Printf("Error: %v", err)
			return
		}
		defer resp.Body.Close()
		sr = resp.Body
	} else {
		log.Printf("opening file %v", ds.filename)
		file, err := os.Open(ds.filename)
		if err != nil {
			log.Printf("Error: %v", err)
			return
		}
		defer file.Close()
		sr = file
	}
	reader := csv.NewReader(sr)
	var columnHeaders []string
	row := 0
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		} else if err != nil {
			log.Printf("Error: %v", err)
			return
		}
		if row == 0 {
			// the first row supplies the column names
			columnHeaders = record
		} else {
			doc := make(planner.Document)
			for i, v := range record {
				// do some guesswork about the datatype of each value;
				// we're not trying to be perfect, but we would like to
				// support string, int, float, and bool as best we can
				if v == "true" {
					doc[columnHeaders[i]] = true
				} else if v == "false" {
					doc[columnHeaders[i]] = false
				} else if vInt, err := strconv.ParseInt(v, 0, 64); err == nil {
					doc[columnHeaders[i]] = vInt
				} else if vFloat, err := strconv.ParseFloat(v, 64); err == nil {
					doc[columnHeaders[i]] = vFloat
				} else {
					// leave it as a string
					doc[columnHeaders[i]] = v
				}
			}
			doc = datasources.WrapDocWithDatasourceAs(ds.As, doc)
			ds.OutputChannel <- doc
		}
		row++
	}
}
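The type-guessing logic is easy to pull out and exercise on its own. A minimal sketch, with guessValue as a hypothetical helper name, trying bool first, then int, then float, falling back to string:

package main

import (
	"fmt"
	"strconv"
)

// guessValue is a hypothetical standalone version of the CSV
// type guessing above: bool, then int, then float, else string.
func guessValue(v string) interface{} {
	if v == "true" {
		return true
	}
	if v == "false" {
		return false
	}
	if i, err := strconv.ParseInt(v, 0, 64); err == nil {
		return i
	}
	if f, err := strconv.ParseFloat(v, 64); err == nil {
		return f
	}
	return v
}

func main() {
	for _, v := range []string{"true", "42", "3.14", "hello"} {
		fmt.Printf("%q -> %v (%T)\n", v, guessValue(v), guessValue(v))
	}
}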
func (ds *CouchbaseDataSource) Run() {
	defer close(ds.OutputChannel)
	// get a connection to the bucket
	bucket, err := ds.getCachedBucket(ds.bucketName)
	if err != nil {
		log.Printf("Error getting bucket: %v", err)
		return
	}
	// FIXME is include_docs=false and mget faster than include_docs=true?
	// we ask for batchSize+1 rows: the extra row is never emitted, it only
	// tells us whether another page exists and where it starts
	params := map[string]interface{}{
		"limit":        ds.batchSize + 1,
		"include_docs": true,
		"startkey":     ds.startkey,
		"endkey":       ds.endkey,
	}
	for {
		vres, err := bucket.View(ds.ddoc, ds.view, params)
		if err != nil {
			log.Printf("Error accessing view: %v", err)
			return
		}
		for i, row := range vres.Rows {
			if i < ds.batchSize {
				// don't process the last row, it's only used to see if we
				// need to continue processing
				rowdoc := (*row.Doc).(map[string]interface{})
				rowdoc["doc"] = rowdoc["json"]
				delete(rowdoc, "json")
				rowdoc = datasources.WrapDocWithDatasourceAs(ds.As, rowdoc)
				ds.OutputChannel <- rowdoc
			}
		}
		// as long as we keep getting batchSize+1 rows back there is more
		// to fetch; restart the next page at the extra row
		if len(vres.Rows) <= ds.batchSize {
			break
		}
		params["startkey"] = vres.Rows[ds.batchSize].Key
		params["startkey_docid"] = vres.Rows[ds.batchSize].ID
	}
}
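The batchSize+1 trick generalizes to any sorted source: fetch one row more than you intend to emit, and if that extra row arrives, use it as the (inclusive) starting point of the next page. A minimal, runnable sketch over an in-memory sorted slice; fetchPage and its names are hypothetical stand-ins for the view query:

package main

import "fmt"

// fetchPage is a hypothetical stand-in for a view query: it returns up to
// limit items from a sorted slice, starting at the first key >= startKey.
func fetchPage(keys []string, startKey string, limit int) []string {
	page := []string{}
	for _, k := range keys {
		if k >= startKey {
			page = append(page, k)
			if len(page) == limit {
				break
			}
		}
	}
	return page
}

func main() {
	keys := []string{"a", "b", "c", "d", "e", "f", "g"}
	batchSize := 3
	startKey := ""
	for {
		// ask for one extra row; it marks the start of the next page
		page := fetchPage(keys, startKey, batchSize+1)
		for i, k := range page {
			if i < batchSize {
				fmt.Println("emit", k)
			}
		}
		if len(page) <= batchSize {
			break // no extra row, so this was the last page
		}
		startKey = page[batchSize]
	}
}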