Example 1
0
// Run streams documents from the configured MongoDB collection into
// ds.OutputChannel, applying the datasource's filter, sort, batch size,
// limit, and offset. The output channel is always closed on return.
func (ds *MongoDBDataSource) Run() {
	defer close(ds.OutputChannel)

	session, err := mgo.Dial(ds.Url)
	if err != nil {
		// Include the underlying error so connection failures are diagnosable.
		log.Printf("Error connecting to MongoDB: %v", err)
		return
	}
	// Return the session's sockets to the pool when we are done;
	// without this every Run leaks a connection.
	defer session.Close()
	c := session.DB(ds.database).C(ds.collection)

	theq := map[string]interface{}{
		"$query":   ds.filter,
		"$orderby": ds.sort,
	}
	q := c.Find(theq)

	q.Batch(ds.batchSize)
	if ds.limit > 0 {
		q.Limit(ds.limit)
	}
	if ds.offset > 0 {
		q.Skip(ds.offset)
	}

	iter := q.Iter()
	result := make(map[string]interface{})
	for iter.Next(&result) {
		result = datasources.WrapDocWithDatasourceAs(ds.As, result)
		ds.OutputChannel <- result
		// Allocate a fresh map: the previous one was handed to the channel
		// consumer and must not be overwritten by the next iteration.
		result = make(map[string]interface{})
	}
	if err := iter.Err(); err != nil {
		log.Printf("got error %v", err)
	}

}
Example 2
0
// Run emits one document per registered datasource, pairing each
// datasource's name with its definition, wrapped under this datasource's
// As name. The output channel is closed when iteration completes.
func (ds *DataSourcesDataSource) Run() {
	defer close(ds.OutputChannel)

	for name, definition := range datasources.DataSources {
		wrapped := datasources.WrapDocWithDatasourceAs(ds.As, planner.Document{
			"name":       name,
			"definition": definition,
		})
		ds.OutputChannel <- wrapped
	}
}
Example 3
0
// Run reads every file in ds.dirname, parses each as a JSON object, and
// emits a document of the form {"doc": <parsed JSON>, "meta": {"id": <id>}},
// where id is the file name minus any ".json" extension. Files that cannot
// be read or parsed are logged and skipped. The output channel is closed
// when all files have been processed (or on a directory-read error).
func (ds *JsonDirDataSource) Run() {
	defer close(ds.OutputChannel)

	files, err := ioutil.ReadDir(ds.dirname)
	if err != nil {
		log.Printf("Error reading files in directory: %v", err)
		return
	}

	for _, file := range files {

		// NOTE(review): filepath.Join would be more portable than "/"
		// concatenation, but filepath is not in scope for this file's imports.
		data, err := ioutil.ReadFile(ds.dirname + "/" + file.Name())
		if err != nil {
			log.Printf("Error reading file: %v", err)
			continue
		}

		var parsedJson map[string]interface{}
		if err := json.Unmarshal(data, &parsedJson); err != nil {
			log.Printf("Error parsing json: %v", err)
			continue
		}

		// Document id is the file name with a trailing ".json" removed;
		// TrimSuffix is a no-op when the suffix is absent, matching the
		// original HasSuffix/LastIndex logic exactly.
		id := strings.TrimSuffix(file.Name(), ".json")

		result := planner.Document{
			"doc": parsedJson,
			"meta": map[string]interface{}{
				"id": id}}

		result = datasources.WrapDocWithDatasourceAs(ds.As, result)
		ds.OutputChannel <- result

	}
}
Example 4
0
// DocsFromSearchResults converts an ElasticSearch search result into a slice
// of planner.Documents. When a docBodyDataSource is configured, the full
// document bodies are fetched by id in one batch and attached under each
// document's "doc" key. Every document is wrapped with this datasource's
// As name. Returns an error if the body fetch or any JSON decode fails.
func (ds *ElasticSearchDataSource) DocsFromSearchResults(sr core.SearchResult) ([]planner.Document, error) {
	hits := sr.Hits.Hits

	// Optionally fetch full bodies for all hits up front.
	var bodies []planner.Document
	if ds.docBodyDataSource != nil {
		ids := make([]string, 0, len(hits))
		for _, hit := range hits {
			ids = append(ids, hit.Id)
		}
		var err error
		bodies, err = ds.docBodyDataSource.DocsFromIds(ids)
		if err != nil {
			return nil, err
		}
	}

	docs := make([]planner.Document, 0, len(hits))
	for i, hit := range hits {
		var doc planner.Document
		if jsonErr := json.Unmarshal(hit.Source, &doc); jsonErr != nil {
			return nil, jsonErr
		}

		// Attach the fetched body when present.
		// NOTE(review): assumes DocsFromIds returns exactly one body per hit,
		// in order — verify against the DocsFromIds implementation.
		if bodies != nil {
			doc["doc"] = bodies[i]
		}

		docs = append(docs, datasources.WrapDocWithDatasourceAs(ds.As, doc))
	}

	return docs, nil
}
Example 5
0
// Run streams a CSV source (a local file path, or an http(s) URL when
// ds.filename starts with "http") as documents. The first row supplies the
// column headers; each subsequent row becomes one document keyed by header.
// Values are coerced to bool ("true"/"false"), int64, or float64 when they
// parse as such, and kept as strings otherwise. The output channel is
// closed on return.
func (ds *CSVDataSource) Run() {
	defer close(ds.OutputChannel)

	var sr io.Reader
	if strings.HasPrefix(ds.filename, "http") {
		resp, err := http.Get(ds.filename)
		if err != nil {
			// Fixed: was log.Printf("Error:", err) — a malformed format
			// string flagged by go vet; the error was printed as an
			// %!(EXTRA ...) operand.
			log.Printf("Error: %v", err)
			return
		}
		sr = resp.Body
		defer resp.Body.Close()
	} else {
		log.Printf("opening file %v", ds.filename)
		file, err := os.Open(ds.filename)
		if err != nil {
			log.Printf("Error: %v", err)
			return
		}
		sr = file
		defer file.Close()
	}

	reader := csv.NewReader(sr)
	var columnHeaders []string
	row := 0
	for {
		record, err := reader.Read()
		if err == io.EOF {
			break
		} else if err != nil {
			// NOTE(review): this path prints to stdout while the others use
			// log; left as-is to avoid touching the fmt import outside view.
			fmt.Println("Error:", err)
			return
		}

		if row == 0 {
			// csv.Reader enforces that later rows have this many fields,
			// so indexing columnHeaders[i] below is safe.
			columnHeaders = record
		} else {
			doc := make(planner.Document)
			for i, v := range record {
				// Best-effort type guessing: bool, then int, then float,
				// falling back to the raw string.
				if v == "true" {
					doc[columnHeaders[i]] = true
				} else if v == "false" {
					doc[columnHeaders[i]] = false
				} else if v_i, err := strconv.ParseInt(v, 0, 64); err == nil {
					doc[columnHeaders[i]] = v_i
				} else if v_f, err := strconv.ParseFloat(v, 64); err == nil {
					doc[columnHeaders[i]] = v_f
				} else {
					doc[columnHeaders[i]] = v
				}
			}
			doc = datasources.WrapDocWithDatasourceAs(ds.As, doc)
			ds.OutputChannel <- doc
		}
		row++
	}
}
Example 6
0
// Run pages through a Couchbase view in batches of ds.batchSize, emitting
// each row's document on ds.OutputChannel. Each query asks for batchSize+1
// rows: the extra row is never emitted — it only signals that another page
// exists, and supplies the startkey/startkey_docid for the next query.
// The output channel is closed on return.
func (ds *CouchbaseDataSource) Run() {
	defer close(ds.OutputChannel)

	//get a connection to the bucket
	bucket, err := ds.getCachedBucket(ds.bucketName)
	if err != nil {
		log.Printf("Error getting bucket: %v", err)
		return
	}

	skey := ds.startkey

	// FIXME is include_docs=false and mget faster than include docs=true?
	vres, err := bucket.View(ds.ddoc, ds.view, map[string]interface{}{
		"limit":        ds.batchSize + 1,
		"include_docs": true,
		"startkey":     skey,
		"endkey":       ds.endkey})

	if err != nil {
		log.Printf("Error accessing view: %v", err)
		return
	}

	// emitBatch sends at most ds.batchSize rows from the current vres to the
	// output channel, renaming the Couchbase "json" key to "doc". The closure
	// captures vres, which is reassigned on each page. Previously this loop
	// was duplicated verbatim for the first page and the pagination loop.
	emitBatch := func() {
		for i, row := range vres.Rows {
			if i >= ds.batchSize {
				// dont process the last row, its just used to see if we
				// need to continue processing
				break
			}
			rowdoc := (*row.Doc).(map[string]interface{})
			rowdoc["doc"] = rowdoc["json"]
			delete(rowdoc, "json")

			rowdoc = datasources.WrapDocWithDatasourceAs(ds.As, rowdoc)

			ds.OutputChannel <- rowdoc
		}
	}

	emitBatch()

	// as long as we continue to get batchSize + 1 results back we have to keep going
	for len(vres.Rows) > ds.batchSize {
		skey = vres.Rows[ds.batchSize].Key
		skeydocid := vres.Rows[ds.batchSize].ID

		vres, err = bucket.View(ds.ddoc, ds.view, map[string]interface{}{
			"limit":          ds.batchSize + 1,
			"include_docs":   true,
			"startkey":       skey,
			"startkey_docid": skeydocid,
			"endkey":         ds.endkey})

		if err != nil {
			log.Printf("Error accessing view: %v", err)
			return
		}

		emitBatch()
	}

}