Esempio n. 1
0
// UpdateFile applies the transformations of the template identified by
// templateGUID to the stored file identified by fileGUID and sends the
// transformed result to S3 under the file's Path.
//
// Only files whose Extension is "csv" are processed; for any other extension
// nothing is done and a nil slice is returned. For a CSV file it:
//   - downloads the file into the "storage" directory,
//   - runs transformations.ApplyTransformations on the column of every
//     template field,
//   - creates one datasource (under subdom) for every new mapping set the
//     transformations report,
//   - writes the transformed columns plus the source "ID" column to a
//     temporary CSV and passes it to fs.SendToS3,
//   - removes both local temporary files.
//
// The template itself is never modified because other datasets might use it.
// Instead, every newly created datasource is reported back as a "map"
// TransformationUpdateParams entry so the caller can queue the update.
//
// A non-nil error is returned when the file or the template cannot be
// loaded, or when the temporary file cannot be created or closed.
func (fs *FileService) UpdateFile(subdom string, fileGUID string, templateGUID string) ([]*templates.TransformationUpdateParams, error) {
	var newTrans []*templates.TransformationUpdateParams

	fileObj, err := fs.GetFile(fileGUID)
	if err != nil {
		return nil, err
	}
	ts := templates.NewTemplateService(database.GetDatabase())
	templateObj, err := ts.GetTemplate(templateGUID)
	if err != nil {
		return nil, err
	}

	switch fileObj.Extension {
	case "csv":
		tempFile, tmpErr := ioutil.TempFile("storage", "temp")
		if tmpErr != nil {
			return nil, tmpErr
		}
		tempName := tempFile.Name()
		// Deferred so the temporary file is removed on every exit path.
		defer os.Remove(tempName)
		// Only the path is used below (the CSV service is given the name),
		// so release the descriptor right away instead of leaking it.
		if closeErr := tempFile.Close(); closeErr != nil {
			return nil, closeErr
		}

		// NOTE(review): the second return value is discarded exactly as in
		// the original; if it is an error it should be checked here — confirm
		// against DownloadFromUrl's signature.
		fileName, _ := fileObj.DownloadFromUrl("storage")
		defer os.Remove(fileName)

		// Open file and apply transformations. Each row of finalCSVData
		// holds one output column until it is transposed below; the extra
		// slot is for the "ID" column.
		finalCSVData := make([][]string, len(templateObj.Fields)+1)
		srcCSV := csvElement.NewCSVService(fileName, true)
		for i, field := range templateObj.Fields {
			col := srcCSV.GetColumn(field.Field)
			result, allNewDS := transformations.ApplyTransformations(col, field.Transformations, templateGUID)

			for _, newDS := range allNewDS {
				// Create and store a new datasource from the mapping set.
				var settings []datasources.Setting
				for _, aMap := range newDS {
					settings = append(settings, datasources.Setting{
						Label: aMap.Old,
						Value: aMap.New,
					})
				}
				ds := datasources.NewDatasourceService(database.GetDatabase())
				dsGUID := ds.CreateDatasource(subdom, settings)
				// The template cannot be updated here (other datasets might
				// use it); hand the change back to the caller instead.
				newTrans = append(newTrans, templates.NewTransformationUpdateParams(templateGUID, field.Field, "map", dsGUID))
			}

			finalCSVData[i] = make([]string, len(result))
			for j, finalResult := range result {
				finalCSVData[i][j] = finalResult
			}
		}

		finalCSVData[len(templateObj.Fields)] = srcCSV.GetColumn("ID")

		// Create local temporary file with transformed data.
		newcsv := csvElement.NewCSVService(tempName, false)
		newcsv.Headers = srcCSV.Headers
		finalCSVData = transpose(finalCSVData)
		newcsv.Write(finalCSVData, true)

		// Send to AWS S3.
		fs.SendToS3(fileObj.Path, tempName)
	}
	return newTrans, nil
}
Esempio n. 2
0
// apply runs a single transformation over data and returns the transformed
// values together with any value mappings the transformation produced.
//
// The behaviour is selected by transformation.Operation, with
// transformation.Parameters (p) as its arguments:
//   - "toDate": p = [current layout, new layout]. Each value is parsed with
//     the current layout and re-formatted with the new one; values that fail
//     to parse are logged and left unchanged.
//   - "setNull": every value that arrayPos finds in p is replaced by "".
//   - "standardize": p = [type]. "min-max" yields (x-min)/(max-min),
//     "z-score" yields (x-mean)/sd, and "decimal" divides by the power of
//     ten given by ceil(log10(largest absolute value)).
//   - "binPercent": p holds percentile boundaries; the mappings created by
//     the percentile service are returned and applied via ps.Bin.
//   - "fuzzyMap": p = [datasource GUID, match, put]. Only p[0] is read here:
//     each distinct value is fuzzy-matched against the datasource settings
//     (one goroutine per value) and the resulting mappings are applied.
//
// Any other operation returns data unchanged. Most operations overwrite the
// elements of data in place, so the caller's slice is modified. The returned
// mapping slice is non-nil only for "binPercent" and "fuzzyMap".
//
// Invalid parameter counts and failures in the statistics or datasource
// lookups call log.Fatal, which terminates the process.
func apply(data []string, transformation templates.Transformation) ([]string, []Mapping) {
	p := transformation.Parameters
	var mapping []Mapping

	switch transformation.Operation {
	case "toDate":
		if len(p) != 2 {
			log.Fatal("toDate transformation requires 2 parameters:  current format, new format")
		}

		oldFormat := p[0]
		newFormat := p[1]

		for i, x := range data {
			y, err := time.Parse(oldFormat, x)
			if err != nil {
				// Best effort: keep the original value when it does not parse.
				log.Print("Error parsing date with index ", i, " with format: ", oldFormat)
				continue
			}
			data[i] = y.Format(newFormat)
		}
	case "setNull":
		for i, x := range data {
			if arrayPos(x, p) != -1 {
				data[i] = ""
			}
		}
	case "standardize":
		if len(p) != 1 {
			log.Fatal("standardize transformation requires 1 parameter:  type (min-max|z-score)")
		}

		stype := p[0]
		switch stype {
		case "min-max":
			newData := strArrToFloatArr(data)
			min, err := stats.Min(newData)
			if err != nil {
				log.Fatal("Error finding minimum of data: ", err)
			}
			max, err := stats.Max(newData)
			if err != nil {
				log.Fatal("Error finding maximum of data: ", err)
			}
			// NOTE(review): if every value is equal, srange is 0 and the
			// division below is by zero — confirm the desired output.
			srange := max - min

			for i, x := range newData {
				data[i] = floatToString((x - min) / srange)
			}
		case "z-score":
			newData := strArrToFloatArr(data)
			mean, err := stats.Mean(newData)
			if err != nil {
				log.Fatal("Error finding mean of data: ", err)
			}
			// NOTE(review): a standard deviation of 0 makes the division
			// below a division by zero — confirm the desired output.
			sd, err := stats.StandardDeviation(newData)
			if err != nil {
				log.Fatal("Error finding standard deviation of data: ", err)
			}

			for i, x := range newData {
				data[i] = floatToString((x - mean) / sd)
			}
		case "decimal":
			newData := strArrToFloatArr(data)
			max, err := stats.Max(newData)
			if err != nil {
				log.Fatal("Error finding maximum of data: ", err)
			}
			min, err := stats.Min(newData)
			if err != nil {
				log.Fatal("Error finding minimum of data: ", err)
			}

			// Scale by the power of ten derived from the largest magnitude.
			var maxAbs float64
			if math.Abs(max) > math.Abs(min) {
				maxAbs = math.Abs(max)
			} else {
				maxAbs = math.Abs(min)
			}
			c := math.Ceil(math.Log10(maxAbs))
			for i, x := range newData {
				data[i] = floatToString(x / math.Pow10(int(c)))
			}
		}
	case "binPercent":
		table := NewPivotTable(data)
		intP := strArrToIntArr(p)
		sort.Ints(intP)
		ps := NewPercentileService(*table, intP)
		mapping = ps.CreateMappings()
		ps.Bin(mapping, data)
	case "fuzzyMap":
		if len(p) != 3 {
			log.Fatal("fuzzyMap transformation requires 3 parameters:  datasource GUID, match, put")
		}

		dsGUID := p[0]
		ds := datasources.NewDatasourceService(database.GetDatabase())
		dsObj, err := ds.GetDatasource(dsGUID)
		if err != nil {
			log.Fatal("Error finding Datasource: ", err)
		}

		var (
			wg sync.WaitGroup
			mu sync.Mutex // guards mapping while the goroutines append to it
		)
		for _, datum := range getDistinctValues(data) {
			wg.Add(1)
			go func(datum string) {
				// Registered first so wg.Wait cannot hang if the work panics.
				defer wg.Done()
				result := fuzzyMap(datum, dsObj.Settings)
				fuzzyMapping := NewMapping(datum, result)
				// Appending to a shared slice from several goroutines is a
				// data race; serialise the append.
				mu.Lock()
				mapping = append(mapping, *fuzzyMapping)
				mu.Unlock()
			}(datum)
		}
		wg.Wait()
		data = applyMappings(mapping, data)
	}

	return data, mapping
}