func (fs *FileService) UpdateFile(subdom string, fileGUID string, templateGUID string) ([]*templates.TransformationUpdateParams, error) { var newTrans []*templates.TransformationUpdateParams fileObj, err := fs.GetFile(fileGUID) ts := templates.NewTemplateService(database.GetDatabase()) templateObj, err := ts.GetTemplate(templateGUID) switch fileObj.Extension { case "csv": { tempFile, _ := ioutil.TempFile("storage", "temp") fileName, _ := fileObj.DownloadFromUrl("storage") //Open file and apply transformations finalCSVData := make([][]string, len(templateObj.Fields)+1) csv := csvElement.NewCSVService(fileName, true) for i, field := range templateObj.Fields { col := csv.GetColumn(field.Field) result, allNewDS := transformations.ApplyTransformations(col, field.Transformations, templateGUID) if len(allNewDS) > 0 { for _, newDS := range allNewDS { //create and store new Datasource var settings []datasources.Setting for _, aMap := range newDS { newSetting := datasources.Setting{ Label: aMap.Old, Value: aMap.New, } settings = append(settings, newSetting) } ds := datasources.NewDatasourceService(database.GetDatabase()) dsGUID := ds.CreateDatasource(subdom, settings) /* Can't update template; other datasets might use it. Instead, save the data in the queue //update Template ts := templates.NewTemplateService(database.GetDatabase()) ts.AddTransformation(templateGUID, field.Field, "map", dsGUID) */ newTrans = append(newTrans, templates.NewTransformationUpdateParams(templateGUID, field.Field, "map", dsGUID)) } } finalCSVData[i] = make([]string, len(result)) for j, finalResult := range result { finalCSVData[i][j] = finalResult } } finalCSVData[len(templateObj.Fields)] = csv.GetColumn("ID") //Create local temporary file with transformed data newcsv := csvElement.NewCSVService(tempFile.Name(), false) newcsv.Headers = csv.Headers finalCSVData = transpose(finalCSVData) newcsv.Write(finalCSVData, true) //Send To AWS S3 fs.SendToS3(fileObj.Path, tempFile.Name()) //Delete temporary files os.Remove(fileName) os.Remove(tempFile.Name()) } } return newTrans, err }
//apply transforms an array of data func apply(data []string, transformation templates.Transformation) ([]string, []Mapping) { p := transformation.Parameters var wg sync.WaitGroup var mapping []Mapping switch transformation.Operation { case "toDate": if len(p) != 2 { log.Fatal("toDate transformation requires 2 parameters: current format, new format") } oldFormat := p[0] newFormat := p[1] for i, x := range data { y, err := time.Parse(oldFormat, x) if err != nil { log.Print("Error parsing date with index ", i, " with format: ", oldFormat) } else { data[i] = y.Format(newFormat) } } case "setNull": for i, x := range data { if arrayPos(x, p) != -1 { data[i] = "" } } case "standardize": if len(p) != 1 { log.Fatal("standardize transformation requires 1 parameter: type (min-max|z-score)") } stype := p[0] switch stype { case "min-max": newData := strArrToFloatArr(data) min, err := stats.Min(newData) if err != nil { log.Fatal("Error finding minimum of data: ", err) } max, err := stats.Max(newData) if err != nil { log.Fatal("Error finding maximum of data: ", err) } srange := max - min for i, x := range newData { data[i] = floatToString((x - min) / srange) } case "z-score": newData := strArrToFloatArr(data) mean, err := stats.Mean(newData) if err != nil { log.Fatal("Error finding mean of data: ", err) } sd, err := stats.StandardDeviation(newData) if err != nil { log.Fatal("Error finding standard deviation of data: ", err) } for i, x := range newData { data[i] = floatToString((x - mean) / sd) } case "decimal": newData := strArrToFloatArr(data) max, err := stats.Max(newData) if err != nil { log.Fatal("Error finding maximum of data: ", err) } min, err := stats.Min(newData) if err != nil { log.Fatal("Error finding minimum of data: ", err) } var maxAbs float64 if math.Abs(max) > math.Abs(min) { maxAbs = math.Abs(max) } else { maxAbs = math.Abs(min) } c := math.Ceil(math.Log10(maxAbs)) for i, x := range newData { data[i] = floatToString(x / math.Pow10(int(c))) } } case "binPercent": table := NewPivotTable(data) intP := strArrToIntArr(p) sort.Ints(intP) ps := NewPercentileService(*table, intP) mapping = ps.CreateMappings() ps.Bin(mapping, data) case "fuzzyMap": if len(p) != 3 { log.Fatal("fuzzyMap transformation requires 3 parameters: datasource GUID, match, put") } dsGUID := p[0] ds := datasources.NewDatasourceService(database.GetDatabase()) dsObj, err := ds.GetDatasource(dsGUID) if err != nil { log.Fatal("Error finding Datasource: ", err) } distinctValues := getDistinctValues(data) for i, datum := range distinctValues { wg.Add(1) go func(i int, datum string, dsObj datasources.Datasource) { result := fuzzyMap(datum, dsObj.Settings) fuzzyMapping := NewMapping(datum, result) mapping = append(mapping, *fuzzyMapping) defer wg.Done() }(i, datum, dsObj) } wg.Wait() data = applyMappings(mapping, data) } return data, mapping }