// Export downloads the requested file, applies the template's transformations
// column by column, and returns the file's metadata as JSON.
func (ec *ExportController) Export(w http.ResponseWriter, r *http.Request) ([]byte, error) {
	vars := mux.Vars(r)
	fileGUID := vars["fileGUID"]
	templateGUID := vars["templateGUID"]

	fs := files.NewFileService(database.GetDatabase())
	fileObj, err := fs.GetFile(fileGUID)
	if err != nil {
		return nil, err
	}

	ts := templates.NewTemplateService(database.GetDatabase())
	templateObj, err := ts.GetTemplate(templateGUID)
	if err != nil {
		return nil, err
	}

	switch fileObj.Extension {
	case "csv":
		fileName, err := fileObj.DownloadFromUrl("storage")
		if err != nil {
			return nil, err
		}
		csvService := csv.NewCSVService(fileName, true)
		for _, field := range templateObj.Fields {
			col := csvService.GetColumn(field.Field)
			// ApplyTransformations also returns any new datasource
			// mappings; they are not needed for a plain export.
			result, _ := transformations.ApplyTransformations(col, field.Transformations, templateGUID)
			fmt.Println(result)
		}
	}

	fj, err := json.Marshal(fileObj)
	return fj, err
}
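// A minimal sketch of how a handler with this ([]byte, error) signature might
// be wired into the gorilla/mux router. The wrap helper and the route path are
// assumptions for illustration, not part of the original code.
func wrap(h func(http.ResponseWriter, *http.Request) ([]byte, error)) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		body, err := h(w, r)
		if err != nil {
			http.Error(w, err.Error(), http.StatusInternalServerError)
			return
		}
		w.Header().Set("Content-Type", "application/json")
		w.Write(body)
	}
}

func registerRoutes(router *mux.Router, ec *ExportController) {
	router.HandleFunc("/files/{fileGUID}/templates/{templateGUID}/export", wrap(ec.Export))
}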
// handle takes a job and dispatches it to the function that runs it.
func (qs *QueueService) handle(job *QueueJob) {
	switch job.Job {
	case "updateDatasetFile":
		// Round-trip the loosely typed job data through JSON to get a
		// strongly typed UpdateDatasetFileJobData.
		udfjd := UpdateDatasetFileJobData{}
		jobJson, err := json.Marshal(job.Data)
		if err != nil {
			log.Fatal("Error creating JSON: ", err)
		}
		if err = json.Unmarshal(jobJson, &udfjd); err != nil {
			log.Fatal("Error parsing JSON: ", err)
		}

		fs := files.NewFileService(database.GetDatabase())
		newTrans, err := fs.UpdateFile(udfjd.Extra.Subdom, udfjd.FileGUID, udfjd.TemplateGUID)
		if err != nil {
			log.Fatal("Error updating file: ", err)
		}

		// Alert Luminous that the update is done.
		queue := NewQueueService("luminous")
		jobData := structs.Map(udfjd)
		jobData["NewTrans"] = newTrans
		qj := &QueueJob{
			Job:  `Spark451\Luminous\v2\Predict\Classes\Datasets@finishDatasetCreation`,
			Data: jobData,
		}
		queue.Push(qj)
	}
}
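// A sketch of the job payload types this handler assumes. The field names and
// JSON tags are inferred from how udfjd is used above and may differ from the
// real definitions elsewhere in the codebase.
type UpdateDatasetFileJobExtra struct {
	Subdom string `json:"subdom"`
}

type UpdateDatasetFileJobData struct {
	FileGUID     string                    `json:"fileGUID"`
	TemplateGUID string                    `json:"templateGUID"`
	Extra        UpdateDatasetFileJobExtra `json:"extra"`
}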
// GetPivotTables loads a file and, for CSVs, downloads it and builds a CSV
// service for pivoting. The pivot logic itself is not implemented yet; the
// handler currently just returns the file's metadata as JSON.
func (ac AnalyticsController) GetPivotTables(w http.ResponseWriter, r *http.Request) ([]byte, error) {
	vars := mux.Vars(r)
	fileGUID := vars["fileGUID"]

	fs := files.NewFileService(database.GetDatabase())
	fileObj, err := fs.GetFile(fileGUID)
	if err != nil {
		return nil, err
	}

	if fileObj.Extension == "csv" {
		fileName, err := fileObj.DownloadFromUrl("storage")
		if err != nil {
			return nil, err
		}
		csvService := csvElement.NewCSVService(fileName, true)
		fmt.Println(csvService)
	}

	fj, err := json.Marshal(fileObj)
	return fj, err
}
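// The CSV helper's surface, as these handlers appear to use it. This is an
// inferred sketch, not the actual csvElement definition: NewCSVService seems
// to take a path plus a flag for whether to read the file immediately, and to
// expose Headers, GetColumn, and Write.
//
//	svc := csvElement.NewCSVService("storage/input.csv", true) // load an existing file
//	ids := svc.GetColumn("ID")                                 // fetch a column by header name
//	out := csvElement.NewCSVService("storage/out.csv", false)  // empty service for writing
//	out.Headers = svc.Headers
//	out.Write(rows, true)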
// UpdateFile downloads a stored file, applies the template's transformations
// to each mapped column, writes the transformed data to a temporary CSV, and
// uploads the result back to S3. It returns the transformation updates that
// should be recorded for any newly created datasources.
func (fs *FileService) UpdateFile(subdom string, fileGUID string, templateGUID string) ([]*templates.TransformationUpdateParams, error) {
	var newTrans []*templates.TransformationUpdateParams

	fileObj, err := fs.GetFile(fileGUID)
	if err != nil {
		return nil, err
	}

	ts := templates.NewTemplateService(database.GetDatabase())
	templateObj, err := ts.GetTemplate(templateGUID)
	if err != nil {
		return nil, err
	}

	switch fileObj.Extension {
	case "csv":
		tempFile, err := ioutil.TempFile("storage", "temp")
		if err != nil {
			return nil, err
		}
		fileName, err := fileObj.DownloadFromUrl("storage")
		if err != nil {
			return nil, err
		}

		// Apply each field's transformations; collect the transformed
		// columns plus the ID column.
		finalCSVData := make([][]string, len(templateObj.Fields)+1)
		csvService := csvElement.NewCSVService(fileName, true)
		for i, field := range templateObj.Fields {
			col := csvService.GetColumn(field.Field)
			result, allNewDS := transformations.ApplyTransformations(col, field.Transformations, templateGUID)

			// Persist any mappings the transformations produced as
			// new datasources.
			for _, newDS := range allNewDS {
				var settings []datasources.Setting
				for _, aMap := range newDS {
					settings = append(settings, datasources.Setting{
						Label: aMap.Old,
						Value: aMap.New,
					})
				}
				ds := datasources.NewDatasourceService(database.GetDatabase())
				dsGUID := ds.CreateDatasource(subdom, settings)

				// We can't update the template here: other datasets
				// might use it. Instead, hand the update parameters
				// back to the caller so they can travel with the
				// queue job.
				newTrans = append(newTrans, templates.NewTransformationUpdateParams(templateGUID, field.Field, "map", dsGUID))
			}

			finalCSVData[i] = make([]string, len(result))
			copy(finalCSVData[i], result)
		}
		finalCSVData[len(templateObj.Fields)] = csvService.GetColumn("ID")

		// Write the transformed data to a local temporary file.
		newcsv := csvElement.NewCSVService(tempFile.Name(), false)
		newcsv.Headers = csvService.Headers
		finalCSVData = transpose(finalCSVData)
		newcsv.Write(finalCSVData, true)

		// Upload to AWS S3, then clean up the local copies.
		fs.SendToS3(fileObj.Path, tempFile.Name())
		os.Remove(fileName)
		os.Remove(tempFile.Name())
	}

	return newTrans, nil
}
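// transpose is referenced above but not shown here. A minimal sketch of a
// column-major to row-major flip for the [][]string it is given, assuming
// every column slice has the same length:
func transpose(cols [][]string) [][]string {
	if len(cols) == 0 {
		return nil
	}
	rows := make([][]string, len(cols[0]))
	for i := range rows {
		rows[i] = make([]string, len(cols))
		for j := range cols {
			rows[i][j] = cols[j][i]
		}
	}
	return rows
}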
// apply runs a single transformation over a column of data and returns the
// transformed column plus any value mappings the transformation produced.
func apply(data []string, transformation templates.Transformation) ([]string, []Mapping) {
	p := transformation.Parameters
	var wg sync.WaitGroup
	var mapping []Mapping

	switch transformation.Operation {
	case "toDate":
		if len(p) != 2 {
			log.Fatal("toDate transformation requires 2 parameters: current format, new format")
		}
		oldFormat := p[0]
		newFormat := p[1]
		for i, x := range data {
			y, err := time.Parse(oldFormat, x)
			if err != nil {
				log.Print("Error parsing date at index ", i, " with format: ", oldFormat)
			} else {
				data[i] = y.Format(newFormat)
			}
		}

	case "setNull":
		// Blank out any value that appears in the parameter list.
		for i, x := range data {
			if arrayPos(x, p) != -1 {
				data[i] = ""
			}
		}

	case "standardize":
		if len(p) != 1 {
			log.Fatal("standardize transformation requires 1 parameter: type (min-max|z-score|decimal)")
		}
		switch p[0] {
		case "min-max":
			// Rescale to [0, 1]: (x - min) / (max - min).
			newData := strArrToFloatArr(data)
			min, err := stats.Min(newData)
			if err != nil {
				log.Fatal("Error finding minimum of data: ", err)
			}
			max, err := stats.Max(newData)
			if err != nil {
				log.Fatal("Error finding maximum of data: ", err)
			}
			srange := max - min
			for i, x := range newData {
				data[i] = floatToString((x - min) / srange)
			}
		case "z-score":
			// Center on the mean and scale by the standard deviation.
			newData := strArrToFloatArr(data)
			mean, err := stats.Mean(newData)
			if err != nil {
				log.Fatal("Error finding mean of data: ", err)
			}
			sd, err := stats.StandardDeviation(newData)
			if err != nil {
				log.Fatal("Error finding standard deviation of data: ", err)
			}
			for i, x := range newData {
				data[i] = floatToString((x - mean) / sd)
			}
		case "decimal":
			// Divide by the smallest power of ten that brings every
			// value into (-1, 1).
			newData := strArrToFloatArr(data)
			max, err := stats.Max(newData)
			if err != nil {
				log.Fatal("Error finding maximum of data: ", err)
			}
			min, err := stats.Min(newData)
			if err != nil {
				log.Fatal("Error finding minimum of data: ", err)
			}
			maxAbs := math.Max(math.Abs(max), math.Abs(min))
			c := math.Ceil(math.Log10(maxAbs))
			for i, x := range newData {
				data[i] = floatToString(x / math.Pow10(int(c)))
			}
		}

	case "binPercent":
		// Bin values into the percentile ranges given by the parameters.
		table := NewPivotTable(data)
		intP := strArrToIntArr(p)
		sort.Ints(intP)
		ps := NewPercentileService(*table, intP)
		mapping = ps.CreateMappings()
		ps.Bin(mapping, data)

	case "fuzzyMap":
		if len(p) != 3 {
			log.Fatal("fuzzyMap transformation requires 3 parameters: datasource GUID, match, put")
		}
		dsGUID := p[0]
		ds := datasources.NewDatasourceService(database.GetDatabase())
		dsObj, err := ds.GetDatasource(dsGUID)
		if err != nil {
			log.Fatal("Error finding Datasource: ", err)
		}
		// Fuzzy-match each distinct value concurrently; guard the shared
		// mapping slice with a mutex to avoid a data race.
		var mu sync.Mutex
		for _, datum := range getDistinctValues(data) {
			wg.Add(1)
			go func(datum string) {
				defer wg.Done()
				result := fuzzyMap(datum, dsObj.Settings)
				mu.Lock()
				mapping = append(mapping, *NewMapping(datum, result))
				mu.Unlock()
			}(datum)
		}
		wg.Wait()
		data = applyMappings(mapping, data)
	}

	return data, mapping
}
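// Sketches of the small conversion helpers apply relies on, inferred from how
// they are called above (using the standard library's strconv); the real
// implementations may differ.

// strArrToFloatArr parses each string as a float64; a value that fails to
// parse is assumed to become 0.
func strArrToFloatArr(in []string) []float64 {
	out := make([]float64, len(in))
	for i, s := range in {
		if f, err := strconv.ParseFloat(s, 64); err == nil {
			out[i] = f
		}
	}
	return out
}

// floatToString renders a float64 back into the column's string form.
func floatToString(f float64) string {
	return strconv.FormatFloat(f, 'f', -1, 64)
}

// arrayPos returns the index of needle in haystack, or -1 if absent.
func arrayPos(needle string, haystack []string) int {
	for i, s := range haystack {
		if s == needle {
			return i
		}
	}
	return -1
}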