func NormalizeSlice(records [][]float64) [][]float64 { data := make([][]float64, len(records)) for i, record := range records { data[i] = make([]float64, len(record)) for j, entry := range record { data[i][j] = parse.Normalize(entry) } } return data }
func main() { var rphashObject *reader.StreamObject var rphashStream *stream.Stream var centroids []types.Centroid t1 := time.Now() // Split the data into shards and send them to the Agents to work on. f.Source(func(out chan Vector) { records, err := utils.ReadLines(dataFilePath) if err != nil { panic(err) } // Convert the record to standard floating points. for i, record := range records { if i == 0 { // Create a new RPHash stream. rphashObject = reader.NewStreamObject(len(record), numClusters) rphashStream = stream.NewStream(rphashObject) rphashStream.RunCount = 1 } data := make([]float64, len(record)) for j, entry := range record { f, err := strconv.ParseFloat(entry, 64) f = parse.Normalize(f) if err != nil { panic(err) } data[j] = f } out <- Vector{Data: data} } }, numShards).Map(func(vec Vector) { centroids = append(centroids, rphashStream.AddVectorOnlineStep(vec.Data)) }).Run() for _, cent := range centroids { rphashStream.CentroidCounter.Add(cent) } normalizedResults := rphashStream.GetCentroids() t2 := time.Now() log.Println("Time: ", t2.Sub(t1)) denormalizedResults := make([][]float64, len(normalizedResults)) for i, result := range normalizedResults { row := make([]float64, len(result)) for j, dimension := range result { row[j] = parse.DeNormalize(dimension) } denormalizedResults[i] = row } labels := make([]string, len(denormalizedResults)) xPlotValues := make([][]float64, len(denormalizedResults)) yPlotValues := make([][]float64, len(denormalizedResults)) for i, result := range denormalizedResults { xPlotValues[i] = make([]float64, len(result)) yPlotValues[i] = make([]float64, len(result)) for j, val := range result { xPlotValues[i][j] = float64(j) yPlotValues[i][j] = val } Paint(result, i) sI := strconv.FormatInt(int64(i), 16) labels[i] = "Digit " + sI + " (by Classifier Centroid)" } GeneratePlots(xPlotValues, yPlotValues, "High Dimension Handwritting Digits 0-9 Classification", "Dimension", "Strength of Visual Pixel Recognition (0-1000)", "plots/centroid-dimensions-", labels) }