func main() { gob.Register(Centroid{}) gob.Register(itemset.Centroid{}) gob.Register(utils.Hash64Set{}) flag.Parse() t1 := time.Now() records := utils.ReadCSV("./dataset.csv") Object := reader.NewStreamObject(len(records[0]), numClusters) Stream := stream.NewStream(Object) outChannel := make(chan Centroid) ch := make(chan []float64) source := f.Channel(ch) f1 := source.Map(func(record []float64) Centroid { return Centroid{C: Stream.AddVectorOnlineStep(record)} }).AddOutput(outChannel) flow.Ready() var wg sync.WaitGroup goStart(&wg, func() { f1.Run() }) goStart(&wg, func() { for out := range outChannel { Stream.CentroidCounter.Add(out.C) } }) for _, record := range records { ch <- record } close(ch) wg.Wait() normalizedResults := Stream.GetCentroids() ts := time.Since(t1) file, err := os.OpenFile("./results.txt", os.O_WRONLY|os.O_CREATE, 0644) if err != nil { panic(err) } defer file.Close() for _, result := range normalizedResults { for _, dimension := range result { file.WriteString(fmt.Sprintf("%f ", parse.DeNormalize(dimension))) } file.WriteString("\n") } file.WriteString("Time: " + ts.String()) }
func main() { var rphashObject *reader.StreamObject var rphashStream *stream.Stream var centroids []types.Centroid t1 := time.Now() // Split the data into shards and send them to the Agents to work on. f.Source(func(out chan Vector) { records, err := utils.ReadLines(dataFilePath) if err != nil { panic(err) } // Convert the record to standard floating points. for i, record := range records { if i == 0 { // Create a new RPHash stream. rphashObject = reader.NewStreamObject(len(record), numClusters) rphashStream = stream.NewStream(rphashObject) rphashStream.RunCount = 1 } data := make([]float64, len(record)) for j, entry := range record { f, err := strconv.ParseFloat(entry, 64) f = parse.Normalize(f) if err != nil { panic(err) } data[j] = f } out <- Vector{Data: data} } }, numShards).Map(func(vec Vector) { centroids = append(centroids, rphashStream.AddVectorOnlineStep(vec.Data)) }).Run() for _, cent := range centroids { rphashStream.CentroidCounter.Add(cent) } normalizedResults := rphashStream.GetCentroids() t2 := time.Now() log.Println("Time: ", t2.Sub(t1)) denormalizedResults := make([][]float64, len(normalizedResults)) for i, result := range normalizedResults { row := make([]float64, len(result)) for j, dimension := range result { row[j] = parse.DeNormalize(dimension) } denormalizedResults[i] = row } labels := make([]string, len(denormalizedResults)) xPlotValues := make([][]float64, len(denormalizedResults)) yPlotValues := make([][]float64, len(denormalizedResults)) for i, result := range denormalizedResults { xPlotValues[i] = make([]float64, len(result)) yPlotValues[i] = make([]float64, len(result)) for j, val := range result { xPlotValues[i][j] = float64(j) yPlotValues[i][j] = val } Paint(result, i) sI := strconv.FormatInt(int64(i), 16) labels[i] = "Digit " + sI + " (by Classifier Centroid)" } GeneratePlots(xPlotValues, yPlotValues, "High Dimension Handwritting Digits 0-9 Classification", "Dimension", "Strength of Visual Pixel Recognition (0-1000)", "plots/centroid-dimensions-", labels) }