// visitsAggregationMu serializes the direct reads and writes that
// ProcessVisitsLine workers perform on the aggregation maps they share.
// Whatever locking the synced types do inside their own methods cannot cover
// the plain map accesses made here, so the whole update is guarded.
var visitsAggregationMu sync.Mutex

// ProcessVisitsLine consumes rows from dataQueue until the channel is closed
// and folds every "facebook.com" row into the shared per-date aggregates.
//
// Each row must carry at least three fields: the domain (stored as Id), the
// date, and a compressed JSON payload. The payload is decompressed and decoded
// into VisitsLine.Data, which is iterated as user id -> list of page views.
//
//   - facebookUniqueIds receives, per date, the set of numeric user ids seen.
//   - facebookPageViews receives, per date, element 1 of every page view entry
//     (presumably the view count; confirm against the data format).
//
// Rows that are too short, fail to decompress, or fail to decode are reported
// on stdout and skipped. Rows for other domains are skipped before the payload
// is decoded. waitGroup.Done is called exactly once when the function returns.
func ProcessVisitsLine(dataQueue <-chan []string, facebookUniqueIds map[string]*synced.IntSetSynced,
	facebookPageViews *synced.MapViewsSynced, waitGroup *sync.WaitGroup) {
	// Registered first so the WaitGroup is released on every exit path,
	// including a panic while handling a row.
	defer waitGroup.Done()

	for line := range dataQueue {
		if len(line) < 3 {
			fmt.Printf("Skipping malformed visit line with %d fields\n", len(line))
			continue
		}

		visitLine := &VisitsLine{Id: line[0], Date: line[1]}

		// Only facebook.com rows contribute to the aggregates, so avoid the
		// decompression and JSON decoding cost for every other domain.
		if visitLine.Id != "facebook.com" {
			continue
		}

		decompressed, err := file_processing.Decompress(line[2])
		if err != nil {
			fmt.Println("Failed decompression visit line:", err)
			continue
		}

		if err := json.Unmarshal(decompressed, &visitLine.Data); err != nil {
			fmt.Println("Failed to unmarshal data:", err)
			continue
		}

		recordFacebookVisits(visitLine, facebookUniqueIds, facebookPageViews)
	}
}

// recordFacebookVisits adds one decoded facebook.com row to the shared
// per-date unique-id sets and page view totals.
func recordFacebookVisits(visitLine *VisitsLine, facebookUniqueIds map[string]*synced.IntSetSynced,
	facebookPageViews *synced.MapViewsSynced) {
	visitsAggregationMu.Lock()
	defer visitsAggregationMu.Unlock()

	for userID, pageViews := range visitLine.Data {
		// Create the set for this date the first time the date is seen.
		uniqueIDs, ok := facebookUniqueIds[visitLine.Date]
		if !ok {
			uniqueIDs = new(synced.IntSetSynced)
			uniqueIDs.Set = make(map[int64]bool)
			facebookUniqueIds[visitLine.Date] = uniqueIDs
		}

		// A non-numeric id must not be counted as user 0; its page views are
		// still counted below.
		if parsedID, err := strconv.ParseInt(userID, 10, 64); err != nil {
			fmt.Printf("Skipping unparsable user id %q: %v\n", userID, err)
		} else {
			uniqueIDs.AddToSet(parsedID)
		}

		for _, pageView := range pageViews {
			if _, ok := facebookPageViews.Map[visitLine.Date]; !ok {
				facebookPageViews.Map[visitLine.Date] = 0
			}
			facebookPageViews.AddToViews(visitLine.Date, pageView[1])
		}
	}
}
Exemple #2
0
// main produces two reports from the cached data files that live next to the
// executable:
//
//  1. The number of unique user ids per date for Facebook, taken from the
//     visits file (flag -visits_file).
//  2. The per-date average seconds figure computed by ProcessSpreadsLine from
//     the spreads file (flag -spreads_file), followed by the standard
//     deviation of Facebook page views per day.
//
// Flag -process_num sets both GOMAXPROCS and the number of worker goroutines
// started for each file; it must be at least 1.
func main() {
	visitsFile := flag.String("visits_file", "data/domain_visits_cache", "string")
	spreadsFile := flag.String("spreads_file", "data/domain_spread_cache", "string")
	numberOfProcess := flag.Int("process_num", 2, "int")
	flag.Parse()

	// A zero or negative worker count would leave the queue without any
	// consumer (so the run could hang, assuming ReadFileSync sends rows on the
	// channel) and can drive the WaitGroup counter negative, which panics.
	if *numberOfProcess < 1 {
		log.Fatalf("process_num must be at least 1, got %d", *numberOfProcess)
	}

	runtime.GOMAXPROCS(*numberOfProcess)

	// Data files are resolved relative to the directory of the executable.
	dir, err := filepath.Abs(filepath.Dir(os.Args[0]))
	if err != nil {
		log.Fatal(err)
	}

	visitsPath := filepath.Join(dir, *visitsFile)
	visits, err := os.Open(visitsPath)
	if err != nil {
		log.Fatalf("Cannot open visits file %s: %v", visitsPath, err)
	}
	defer visits.Close()

	/*

		SOLUTION 1

	*/

	dataQueue := make(chan []string)
	facebookUniqueIds := make(map[string]*synced.IntSetSynced)
	facebookPageViews := new(synced.MapViewsSynced)
	facebookPageViews.Map = make(map[string]int64)

	// One slot for the reader goroutine plus one per worker.
	waitGroup.Add(*numberOfProcess + 1)

	go file_processing.ReadFileSync(visits, dataQueue, &waitGroup)

	/*

		Build a map[DATE]uniqueIds for Facebook. The whole set of unique ids is
		kept instead of just the counts so it can be reused in the second
		solution.

		Also build the facebookPageViews map with the number of page views per
		day so the standard deviation of page views per day can be calculated.

	*/

	for processId := 0; processId < *numberOfProcess; processId++ {
		go wakoopa.ProcessVisitsLine(dataQueue, facebookUniqueIds, facebookPageViews, &waitGroup)
	}

	waitGroup.Wait()

	for date, uniqueIds := range facebookUniqueIds {
		fmt.Printf("Date: %s, UniqueIds: %d \n", date, uniqueIds.Len())
	}

	/*

		SOLUTION 2

	*/

	spreadsPath := filepath.Join(dir, *spreadsFile)
	spreads, err := os.Open(spreadsPath)
	if err != nil {
		log.Fatalf("Cannot open spreads file %s: %v", spreadsPath, err)
	}
	defer spreads.Close()

	// One slot for the reader goroutine plus one per worker.
	waitGroup.Add(*numberOfProcess + 1)

	dataQueue = make(chan []string)
	averageSecondsPerDay := make(map[string]float64)

	go file_processing.ReadFileSync(spreads, dataQueue, &waitGroup)

	/*

		Build the map of average seconds spent on google.* between 20:00 and
		23:00 by users who also visited Facebook (reusing the Facebook visits
		from the first solution).

	*/

	for processId := 0; processId < *numberOfProcess; processId++ {
		go wakoopa.ProcessSpreadsLine(dataQueue, facebookUniqueIds, averageSecondsPerDay, &waitGroup)
	}

	waitGroup.Wait()

	for date, seconds := range averageSecondsPerDay {
		fmt.Printf("Date: %s, AverageSecondsSpendOnGoogle: %f \n", date, seconds)
	}

	meanFacebookPageViews := statistics.GetMean(facebookPageViews.Map)
	std := statistics.GetStandardDeviation(facebookPageViews.Map, meanFacebookPageViews)
	fmt.Printf("Standard deviaton of pageViews on Facebook per day: %f \n", std)
}