Beispiel #1
0
// NameToNutrientMigration re-derives the dietary information of every recipe
// in s.Recipes from its name, strips the metadata from the name, and writes
// the resulting name, nutrients, object ID and UUID to the "Recipe" class via
// s.DB.Put.
//
// The migration stops at the first recipe whose update fails, i.e. when Put
// reports errors or a 400 status. s.Recipes itself is not modified: the loop
// works on copies of the map values.
func NameToNutrientMigration(s *State) {
	for _, recipe := range s.Recipes {
		// Derive the dietary flags from the original name, before the
		// metadata is stripped from it.
		recipe.Nutrients = *SetDietaryInfo(&recipe.Nutrients, recipe.Name)
		recipe.Name = models.RemoveMetaData(recipe.Name)
		fmt.Println(recipe.Name)

		// recipe.Name has already been stripped above; the second pass is
		// kept from the original code.
		// NOTE(review): presumably a no-op on an already-stripped name — confirm.
		name := models.RemoveMetaData(recipe.Name)

		x := struct {
			Name      string                      `json:"name"`
			Nutrients models.NutrientInfoResponse `json:"nutrients"`
			ID        string                      `json:"objectId"`
			UUID      string                      `json:"uuid"`
		}{
			Name:      name,
			Nutrients: *SetDietaryInfo(&recipe.Nutrients, recipe.Name),
			ID:        recipe.ObjectID(),
			UUID:      lib.GetMD5Hash(name),
		}

		// The JSON dump is purely diagnostic output; a marshalling failure is
		// reported but does not stop the update itself.
		xString, err := json.Marshal(x)
		if err != nil {
			log.Error(err)
		} else {
			fmt.Println(string(xString))
		}

		_, status, errs := s.DB.Put(x, "Recipe", recipe.ID)
		if errs != nil || status == 400 {
			log.Error(status)
			log.Error(errs)
			log.Error(errors.Errorf("Unable to post recipe with ID: %s", recipe.ID))
			break
		}
	}
}
Beispiel #2
0
// saveRecipes posts every recipe of v that is not yet present in s.Recipes
// through s.DB.Post and stores the returned object in s.Recipes under the
// recipe's ID.
//
// The recipes are first reduced with uniqueRecipes. A recipe is counted as a
// duplicate when the entry stored under its ID in s.Recipes has a DartmouthID
// equal to that ID. Failed posts are logged and skipped. A summary with the
// number of saved and duplicate recipes is logged at the end.
func saveRecipes(s *State, v models.VenueInfo) {
	// Every recipe created here points at the same _User object, so the
	// value is built once instead of once per recipe.
	creator := models.CreatedBy{
		Kind:      "Pointer",
		ClassName: "_User",
		ObjectID:  "95xfYTL7GG",
	}

	var duplicates, saved int
	for _, recipe := range uniqueRecipes(v.Recipes) {
		if s.Recipes[recipe.ID].DartmouthID == recipe.ID {
			duplicates++
			continue
		}

		name := models.RemoveMetaData(recipe.Name)
		returnObj, status, errs := s.DB.Post(models.ParseRecipe{
			Name:        name,
			Category:    recipe.Category,
			DartmouthID: recipe.ID,
			Rank:        recipe.Rank,
			UUID:        lib.GetMD5Hash(name),
			Nutrients:   *SetDietaryInfo(&recipe.Nutrients, recipe.Name),
			Class:       "Recipe",
			CreatedBy:   creator,
		})
		if errs != nil || status == 400 {
			log.Error(status)
			if errs != nil {
				log.Error(errs)
			}
			log.Error(errors.Errorf("Unable to post recipe with ID: %d", recipe.ID))
			continue
		}

		// Guard the assertion so an unexpected response type is reported
		// instead of panicking in the middle of a scrape.
		returnedRecipe, ok := returnObj.(models.ParseRecipe)
		if !ok {
			log.Error(errors.Errorf("Unexpected response type %T for recipe with ID: %d", returnObj, recipe.ID))
			continue
		}
		s.Recipes[recipe.ID] = returnedRecipe
		log.Debug("Created new recipe with objectId: ", returnedRecipe.ObjectID())
		saved++
	}

	log.WithFields(logrus.Fields{
		"Saved":     saved,
		"Duplicate": duplicates,
	}).Info("Scraped Recipes")
}
Beispiel #3
0
// scrape is the CLI action that collects seven consecutive days of venue,
// menu, meal and recipe data and, depending on the flags set on c, stores it
// through saveToParse and/or writes it to disk.
//
// Flags read from c:
//   - nameNutrientMigration: run NameToNutrientMigration and return.
//   - mock: decode output_DDS.json, pass it to saveToParse and return.
//   - startDate: first day to scrape, formatted MM/dd/YY. An empty or
//     malformed value is reported through log.Fatal.
//   - save: pass each venue's scraped data to saveToParse.
//   - write-files: write each venue's data to output_<venue key>.json in the
//     working directory.
//
// Once all days are processed, the notifications collected for subscribed
// recipes are created and saved, and removeOldNotifications is called.
func scrape(c *cli.Context) {
	log.Info("Initializing Scraper")
	// NOTE(review): the Parse credentials are hard-coded in source; they
	// should presumably come from configuration or the environment.
	p := parse.Client{
		BaseURL:       "https://api.parse.com/1",
		ApplicationID: "BAihtNGpVTx4IJsuuFV5f9LibJGnD1ZBOsnXk9qp",
		Key:           "zJYR2d3dFN3bXL6vUANZyoVLZ3bcTF7fpXTCrU7s",
	}

	s := State{
		DB:            &p,
		Recipes:       make(map[int]models.ParseRecipe),
		Nutrients:     make(map[int]bool),
		Offerings:     make(map[string]models.ParseOffering),
		Subscriptions: make(map[int][]string),
		Notifications: make(map[string]models.ParseNotification),
	}

	if c.Bool("nameNutrientMigration") {
		fmt.Println("Running Migration...")
		InitParse(&s)
		NameToNutrientMigration(&s)
		return
	}

	if c.Bool("mock") {
		log.Info("Mocked Scrape")
		InitParse(&s)
		file, err := os.Open("output_DDS.json")
		if err != nil {
			log.Fatal(err)
		}
		// The file is only read here; release the descriptor on return.
		defer file.Close()

		info := models.VenueInfo{}
		if err := json.NewDecoder(file).Decode(&info); err != nil {
			log.Fatal(err)
		}
		saveToParse(&s, info)
		log.Info("End Mocked Scrape")
		return
	}

	InitParse(&s)
	pwd, err := os.Getwd()
	if err != nil {
		log.Fatal("Could not get working directory!")
	}

	writeFiles := c.Bool("write-files")
	if writeFiles {
		fmt.Println()
		fmt.Println("Output files will be placed in", pwd)
	}

	layout := "01/02/06"
	start, err := time.Parse(layout, c.String("startDate"))
	if err != nil {
		log.Fatal("Unable to parse date make sure it looks like MM/dd/YY")
	}

	// Scrape the start date and the six days that follow it.
	dates := make([]time.Time, 0, 7)
	for i := 0; i < 7; i++ {
		dates = append(dates, start.AddDate(0, 0, i))
	}

	shouldPost := c.Bool("save")
	notificationsToCreate := []models.Notification{}

	// How many nutrition requests may be in flight at a time.
	const nutritionRoutines = 50

	for _, date := range dates {
		log.WithFields(logrus.Fields{
			"date": date.Format(layout),
		}).Info("Start Scrape")

		// We want to get all available SIDs.
		sids, err := lib.AvailableSIDS()
		if err != nil {
			log.Fatal(err)
		}
		log.WithFields(logrus.Fields{
			"count": len(sids),
		}).Info("SIDS")

		for key, value := range sids {
			log.WithFields(logrus.Fields{
				"venue": key,
			}).Info("Venue Scrape")
			info := models.VenueInfo{
				Date: date,
			}
			sid, err := lib.SID(key)
			if err != nil {
				log.Error(err)
				continue
			}

			info.Venue = value
			info.Key = key
			info.SID = sid

			info.Menus, err = lib.MenuList(sid)
			if err != nil {
				log.Error(err)
				continue
			}
			log.WithFields(logrus.Fields{
				"count": len(info.Menus),
			}).Info("Got Menus")

			// A failed meal lookup is logged but does not skip the venue;
			// the loops below simply run over whatever was returned.
			info.Meals, err = lib.MealList(sid)
			if err != nil {
				log.Error(err)
			}
			log.WithFields(logrus.Fields{
				"count": len(info.Meals),
			}).Info("Got Meals")

			for _, meal := range info.Meals {
				menuMeal := models.MenuMeal{
					Meal:  meal,
					Menus: models.MenuInfoSlice{},
				}
				for _, menu := range info.Menus {
					newRecipes, err := lib.
						RecipesMenuMealDate(sid, menu.ID, meal.ID, date)
					if err != nil {
						log.Error(err)
						continue
					}
					for _, recipe := range newRecipes {
						if len(s.Subscriptions[recipe.ID]) > 0 {
							notificationsToCreate = append(notificationsToCreate, models.Notification{
								RecipeID: recipe.ID,
								Name:     models.RemoveMetaData(recipe.Name),
								Day:      date.Day(),
								Month:    int(date.Month()),
								Year:     date.Year(),
								OnDate:   date,
								MenuName: menu.Name,
								MealName: meal.Name,
								Venue:    info.Key,
							})
						}
					}
					// We need to scrape the recipes so that we can create
					// notifications, but if the offering exists then we can
					// skip everything else.
					if offeringExists(&s, info.Key, menu.Name, meal.Name, date) {
						log.WithFields(logrus.Fields{
							"meal":  meal.ID,
							"menu":  menu.ID,
							"venue": info.Key,
							"date":  date.Format(layout),
						}).Info("Offering Exists")
						continue
					}
					if len(newRecipes) > 0 {
						menuMeal.Menus = append(menuMeal.Menus, menu)
					}
					info.Recipes = append(info.Recipes, newRecipes...)
				}
				info.MealsList = append(info.MealsList, menuMeal)
			}

			// This section is the part that benefits the most from
			// concurrency: the top parts finish in about 5 seconds but this
			// will take up to 15 minutes if done one by one.
			log.WithFields(logrus.Fields{
				"count": len(info.Recipes),
			}).Info("Start Recipe Scrape")

			// Buffered channel used as a counting semaphore. It is created
			// per venue and never closed: nothing receives from it once the
			// wait loop below has finished.
			throttleRequests := make(chan bool, nutritionRoutines)
			for index := range info.Recipes {
				// Take a slot before starting the goroutine so that at most
				// nutritionRoutines requests run at once.
				throttleRequests <- true
				go func(index int, info *models.VenueInfo) {
					// Give the slot back once the request is done.
					defer func() { <-throttleRequests }()

					// The returned pointer is ignored; passing
					// &info.Recipes[index] lets GetNutrients work on the
					// element stored in info rather than on a copy.
					_, err := lib.GetNutrients(info.SID, &info.Recipes[index])
					if err != nil {
						log.Error(err)
					}
				}(index, &info)
			}

			// Wait for the outstanding requests: the channel can only be
			// filled to capacity after every goroutine has given its slot
			// back.
			for i := 0; i < cap(throttleRequests); i++ {
				throttleRequests <- true
			}

			log.WithFields(logrus.Fields{
				"count": len(info.Recipes),
			}).Info("Finish Recipe Scrape")
			if shouldPost {
				saveToParse(&s, info)
			}
			log.WithFields(logrus.Fields{
				"venue": info.Key,
			}).Info("Finish Venue Scrape")

			// Write a file with the output to the directory the program is
			// run under.
			if writeFiles {
				fileName := fmt.Sprintf("output_%s.json", info.Key)
				// NOTE(review): path.Join always uses forward slashes;
				// filepath.Join would be the OS-aware choice but needs an
				// import that is not visible from this function.
				filePath := path.Join(pwd, fileName)
				b, err := json.MarshalIndent(info, "", "  ")
				if err != nil {
					// Nothing useful to write; do not create an empty file.
					fmt.Println("error:", err)
					continue
				}
				if err := ioutil.WriteFile(filePath, b, 0644); err != nil {
					log.Println(err)
					continue
				}
				fmt.Println("Wrote to:", fileName)
			}
		}
	}

	ns := createNotifications(&s, notificationsToCreate)
	saveNotifications(&s, ns)
	removeOldNotifications(&s)
}