			if err != nil {
				// NOTE(review): panicking on a save failure aborts the whole CLI run;
				// consider returning/logging the error instead — confirm intended behavior.
				panic(err)
			}
		}
		// Report wall-clock time for this command (startTime is set at the top of Run).
		getElapsedTime(&startTime)
	},
}

// cmdExtractRecipiesFromUrl implements the "process-url" subcommand: it
// downloads recipes from the URLs given as positional args, prints a summary,
// and optionally persists the results to MongoDB when a Mongo URL is configured.
// NOTE(review): identifier misspells "Recipes" as "Recipies" — renaming would
// touch callers outside this view, so it is only flagged here.
var cmdExtractRecipiesFromUrl = &cobra.Command{
	Use:   "process-url",
	Short: "Extract recipes from the given URL",
	Run: func(cmd *cobra.Command, args []string) {
		// Track elapsed time for this invocation (reported via getElapsedTime below).
		startTime = time.Now()
		// Each positional argument is treated as a URL to scrape for recipes.
		recipes := r.DownloadRecipesFromUrls(args).Recipes
		fmt.Printf("Found %d recipes.\n", len(recipes))
		for i, recipe := range recipes {
			fmt.Printf("Recipe #%d: %s\n", i, recipe.String())
		}
		// Persistence is optional: only save when a MongoDB URL is configured.
		if globalConfig.MongoUrl != "" {
			err := r.SaveRecipes(globalConfig.MongoUrl, recipes)
			if err != nil {
				// NOTE(review): same panic-on-error pattern as the sibling command above.
				panic(err)
			}
		}
		getElapsedTime(&startTime)
	},
	// Keep only URLs we have no existing recipe for; existingURLsTable is the
	// membership set, processableURLs the surviving work list.
	for _, url := range unprocessedFilteredURLs {
		if !existingURLsTable[url] {
			processableURLs = append(processableURLs, url)
		}
	}
	fmt.Printf("Found existing recipes for %d of %d URLs. Will process %d URLs.\n", len(existingURLs), len(unprocessedFilteredURLs), len(processableURLs))
	for _, url := range existingURLs {
		fmt.Printf("Existing: %s\n", url)
	}
	for _, url := range processableURLs {
		fmt.Printf("Will process: %s\n", url)
	}
}
// Download recipes only for the URLs that survived all filtering above.
result := r.DownloadRecipesFromUrls(processableURLs)

// Per-page statistics, derived from the successive filtering stages:
//   urls -> filteredURLs (section filter) -> unprocessedFilteredURLs
//   (duplicate filter) -> processableURLs (already-existing filter).
// Each "ignored" count is the size difference between adjacent stages.
pageStats := RecipeStats{}
pageStats.RecipeCount = len(result.Recipes)
pageStats.ArticlesWithoutRecipesCount = len(result.URLsWithoutRecipes)
pageStats.articlesProcessedCount = len(result.URLs)
pageStats.articlesIgnoredAsDuplicatesCount = len(filteredURLs) - len(unprocessedFilteredURLs)
pageStats.articlesIgnoredAsExistingCount = len(unprocessedFilteredURLs) - len(processableURLs)
pageStats.articlesIgnoredInWrongSectionCount = len(urls) - len(filteredURLs)
pageStats.URLsWithoutRecipes = result.URLsWithoutRecipes
// Total with recipes = newly processed URLs that yielded a recipe, plus URLs
// skipped because a recipe for them already existed.
pageStats.TotalURLsWithRecipesCount = (pageStats.articlesProcessedCount - pageStats.ArticlesWithoutRecipesCount) + pageStats.articlesIgnoredAsExistingCount
// Fold this page's numbers into the run-wide totals.
overallStats.merge(pageStats)
// Mark every URL from this page as seen so later pages skip duplicates.
for _, url := range unprocessedFilteredURLs {
	processedURLsTable[url] = true