Beispiel #1
0
func Scan(
	db *bolt.DB, done chan bool, locales []string, minPopulation int,
	countriesFile string, citiesFile string, alternateNamesFile string,
) {
	startTime := time.Now()

	ds.CreateCountriesBucket(db)
	ds.CreateCitiesBucket(db)
	ds.CreateCityNamesBucket(db)
	ds.CreateStatisticsBucket(db)

	var citiesCount int
	var cityNamesCount int

	fmt.Println("[PARSER] Started countries parsing")
	countriesCount, err := scanCountries(db, countriesFile)
	if err == nil {
		fmt.Println("[PARSER] Started cities parsing")
		citiesCount, err = scanCities(db, citiesFile, minPopulation)
		if err == nil {
			fmt.Println("[PARSER] Started alternate names parsing")
			cityNamesCount, err = scanAlternateNames(db, alternateNamesFile, locales)
		}
	}

	if err != nil {
		panic(fmt.Sprintf("[PARSER] Error: %v", err))
	} else {
		ds.Statistics{
			CountriesCount: countriesCount,
			CitiesCount:    citiesCount,
			CityNamesCount: citiesCount + cityNamesCount,
		}.Save(db)

		fmt.Printf("[PARSER] Added %d countries\n", countriesCount)
		fmt.Printf("[PARSER] Added %d cities\n", citiesCount)
		fmt.Printf("[PARSER] Added %d city names\n", citiesCount+cityNamesCount)
		fmt.Printf("[PARSER] Parsing done (in %s)\n", time.Since(startTime))
		done <- true
	}
}
func TestScanAlternateNames(t *testing.T) {
	Convey("Test scan alternate names", t, func() {
		db := h.CreateDB(t)
		ds.CreateCitiesBucket(db)
		ds.CreateCityNamesBucket(db)
		ds.CreateCountriesBucket(db)

		h.PutToBucket(t, db, ds.CitiesBucketName, "1", "Montreal\t\t\t\t\t")
		h.PutToBucket(t, db, ds.CitiesBucketName, "2", "Moscow\t\t\t\t\t")
		h.PutToBucket(t, db, ds.CountriesBucketName, "3", "DE\tGermany\ten|Germany")

		locales := []string{"de", "ru"}

		Convey("When alternate names file exists", func() {
			filename := h.CreateTempfile(
				t,
				"10\t1\tfr\tMontréal\t\t\t\t\n11\t2\tde\tMoskau\t\t\t\t\n12\t2\tru\tМосква\t\t\t\t13\t9\tde\tMünchen\t\t\t\t\n"+
					"14\t3\tde\tDeutschland\t\t\t\t\n15\t3\ten\tWest Germany\t\t\t\t\n16\t3\tit\tGermania\t\t\t\t",
			)

			count, err := scanAlternateNames(db, filename, locales)
			country, _ := ds.FindCountry(db, "3")

			Convey("Returns number of scanned records", func() {
				So(count, ShouldEqual, 2)
			})

			Convey("When the locale is supported", func() {
				Convey("Stores the record if the city exists", func() {
					actual := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "moskau")
					So(actual, ShouldEqual, "Moskau\t2\tde\t0")
				})

				Convey("Doesn't store the record if the city doesn't exist", func() {
					actual := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "münchen")
					So(actual, ShouldEqual, "")
				})

				Convey("Adds translations for countries", func() {
					So(country.Translations["de"], ShouldEqual, "Deutschland")
				})

				Convey("Doesn't override en names for countries", func() {
					So(country.Translations["en"], ShouldEqual, "Germany")
				})
			})

			Convey("When the locale is not supported", func() {
				Convey("Doesn't store the record", func() {
					actual := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "montréal")
					So(actual, ShouldEqual, "")
				})

				Convey("Doesn't add translations for countries", func() {
					So(country.Translations["it"], ShouldEqual, "")
				})
			})

			Convey("Returns no error", func() {
				So(err, ShouldBeNil)
			})
		})

		Convey("When alternate names file does not exist", func() {
			count, err := scanAlternateNames(db, "fake.txt", locales)

			Convey("Returns a zero number of scanned records", func() {
				So(count, ShouldEqual, 0)
			})

			Convey("Returns an error", func() {
				So(err, ShouldNotBeNil)
			})
		})
	})
}
func TestScanCities(t *testing.T) {
	Convey("Test scan cities", t, func() {
		db := h.CreateDB(t)
		ds.CreateCitiesBucket(db)
		ds.CreateCityNamesBucket(db)

		Convey("When cities files exists", func() {
			filename := h.CreateTempfile(
				t,
				"890516\tGwanda\tGwanda\tJawunda\t-20.93333\t29\tP\tPPLA\tZW\t\t07\t\t\t\t14450\t\t982\tAfrica/Harare\t2009-06-30\n"+
					"890983\tGokwe\tGokwe\tGokwe\t-18.20476\t28.9349\tP\tPPL\tZW\t\t02\t\t\t\t18942\t\t1237\tAfrica/Harare\t2012-05-05\n"+
					"890984\tSmall city\tGokwe\tJawunda\t-18.20576\t29.9349\tP\tPPL\tZW\t\t02\t\t\t\t1942\t\t1237\tAfrica/Harare\t2012-05-05",
			)

			count, err := scanCities(db, filename, 2000)

			Convey("Stores parsed cities to the db", func() {
				actual := h.ReadFromBucket(t, db, ds.CitiesBucketName, "890516")
				So(actual, ShouldEqual, "Gwanda\tZW\t14450\t-20.93333\t29\tAfrica/Harare")

				actual = h.ReadFromBucket(t, db, ds.CitiesBucketName, "890983")
				So(actual, ShouldEqual, "Gokwe\tZW\t18942\t-18.20476\t28.9349\tAfrica/Harare")
			})

			Convey("Stores parsed city names to the db", func() {
				actual := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "gwanda")
				So(actual, ShouldEqual, "Gwanda\t890516\ten\t14450")

				actual = h.ReadFromBucket(t, db, ds.CityNamesBucketName, "gokwe")
				So(actual, ShouldEqual, "Gokwe\t890983\ten\t18942")
			})

			Convey("Returns number of scanned records", func() {
				So(count, ShouldEqual, 2)
			})

			Convey("Returns no error", func() {
				So(err, ShouldBeNil)
			})
		})

		Convey("When the file has invalid data", func() {
			filename := h.CreateTempfile(t, "crap\ncrap\ncrap")
			count, err := scanCities(db, filename, 2000)

			Convey("Returns a zero number of scanned records", func() {
				So(count, ShouldEqual, 0)
			})

			Convey("Returns an error", func() {
				So(err, ShouldNotBeNil)
			})
		})

		Convey("When cities file does not exist", func() {
			count, err := scanCities(db, "fake.txt", 2000)

			Convey("Returns a zero number of scanned records", func() {
				So(count, ShouldEqual, 0)
			})

			Convey("Returns an error", func() {
				So(err, ShouldNotBeNil)
			})
		})
	})
}