Exemple #1
0
// Scan imports the countries, cities and alternate names files into db.
//
// It creates the countries, cities, city names and statistics buckets, then
// runs the countries, cities and alternate names parsers in that order. The
// first parser error aborts the scan with a panic carrying a
// "[PARSER] Error: ..." message; in that case nothing is sent on done.
//
// On success the collected counters are saved as ds.Statistics, a summary
// with the elapsed time is printed, and true is sent on done.
func Scan(
	db *bolt.DB, done chan bool, locales []string, minPopulation int,
	countriesFile string, citiesFile string, alternateNamesFile string,
) {
	const failureFormat = "[PARSER] Error: %v"

	begin := time.Now()

	ds.CreateCountriesBucket(db)
	ds.CreateCitiesBucket(db)
	ds.CreateCityNamesBucket(db)
	ds.CreateStatisticsBucket(db)

	fmt.Println("[PARSER] Started countries parsing")
	countries, err := scanCountries(db, countriesFile)
	if err != nil {
		panic(fmt.Sprintf(failureFormat, err))
	}

	fmt.Println("[PARSER] Started cities parsing")
	cities, err := scanCities(db, citiesFile, minPopulation)
	if err != nil {
		panic(fmt.Sprintf(failureFormat, err))
	}

	fmt.Println("[PARSER] Started alternate names parsing")
	alternateNames, err := scanAlternateNames(db, alternateNamesFile, locales)
	if err != nil {
		panic(fmt.Sprintf(failureFormat, err))
	}

	// The city names total is reported as the number of cities plus the
	// number of alternate names.
	totalNames := cities + alternateNames

	stats := ds.Statistics{
		CountriesCount: countries,
		CitiesCount:    cities,
		CityNamesCount: totalNames,
	}
	stats.Save(db)

	fmt.Printf("[PARSER] Added %d countries\n", countries)
	fmt.Printf("[PARSER] Added %d cities\n", cities)
	fmt.Printf("[PARSER] Added %d city names\n", totalNames)
	fmt.Printf("[PARSER] Parsing done (in %s)\n", time.Since(begin))

	// Signal completion only after statistics are saved and the summary is printed.
	done <- true
}
// TestScanAlternateNames exercises scanAlternateNames against a database
// seeded with two cities (ids 1 and 2) and one country (id 3), with "de" and
// "ru" as the supported locales.
func TestScanAlternateNames(t *testing.T) {
	Convey("Test scan alternate names", t, func() {
		db := h.CreateDB(t)
		ds.CreateCitiesBucket(db)
		ds.CreateCityNamesBucket(db)
		ds.CreateCountriesBucket(db)

		// Seed data the alternate names refer to by id.
		h.PutToBucket(t, db, ds.CitiesBucketName, "1", "Montreal\t\t\t\t\t")
		h.PutToBucket(t, db, ds.CitiesBucketName, "2", "Moscow\t\t\t\t\t")
		h.PutToBucket(t, db, ds.CountriesBucketName, "3", "DE\tGermany\ten|Germany")

		supported := []string{"de", "ru"}

		Convey("When alternate names file exists", func() {
			// NOTE(review): there is no "\n" between record 12 (Москва) and
			// record 13 (München), so the München record is not on a line of
			// its own. Confirm whether that is intentional; as written, the
			// "city doesn't exist" case may not be exercised by this fixture.
			const fixture = "10\t1\tfr\tMontréal\t\t\t\t\n11\t2\tde\tMoskau\t\t\t\t\n12\t2\tru\tМосква\t\t\t\t13\t9\tde\tMünchen\t\t\t\t\n" +
				"14\t3\tde\tDeutschland\t\t\t\t\n15\t3\ten\tWest Germany\t\t\t\t\n16\t3\tit\tGermania\t\t\t\t"

			path := h.CreateTempfile(t, fixture)

			scanned, scanErr := scanAlternateNames(db, path, supported)
			// The lookup error is deliberately ignored: the assertions below
			// only inspect the translations of the returned country.
			germany, _ := ds.FindCountry(db, "3")

			Convey("Returns number of scanned records", func() {
				So(scanned, ShouldEqual, 2)
			})

			Convey("When the locale is supported", func() {
				Convey("Stores the record if the city exists", func() {
					stored := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "moskau")
					So(stored, ShouldEqual, "Moskau\t2\tde\t0")
				})

				Convey("Doesn't store the record if the city doesn't exist", func() {
					stored := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "münchen")
					So(stored, ShouldEqual, "")
				})

				Convey("Adds translations for countries", func() {
					So(germany.Translations["de"], ShouldEqual, "Deutschland")
				})

				Convey("Doesn't override en names for countries", func() {
					So(germany.Translations["en"], ShouldEqual, "Germany")
				})
			})

			Convey("When the locale is not supported", func() {
				Convey("Doesn't store the record", func() {
					stored := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "montréal")
					So(stored, ShouldEqual, "")
				})

				Convey("Doesn't add translations for countries", func() {
					So(germany.Translations["it"], ShouldEqual, "")
				})
			})

			Convey("Returns no error", func() {
				So(scanErr, ShouldBeNil)
			})
		})

		Convey("When alternate names file does not exist", func() {
			scanned, scanErr := scanAlternateNames(db, "fake.txt", supported)

			Convey("Returns a zero number of scanned records", func() {
				So(scanned, ShouldEqual, 0)
			})

			Convey("Returns an error", func() {
				So(scanErr, ShouldNotBeNil)
			})
		})
	})
}
// TestUtils covers the parser helpers: prepareCountryBytes,
// addTranslationsToCountry, prepareCityBytes, addCityToIndex and
// isSupportedLocale.
//
// The database is created once, outside the Convey blocks, so every block in
// this test shares the same city names and countries buckets.
func TestUtils(t *testing.T) {
	db := h.CreateDB(t)
	ds.CreateCityNamesBucket(db)
	ds.CreateCountriesBucket(db)

	Convey("Test prepare country bytes", t, func() {
		Convey("When the data is correct", func() {
			// Each value equals its own index, so the expected output shows
			// which columns are picked.
			data := []string{
				"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
				"11", "12", "13", "14", "15", "16", "17", "18",
			}

			expected := []byte("0\t4\ten|4")
			actual, err := prepareCountryBytes(data)

			Convey("Joins the array in the right order with tabs", func() {
				So(actual, ShouldResemble, expected)
			})

			Convey("Returns no error", func() {
				So(err, ShouldBeNil)
			})
		})

		Convey("When the data is incorrect", func() {
			data := []string{"yolo"}
			actual, err := prepareCountryBytes(data)

			Convey("Returns an empty bytes array", func() {
				// A nil slice: the helper is expected to return no bytes at all.
				var empty []byte
				So(actual, ShouldResemble, empty)
			})

			Convey("Returns an error", func() {
				So(err, ShouldNotBeNil)
			})
		})
	})

	Convey("Test add translations to country", t, func() {
		translations := []string{"de|Deutschland", "ru|Германия"}

		countryAttrs := []string{"DE", "Germany", "en|Germany"}
		countryString := strings.Join(countryAttrs, "\t")
		h.PutToBucket(t, db, ds.CountriesBucketName, "1", countryString)

		// Keep the error of the function under test in its own variable so
		// that the lookup below cannot overwrite it before it is asserted.
		batchErr := db.Batch(func(tx *bolt.Tx) error {
			b := tx.Bucket(ds.CountriesBucketName)
			return addTranslationsToCountry(b, 1, translations)
		})

		country, findErr := ds.FindCountry(db, "1")

		Convey("Does not modify country data", func() {
			So(country.Code, ShouldEqual, countryAttrs[0])
			So(country.Name, ShouldEqual, countryAttrs[1])
		})

		Convey("Keeps old translations", func() {
			So(country.Translations["en"], ShouldEqual, "Germany")
		})

		Convey("Adds new translations", func() {
			So(country.Translations["de"], ShouldEqual, "Deutschland")
			So(country.Translations["ru"], ShouldEqual, "Германия")
		})

		Convey("Returns no error", func() {
			So(batchErr, ShouldBeNil)
			So(findErr, ShouldBeNil)
		})
	})

	Convey("Test prepare city bytes", t, func() {
		Convey("When the data is correct", func() {
			// Each value equals its own index, so the expected output shows
			// which columns are picked and in what order.
			data := []string{
				"0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10",
				"11", "12", "13", "14", "15", "16", "17", "18",
			}

			expected := []byte("1\t8\t14\t4\t5\t17")
			actual, err := prepareCityBytes(data)

			Convey("Joins the array in the right order with tabs", func() {
				So(actual, ShouldResemble, expected)
			})

			Convey("Returns no error", func() {
				So(err, ShouldBeNil)
			})
		})

		Convey("When the data is incorrect", func() {
			data := []string{"yolo"}
			actual, err := prepareCityBytes(data)

			Convey("Returns an empty bytes array", func() {
				// A nil slice: the helper is expected to return no bytes at all.
				var empty []byte
				So(actual, ShouldResemble, empty)
			})

			Convey("Returns an error", func() {
				So(err, ShouldNotBeNil)
			})
		})
	})

	Convey("Test add city to index", t, func() {
		Convey("If no record for the key exist yet", func() {
			err := db.Batch(func(tx *bolt.Tx) error {
				b := tx.Bucket(ds.CityNamesBucketName)
				return addCityToIndex(b, "1", "Berlin", "en", 2000000)
			})

			Convey("Puts the city name string to the bucket", func() {
				actual := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "berlin")
				So(actual, ShouldEqual, "Berlin\t1\ten\t2000000")
			})

			Convey("Not return an error", func() {
				So(err, ShouldBeNil)
			})
		})

		Convey("If a record for the key exists", func() {
			// Pre-populate the key that the new city name would map to.
			existing := "Moscow\t1\ten\t12000000"
			h.PutToBucket(t, db, ds.CityNamesBucketName, "moscow", existing)

			err := db.Batch(func(tx *bolt.Tx) error {
				b := tx.Bucket(ds.CityNamesBucketName)
				return addCityToIndex(b, "2", "Moscow", "en", 20000)
			})

			Convey("Does not overwrites the existing entry", func() {
				actual := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "moscow")
				So(actual, ShouldEqual, existing)
			})

			Convey("Adds city id as postfix for a new entry key", func() {
				actual := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "moscow|2")
				So(actual, ShouldEqual, "Moscow\t2\ten\t20000")
			})

			Convey("Not return an error", func() {
				So(err, ShouldBeNil)
			})
		})
	})

	Convey("Test is supported locale", t, func() {
		locales := []string{"ru", "en", "de"}

		Convey("Returns true for supported locales", func() {
			for _, locale := range locales {
				So(isSupportedLocale(locale, locales), ShouldBeTrue)
			}
		})

		Convey("Returns false for unsupported locales", func() {
			So(isSupportedLocale("jp", locales), ShouldBeFalse)
		})
	})
}
// TestScanCities exercises scanCities with a valid cities file, a file with
// malformed rows and a missing file, using a minimum population of 2000.
func TestScanCities(t *testing.T) {
	const minPopulation = 2000

	Convey("Test scan cities", t, func() {
		db := h.CreateDB(t)
		ds.CreateCitiesBucket(db)
		ds.CreateCityNamesBucket(db)

		Convey("When cities files exists", func() {
			// Three rows: Gwanda (14450) and Gokwe (18942) are above the
			// minimum population, "Small city" (1942) is below it.
			const fixture = "890516\tGwanda\tGwanda\tJawunda\t-20.93333\t29\tP\tPPLA\tZW\t\t07\t\t\t\t14450\t\t982\tAfrica/Harare\t2009-06-30\n" +
				"890983\tGokwe\tGokwe\tGokwe\t-18.20476\t28.9349\tP\tPPL\tZW\t\t02\t\t\t\t18942\t\t1237\tAfrica/Harare\t2012-05-05\n" +
				"890984\tSmall city\tGokwe\tJawunda\t-18.20576\t29.9349\tP\tPPL\tZW\t\t02\t\t\t\t1942\t\t1237\tAfrica/Harare\t2012-05-05"

			path := h.CreateTempfile(t, fixture)

			scanned, scanErr := scanCities(db, path, minPopulation)

			Convey("Stores parsed cities to the db", func() {
				gwanda := h.ReadFromBucket(t, db, ds.CitiesBucketName, "890516")
				So(gwanda, ShouldEqual, "Gwanda\tZW\t14450\t-20.93333\t29\tAfrica/Harare")

				gokwe := h.ReadFromBucket(t, db, ds.CitiesBucketName, "890983")
				So(gokwe, ShouldEqual, "Gokwe\tZW\t18942\t-18.20476\t28.9349\tAfrica/Harare")
			})

			Convey("Stores parsed city names to the db", func() {
				gwandaName := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "gwanda")
				So(gwandaName, ShouldEqual, "Gwanda\t890516\ten\t14450")

				gokweName := h.ReadFromBucket(t, db, ds.CityNamesBucketName, "gokwe")
				So(gokweName, ShouldEqual, "Gokwe\t890983\ten\t18942")
			})

			Convey("Returns number of scanned records", func() {
				So(scanned, ShouldEqual, 2)
			})

			Convey("Returns no error", func() {
				So(scanErr, ShouldBeNil)
			})
		})

		Convey("When the file has invalid data", func() {
			path := h.CreateTempfile(t, "crap\ncrap\ncrap")
			scanned, scanErr := scanCities(db, path, minPopulation)

			Convey("Returns a zero number of scanned records", func() {
				So(scanned, ShouldEqual, 0)
			})

			Convey("Returns an error", func() {
				So(scanErr, ShouldNotBeNil)
			})
		})

		Convey("When cities file does not exist", func() {
			scanned, scanErr := scanCities(db, "fake.txt", minPopulation)

			Convey("Returns a zero number of scanned records", func() {
				So(scanned, ShouldEqual, 0)
			})

			Convey("Returns an error", func() {
				So(scanErr, ShouldNotBeNil)
			})
		})
	})
}