Exemple #1
0
// authorDigitsCommaRe matches one or more digits followed by a comma and a
// single whitespace character, e.g. the "12, " in "March 12, 2018".
// It is compiled once at package scope instead of on every call.
var authorDigitsCommaRe = regexp.MustCompile(`\d+,\s`)

// removeDateFromAuthor strips date fragments from s, a string that should
// only contain the authors of the article.
//
// Two passes are applied in order: first every "<digits>, " sequence is
// removed, then every match of dateRegexp() is removed. The remainder is
// passed through strtrans.Spaces before being returned.
func removeDateFromAuthor(s string) string {
	// Drop day-number fragments first so the date pattern sees the same
	// input it did before this regexp was hoisted.
	s = authorDigitsCommaRe.ReplaceAllString(s, "")
	s = dateRegexp().ReplaceAllString(s, "")

	return strtrans.Spaces(s)
}
Exemple #2
0
// sanatizeDate keeps only the date portions of s, a string that should
// contain nothing but the article's date (author names and other text are
// discarded).
//
// Every match of dateRegexp() in s is collected and the matches are joined
// with single spaces. When nothing is found, the placeholder "unknown date"
// is used instead. The result is passed through strtrans.Spaces.
func sanatizeDate(s string) string {
	const fallback = "unknown date"

	found := strings.Join(dateRegexp().FindAllString(s, -1), " ")
	if found != "" {
		return strtrans.Spaces(found)
	}

	// No date-like text in s: report the placeholder.
	return strtrans.Spaces(fallback)
}
Exemple #3
0
// titleSiteSuffixRe matches a whitespace character, a pipe "|", another
// whitespace character and then everything up to the end of the line.
// It is compiled once at package scope instead of on every call.
var titleSiteSuffixRe = regexp.MustCompile(`\s\|\s.+`)

// sanatizeTitle removes any host site affiliation that follows a
// whitespace-delimited pipe "|" in s, a string that should only contain the
// title of the article. The result is passed through strtrans.Spaces.
//
// E.g.: The civic drama of Socrates trial | Aeon Essays
//  -->  The civic drama of Socrates trial
func sanatizeTitle(s string) string {
	s = titleSiteSuffixRe.ReplaceAllString(s, "")
	return strtrans.Spaces(s)
}