// removeDateFromAuthor removes any dates from a string s that should only // contain the authors of the article func removeDateFromAuthor(s string) string { s = regexp.MustCompile(`\d+,\s`).ReplaceAllString(s, "") s = dateRegexp().ReplaceAllString(s, "") //s = strings.Replace(s, ",", " ", -1) return strtrans.Spaces(s) }
// sanatizeDate removes any authors from a string s that should only // contain the date of the article func sanatizeDate(s string) string { matches := dateRegexp().FindAllString(s, -1) date := strings.Join(matches, " ") if date == "" { date = "unknown date" } //log.Printf("match: |%+q|\n", date) return strtrans.Spaces(date) }
// sanatizeTitle removes any host site affiliations after a pipe "|" from a // string s that should only contain the title of the article // E.g.: The civic drama of Socrates trial | Aeon Essays // --> The civic drama of Socrates trial func sanatizeTitle(s string) string { s = regexp.MustCompile(`\s\|\s.+`).ReplaceAllString(s, "") return strtrans.Spaces(s) }