Пример #1
0
// WikidataJsonConverter reads pages from in, decodes each page's Text as JSON,
// wraps the result in a wikitools.WikidataPage, and sends the marshaled JSON
// line to out.
//
// A page is skipped when its canonicalized title matches filter or when its
// Redir.Title is non-empty. A page whose Text yields no JSON value (Decode
// returns io.EOF) is skipped silently. Any other decode error, or a marshal
// error, is reported on os.Stderr and the page is skipped.
//
// The function returns once in has been closed and drained; wg.Done is called
// on return.
func WikidataJsonConverter(in chan *wikitools.Page, out chan *string, filter *regexp.Regexp, wg *sync.WaitGroup) {
	defer wg.Done()

	for page := range in {
		canonicalTitle := wikitools.CanonicalizeTitle(page.Title)
		if filter.MatchString(canonicalTitle) || page.Redir.Title != "" {
			continue
		}

		// Declared per page so the decode target never outlives one iteration.
		var container interface{}

		dec := json.NewDecoder(strings.NewReader(page.Text))
		// Keep numbers as json.Number instead of converting them to float64.
		dec.UseNumber()

		if err := dec.Decode(&container); err != nil {
			// io.EOF just means there was nothing to decode; anything else
			// is a real failure that must be surfaced, not discarded.
			if err != io.EOF {
				fmt.Fprintln(os.Stderr, err)
			}
			continue
		}

		parsed := wikitools.WikidataPage{
			Title:          page.Title,
			CanonicalTitle: canonicalTitle,
			Content:        container,
			Redir:          page.Redir,
		}

		b, err := json.Marshal(parsed)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			continue
		}

		line := string(b)
		out <- &line
	}
}
Пример #2
0
// WikiJsonConverter drains in, marshals each accepted page to JSON, and sends
// a pointer to the resulting line on out.
//
// A page is dropped when its canonicalized title matches filter or when its
// Redir.Title is non-empty. A marshal failure is printed to os.Stderr and the
// page is dropped. wg.Done is called when the function returns, which happens
// once in has been closed and drained.
func WikiJsonConverter(in chan *wikitools.Page, out chan *string, filter *regexp.Regexp, wg *sync.WaitGroup) {
	defer wg.Done()

	for p := range in {
		title := wikitools.CanonicalizeTitle(p.Title)
		if filter.MatchString(title) || p.Redir.Title != "" {
			continue
		}

		encoded, err := json.Marshal(p)
		if err != nil {
			fmt.Fprintln(os.Stderr, err)
			continue
		}

		jsonLine := string(encoded)
		out <- &jsonLine
	}
}