// WikidataJsonConverter converts XML pages into line delimited JSON func WikidataJsonConverter(in chan *wikitools.Page, out chan *string, filter *regexp.Regexp, wg *sync.WaitGroup) { defer wg.Done() var container interface{} for page := range in { canonicalTitle := wikitools.CanonicalizeTitle(page.Title) m := filter.MatchString(canonicalTitle) if !m && page.Redir.Title == "" { dec := json.NewDecoder(strings.NewReader(page.Text)) dec.UseNumber() if err := dec.Decode(&container); err == io.EOF { continue } else if err != nil { fmt.Errorf("%s\n", err) continue } parsed := wikitools.WikidataPage{Title: page.Title, CanonicalTitle: canonicalTitle, Content: container, Redir: page.Redir, } b, err := json.Marshal(parsed) if err != nil { fmt.Errorf("%s\n", err) continue } line := string(b) out <- &line } } }
// WikiJsonConverter converts XML pages into line delimited JSON func WikiJsonConverter(in chan *wikitools.Page, out chan *string, filter *regexp.Regexp, wg *sync.WaitGroup) { defer wg.Done() for page := range in { canonicalTitle := wikitools.CanonicalizeTitle(page.Title) m := filter.MatchString(canonicalTitle) if !m && page.Redir.Title == "" { b, err := json.Marshal(page) if err != nil { fmt.Fprintln(os.Stderr, err) continue } line := string(b) out <- &line } } }