// most of the code in here is because NAL's QC sucks... // sometimes annotations end with a '.'... // sometimes annotations are duplicated... func edgesFromRecord(r *gomarc.Reader) (ret []string) { agricolaId, valid := r.GetField(primaryId[:3], primaryId[3:]) if !valid { return nil } nodupes := make(map[string]bool) for _, sub := range secondaryIds { flds, exists := r.GetFields(sub[:3], sub[3:]) if !exists { continue } for _, fld := range flds { if strings.HasSuffix(fld, ".") { if strings.HasSuffix(fld, "etc.") { // skip } else if fld[len(fld)-2] == ')' || fld[len(fld)-3] != '.' { // acronym? "U.S." fld = fld[:len(fld)-1] } } if !nodupes[fld] { ret = append(ret, agricolaId+"\t"+fld) nodupes[fld] = true } } } return ret }
func xmlRecord(r *gomarc.Reader) string { xmldata := make(map[string][]string) for name, tagsf := range xmlFieldMap { tag := tagsf[:3] sf := tagsf[3:] flds, hadFlds := r.GetFields(tag, sf) if hadFlds { switch name { case "language": flds = []string{flds[0][35:38]} case "published": flds = []string{strings.Join(flds, " ")} } xmldata[name] = append(xmldata[name], flds...) } } xmlout := make(map[string]string) for tagname, contents := range xmldata { maintag := "" indent := " " if strings.Contains(tagname, "/") { parts := strings.Split(tagname, "/") maintag = parts[0] tagname = parts[1] indent = " " } nodupes := make(map[string]bool) for _, c := range contents { if !nodupes[c] { xmlout[maintag] += fmt.Sprintf("%s<%s>%s</%s>\n", indent, tagname, c, tagname) nodupes[c] = true } } } xmlstring := xmlout[""] for othertag, inner := range xmlout { if othertag == "" { continue } xmlstring += fmt.Sprintf(" <%s>\n%s </%s>\n", othertag, inner, othertag) } return "<document>\n" + xmlstring + "</document>" }