func (doc *Document) Authors() (authors []finc.Author) { for _, ra := range doc.Author { authors = append(authors, finc.Author{ FirstName: AuthorReplacer.Replace(span.UnescapeTrim(ra.Given)), LastName: AuthorReplacer.Replace(span.UnescapeTrim(ra.Family)), }) } return authors }
// CombinedTitle returns a longish title. func (doc *Document) CombinedTitle() string { if len(doc.Title) > 0 { if len(doc.Subtitle) > 0 { return span.UnescapeTrim(fmt.Sprintf("%s : %s", strings.Join(doc.Title, " "), strings.Join(doc.Subtitle, " "))) } return span.UnescapeTrim(strings.Join(doc.Title, " ")) } if len(doc.Subtitle) > 0 { return span.UnescapeTrim(strings.Join(doc.Subtitle, " ")) } return "" }
// ToIntermediateSchema converts a crossref document into IS. func (doc *Document) ToIntermediateSchema() (*finc.IntermediateSchema, error) { var err error output := finc.NewIntermediateSchema() output.Date, err = doc.Issued.Date() if err != nil { return output, err } output.RawDate = output.Date.Format("2006-01-02") if doc.URL == "" { return output, errNoURL } output.RecordID = doc.RecordID() if len(output.RecordID) > span.KeyLengthLimit { return output, span.Skip{Reason: fmt.Sprintf("ID_TOO_LONG %s", output.RecordID)} } if output.Date.After(Future) { return output, span.Skip{Reason: fmt.Sprintf("TOO_FUTURISTIC %s", output.RecordID)} } if doc.Type == "journal-issue" { return output, span.Skip{Reason: fmt.Sprintf("JOURNAL_ISSUE %s", output.RecordID)} } output.ArticleTitle = doc.CombinedTitle() if len(output.ArticleTitle) == 0 { return output, span.Skip{Reason: fmt.Sprintf("NO_ATITLE %s", output.RecordID)} } for _, title := range ArticleTitleBlocker { if output.ArticleTitle == title { return output, span.Skip{Reason: fmt.Sprintf("BLOCKED_ATITLE %s", output.RecordID)} } } for _, p := range ArticleTitleCleanerPatterns { output.ArticleTitle = p.ReplaceAllString(output.ArticleTitle, "") } // refs. #8428 if len(output.ArticleTitle) > 32000 { return output, span.Skip{Reason: fmt.Sprintf("TOO_LONG_TITLE %s", output.RecordID)} } output.DOI = doc.DOI output.Format = Formats.LookupDefault(doc.Type, DefaultFormat) output.Genre = Genres.LookupDefault(doc.Type, "unknown") output.ISSN = doc.ISSN output.Issue = doc.Issue output.Languages = []string{"eng"} output.Publishers = append(output.Publishers, doc.Publisher) output.RefType = RefTypes.LookupDefault(doc.Type, "GEN") output.SourceID = SourceID output.Subjects = doc.Subjects output.Type = doc.Type output.URL = append(output.URL, doc.URL) output.Volume = doc.Volume if len(doc.ContainerTitle) > 0 { output.JournalTitle = span.UnescapeTrim(doc.ContainerTitle[0]) } else { return output, span.Skip{Reason: fmt.Sprintf("NO_JTITLE %s", output.RecordID)} } if len(doc.Subtitle) > 0 { output.ArticleSubtitle = span.UnescapeTrim(doc.Subtitle[0]) } output.Authors = doc.Authors() // TODO(miku): do we need a config for these things? // Maybe a generic filter (in js?) that will gather exclusion rules? // if len(output.Authors) == 0 { // return output, span.Skip{Reason: fmt.Sprintf("NO_AUTHORS %s", output.RecordID)} // } pi := doc.PageInfo() output.StartPage = fmt.Sprintf("%d", pi.StartPage) output.EndPage = fmt.Sprintf("%d", pi.EndPage) output.Pages = pi.RawMessage output.PageCount = fmt.Sprintf("%d", pi.PageCount()) if doc.Publisher == "" { output.MegaCollection = fmt.Sprintf("X-U (CrossRef)") } else { output.MegaCollection = fmt.Sprintf("%s (CrossRef)", doc.Publisher) } return output, nil }
// ShortTitle returns the first main title only. func (doc *Document) ShortTitle() (s string) { if len(doc.Title) > 0 { s = span.UnescapeTrim(doc.Title[0]) } return }