const (
	// SourceID is the identifier used for internal bookkeeping.
	SourceID = "49"
)

// Errors signalling a missing date and a missing URL.
var (
	errNoDate = errors.New("date is missing")
	errNoURL  = errors.New("URL is missing")
)

var (
	// DefaultFormat is the format name "ElectronicArticle".
	// NOTE(review): presumably the fallback when no entry in Formats
	// applies; confirm against the callers.
	DefaultFormat = "ElectronicArticle"

	// Formats, Genres and RefTypes are string maps loaded from the JSON
	// asset files named in each call.
	Formats  = assetutil.MustLoadStringMap("assets/crossref/formats.json")
	Genres   = assetutil.MustLoadStringMap("assets/crossref/genres.json")
	RefTypes = assetutil.MustLoadStringMap("assets/crossref/reftypes.json")

	// AuthorReplacer is a special cleaner for author names. Every listed
	// substring is replaced by the empty string, i.e. removed: "#", "--",
	// "*", "|", "&NA;" and, through the last pair, any remaining "&".
	// NOTE(review): in an interpreted string literal "\u0026" is "&", so
	// "\u0026NA;" is the same string as "&NA;" and that pair is redundant.
	// If the literal text `\u0026NA;` was meant, a raw string is required;
	// confirm the intent.
	AuthorReplacer = strings.NewReplacer("#", "", "--", "", "*", "", "|", "", "&NA;", "", "\u0026NA;", "", "\u0026", "")

	// ArticleTitleBlocker will trigger skips, if the article title matches
	// one of these values exactly.
	ArticleTitleBlocker = []string{"Titelei", "Front Matter", "Advertisement", "Advertisement:"}

	// ArticleTitleCleanerPatterns lists patterns whose matching parts are
	// removed from article titles.
	ArticleTitleCleanerPatterns = []*regexp.Regexp{
		// Runs of six or more literal question marks, refs. #5827.
		regexp.MustCompile(`[?]{6,}`),
	}
// You should have received a copy of the GNU General Public License // along with Foobar. If not, see <http://www.gnu.org/licenses/>. // // @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+> // package exporter import ( "strings" "github.com/miku/span/assetutil" ) var ( SubjectMapping = assetutil.MustLoadStringSliceMap("assets/finc/subjects.json") LanguageMap = assetutil.MustLoadStringMap("assets/finc/iso-639-3-language.json") AIAccessFacet = "Electronic Resources" FormatDe105 = assetutil.MustLoadStringMap("assets/finc/formats/de105.json") FormatDe14 = assetutil.MustLoadStringMap("assets/finc/formats/de14.json") FormatDe15 = assetutil.MustLoadStringMap("assets/finc/formats/de15.json") FormatDe520 = assetutil.MustLoadStringMap("assets/finc/formats/de520.json") FormatDe540 = assetutil.MustLoadStringMap("assets/finc/formats/de540.json") FormatDeCh1 = assetutil.MustLoadStringMap("assets/finc/formats/dech1.json") FormatDed117 = assetutil.MustLoadStringMap("assets/finc/formats/ded117.json") FormatDeGla1 = assetutil.MustLoadStringMap("assets/finc/formats/degla1.json") FormatDel152 = assetutil.MustLoadStringMap("assets/finc/formats/del152.json") FormatDel189 = assetutil.MustLoadStringMap("assets/finc/formats/del189.json") FormatDeZi4 = assetutil.MustLoadStringMap("assets/finc/formats/dezi4.json") FormatDeZwi2 = assetutil.MustLoadStringMap("assets/finc/formats/dezwi2.json") FormatNrw = assetutil.MustLoadStringMap("assets/finc/formats/nrw.json")
// SourceID is the identifier used for internal bookkeeping.
	SourceID = "28"
	// BatchSize is the number of items per batch for grouped channel
	// transport.
	BatchSize = 25000
	// Collection is the collection name.
	Collection = "DOAJ Directory of Open Access Journals"
	// Format is the format for all records.
	Format = "ElectronicArticle"
	// Genre is the genre value "article".
	// NOTE(review): presumably applied to all records, like Format; confirm.
	Genre = "article"
)

// errDateMissing signals a missing date.
var errDateMissing = errors.New("date is missing")

var (
	// LCCPatterns is a map of regular expressions loaded from the
	// assets/finc/lcc.json asset.
	LCCPatterns = assetutil.MustLoadRegexpMap("assets/finc/lcc.json")
	// LanguageMap is a string map loaded from the
	// assets/doaj/language-iso-639-3.json asset.
	// NOTE(review): judging by the file name it maps language values to
	// ISO 639-3 codes; confirm the key format against the asset.
	LanguageMap = assetutil.MustLoadStringMap("assets/doaj/language-iso-639-3.json")
)

// Response is a JSON envelope carrying a Document under the "_source" key
// next to the "_id", "_index" and "_type" metadata keys.
type Response struct {
	ID     string   `json:"_id"`
	Index  string   `json:"_index"`
	Source Document `json:"_source"`
	Type   string   `json:"_type"`
}

// Document is the payload stored in the Source field of a Response.
type Document struct {
	BibJson BibJson `json:"bibjson"`
	Created string  `json:"created_date"`
	ID      string  `json:"id"`
	Index   Index   `json:"index"`
	Updated string  `json:"last_updated"`
Title       string   `xml:"Title"`
	Year        string   `xml:"Year"`
	RawDate     string   `xml:"Date"`
	Volume      string   `xml:"Volume"`
	Issue       string   `xml:"Issue"`
	RawAuthors  []string `xml:"Authors>Author"`
	Language    string   `xml:"Language"`
	Abstract    string   `xml:"Abstract"`
	Group       string   `xml:"x-group"`
	// NOTE(review): unlike RawAuthors this is a single string although the
	// tag addresses nested Descriptor elements; confirm that at most one
	// Descriptor value is expected.
	Descriptors string   `xml:"Descriptors>Descriptor"`
	Text        string   `xml:"Text"`
}

var (
	// rawDateReplacer removes double quotes, newlines and tabs.
	rawDateReplacer = strings.NewReplacer(`"`, "", "\n", "", "\t", "")
	// collections is a string map loaded from the
	// assets/genios/collections.json asset.
	collections = assetutil.MustLoadStringMap("assets/genios/collections.json")
	// acceptedLanguages restricts the possible languages for detection to
	// "deu" and "eng".
	acceptedLanguages = container.NewStringSet("deu", "eng")
)

// Genios is a struct type without fields.
type Genios struct{}

// NewBatch wraps the given documents into a span.Batcher for channel
// communication. Items gets one entry per document, in input order, and
// Apply type-asserts the item it receives to span.Importer.
func NewBatch(docs []*Document) span.Batcher {
	batch := span.Batcher{
		Apply: func(s interface{}) (span.Importer, error) {
			// Single-value type assertion: panics if s is not a span.Importer.
			return s.(span.Importer), nil
		}, Items: make([]interface{}, len(docs))}
	for i, doc := range docs {
		batch.Items[i] = doc
	}