예제 #1
0
파일: document.go 프로젝트: ubleipzig/span
// SourceID is used for internal bookkeeping.
const SourceID = "49"

// errNoDate is a package-level error value with the message "date is missing".
var errNoDate = errors.New("date is missing")

// errNoURL is a package-level error value with the message "URL is missing".
var errNoURL = errors.New("URL is missing")

var (
	DefaultFormat = "ElectronicArticle"

	// Load assets
	Formats  = assetutil.MustLoadStringMap("assets/crossref/formats.json")
	Genres   = assetutil.MustLoadStringMap("assets/crossref/genres.json")
	RefTypes = assetutil.MustLoadStringMap("assets/crossref/reftypes.json")

	// AuthorReplacer is a special cleaner for author names.
	AuthorReplacer = strings.NewReplacer("#", "", "--", "", "*", "", "|", "", "&NA;", "", "\u0026NA;", "", "\u0026", "")

	// ArticleTitleBlocker will trigger skips, if article title matches exactly.
	ArticleTitleBlocker = []string{"Titelei", "Front Matter", "Advertisement", "Advertisement:"}

	// ArticleTitleCleanerPatterns removes matching parts.
	ArticleTitleCleanerPatterns = []*regexp.Regexp{
		// refs. #5827
		regexp.MustCompile(`[?]{6,}`),
	}
예제 #2
0
파일: common.go 프로젝트: ubleipzig/span
// You should have received a copy of the GNU General Public License
// along with Foobar.  If not, see <http://www.gnu.org/licenses/>.
//
// @license GPL-3.0+ <http://spdx.org/licenses/GPL-3.0+>
//
package exporter

import (
	"strings"

	"github.com/miku/span/assetutil"
)

var (
	SubjectMapping = assetutil.MustLoadStringSliceMap("assets/finc/subjects.json")
	LanguageMap    = assetutil.MustLoadStringMap("assets/finc/iso-639-3-language.json")
	AIAccessFacet  = "Electronic Resources"

	FormatDe105  = assetutil.MustLoadStringMap("assets/finc/formats/de105.json")
	FormatDe14   = assetutil.MustLoadStringMap("assets/finc/formats/de14.json")
	FormatDe15   = assetutil.MustLoadStringMap("assets/finc/formats/de15.json")
	FormatDe520  = assetutil.MustLoadStringMap("assets/finc/formats/de520.json")
	FormatDe540  = assetutil.MustLoadStringMap("assets/finc/formats/de540.json")
	FormatDeCh1  = assetutil.MustLoadStringMap("assets/finc/formats/dech1.json")
	FormatDed117 = assetutil.MustLoadStringMap("assets/finc/formats/ded117.json")
	FormatDeGla1 = assetutil.MustLoadStringMap("assets/finc/formats/degla1.json")
	FormatDel152 = assetutil.MustLoadStringMap("assets/finc/formats/del152.json")
	FormatDel189 = assetutil.MustLoadStringMap("assets/finc/formats/del189.json")
	FormatDeZi4  = assetutil.MustLoadStringMap("assets/finc/formats/dezi4.json")
	FormatDeZwi2 = assetutil.MustLoadStringMap("assets/finc/formats/dezwi2.json")
	FormatNrw    = assetutil.MustLoadStringMap("assets/finc/formats/nrw.json")
예제 #3
0
파일: document.go 프로젝트: voxadam/span
	// Internal bookkeeping.
	SourceID = "28"
	// BatchSize for grouped channel transport.
	BatchSize = 25000
	// Collection name
	Collection = "DOAJ Directory of Open Access Journals"
	// Format for all records
	Format = "ElectronicArticle"
	Genre  = "article"
)

// errDateMissing is a package-level error value with the message
// "date is missing".
var errDateMissing = errors.New("date is missing")

// Assets loaded once, when the package-level variables are initialized.
// NOTE(review): the Must* helpers presumably panic when an asset cannot be
// loaded — confirm against the assetutil package.
var (
	// LCCPatterns is the result of assetutil.MustLoadRegexpMap for
	// assets/finc/lcc.json.
	LCCPatterns = assetutil.MustLoadRegexpMap("assets/finc/lcc.json")
	// LanguageMap is the result of assetutil.MustLoadStringMap for
	// assets/doaj/language-iso-639-3.json.
	LanguageMap = assetutil.MustLoadStringMap("assets/doaj/language-iso-639-3.json")
)

// Response maps a JSON object with the keys "_id", "_index", "_source" and
// "_type"; the "_source" value is decoded into a Document.
// NOTE(review): the key names look like a search-index hit envelope —
// confirm against the code that produces this JSON.
type Response struct {
	ID     string   `json:"_id"`
	Index  string   `json:"_index"`
	Source Document `json:"_source"`
	Type   string   `json:"_type"`
}

type Document struct {
	BibJson BibJson `json:"bibjson"`
	Created string  `json:"created_date"`
	ID      string  `json:"id"`
	Index   Index   `json:"index"`
	Updated string  `json:"last_updated"`
예제 #4
0
파일: document.go 프로젝트: voxadam/span
	Title            string   `xml:"Title"`
	Year             string   `xml:"Year"`
	RawDate          string   `xml:"Date"`
	Volume           string   `xml:"Volume"`
	Issue            string   `xml:"Issue"`
	RawAuthors       []string `xml:"Authors>Author"`
	Language         string   `xml:"Language"`
	Abstract         string   `xml:"Abstract"`
	Group            string   `xml:"x-group"`
	Descriptors      string   `xml:"Descriptors>Descriptor"`
	Text             string   `xml:"Text"`
}

var (
	// rawDateReplacer deletes double quotes, newlines and tabs from a string.
	// collections is the result of assetutil.MustLoadStringMap for
	// assets/genios/collections.json.
	rawDateReplacer = strings.NewReplacer(`"`, "", "\n", "", "\t", "")
	collections     = assetutil.MustLoadStringMap("assets/genios/collections.json")
	// acceptedLanguages restricts the possible languages for detection to
	// "deu" and "eng".
	acceptedLanguages = container.NewStringSet("deu", "eng")
)

// Genios is a type without fields.
// NOTE(review): presumably it exists only as a method receiver for this
// source — confirm against the methods declared on it.
type Genios struct{}

// NewBatch wraps up a new batch for channel communication.
func NewBatch(docs []*Document) span.Batcher {
	batch := span.Batcher{
		Apply: func(s interface{}) (span.Importer, error) {
			return s.(span.Importer), nil
		}, Items: make([]interface{}, len(docs))}
	for i, doc := range docs {
		batch.Items[i] = doc
	}