Example #1
0
// Languages returns the given and guessed languages
// found in abstract and fulltext. Note: This is slow.
// Skip detection on too short strings.
func (article *Article) Languages() []string {
	set := container.NewStringSet()

	if article.Front.Article.Abstract.Lang != "" {
		base, err := language.ParseBase(article.Front.Article.Abstract.Lang)
		if err == nil {
			set.Add(base.ISO3())
		}
	}

	vals := []string{
		article.Front.Article.Abstract.Value,
		article.Front.Article.TranslatedAbstract.Title.Value,
		article.Body.Section.Value,
	}

	for _, s := range vals {
		if len(s) < 20 {
			continue
		}
		lang, err := span.DetectLang3(s)
		if err != nil || lang == "und" {
			continue
		}
		if !acceptedLanguages.Contains(lang) {
			continue
		}
		set.Add(lang)
	}

	return set.Values()
}
Example #2
0
func ExampleCompose() {
	nl, _ := language.ParseBase("nl")
	us, _ := language.ParseRegion("US")
	de := language.Make("de-1901-u-co-phonebk")
	jp := language.Make("ja-JP")
	fi := language.Make("fi-x-ing")

	u, _ := language.ParseExtension("u-nu-arabic")
	x, _ := language.ParseExtension("x-piglatin")

	// Combine a base language and region.
	fmt.Println(language.Compose(nl, us))
	// Combine a base language and extension.
	fmt.Println(language.Compose(nl, x))
	// Replace the region.
	fmt.Println(language.Compose(jp, us))
	// Combine several tags.
	fmt.Println(language.Compose(us, nl, u))

	// Replace the base language of a tag.
	fmt.Println(language.Compose(de, nl))
	fmt.Println(language.Compose(de, nl, u))
	// Remove the base language.
	fmt.Println(language.Compose(de, language.Base{}))
	// Remove all variants.
	fmt.Println(language.Compose(de, []language.Variant{}))
	// Remove all extensions.
	fmt.Println(language.Compose(de, []language.Extension{}))
	fmt.Println(language.Compose(fi, []language.Extension{}))
	// Remove all variants and extensions.
	fmt.Println(language.Compose(de.Raw()))

	// An error is gobbled or returned if non-nil.
	fmt.Println(language.Compose(language.ParseRegion("ZA")))
	fmt.Println(language.Compose(language.ParseRegion("HH")))

	// Compose uses the same Default canonicalization as Make.
	fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK")))

	// Call compose on a different CanonType for different results.
	fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK")))

	// Output:
	// nl-US <nil>
	// nl-x-piglatin <nil>
	// ja-US <nil>
	// nl-US-u-nu-arabic <nil>
	// nl-1901-u-co-phonebk <nil>
	// nl-1901-u-nu-arabic <nil>
	// und-1901-u-co-phonebk <nil>
	// de-u-co-phonebk <nil>
	// de-1901 <nil>
	// fi <nil>
	// de <nil>
	// und-ZA <nil>
	// und language: subtag "HH" is well-formed but unknown
	// en-Latn-GB <nil>
	// en-GB <nil>
}
Example #3
0
// DetectLang3 returns the best guess 3-letter language code for a given text.
func DetectLang3(text string) (string, error) {
	c := cld2.Detect(text)
	b, err := language.ParseBase(c)
	if err != nil {
		return "", err
	}
	return b.ISO3(), nil
}
Example #4
0
// Languages returns a list of language in 3-letter format.
func (article *Article) Languages() []string {
	set := container.NewStringSet()
	for _, cm := range article.Front.Article.CustomMetaGroup.CustomMeta {
		if cm.Name.Value == "lang" {
			base, err := language.ParseBase(cm.Value.Value)
			if err == nil {
				set.Add(base.ISO3())
			}
		}
	}
	return set.Values()
}