// Languages returns the given and guessed languages // found in abstract and fulltext. Note: This is slow. // Skip detection on too short strings. func (article *Article) Languages() []string { set := container.NewStringSet() if article.Front.Article.Abstract.Lang != "" { base, err := language.ParseBase(article.Front.Article.Abstract.Lang) if err == nil { set.Add(base.ISO3()) } } vals := []string{ article.Front.Article.Abstract.Value, article.Front.Article.TranslatedAbstract.Title.Value, article.Body.Section.Value, } for _, s := range vals { if len(s) < 20 { continue } lang, err := span.DetectLang3(s) if err != nil || lang == "und" { continue } if !acceptedLanguages.Contains(lang) { continue } set.Add(lang) } return set.Values() }
func ExampleCompose() { nl, _ := language.ParseBase("nl") us, _ := language.ParseRegion("US") de := language.Make("de-1901-u-co-phonebk") jp := language.Make("ja-JP") fi := language.Make("fi-x-ing") u, _ := language.ParseExtension("u-nu-arabic") x, _ := language.ParseExtension("x-piglatin") // Combine a base language and region. fmt.Println(language.Compose(nl, us)) // Combine a base language and extension. fmt.Println(language.Compose(nl, x)) // Replace the region. fmt.Println(language.Compose(jp, us)) // Combine several tags. fmt.Println(language.Compose(us, nl, u)) // Replace the base language of a tag. fmt.Println(language.Compose(de, nl)) fmt.Println(language.Compose(de, nl, u)) // Remove the base language. fmt.Println(language.Compose(de, language.Base{})) // Remove all variants. fmt.Println(language.Compose(de, []language.Variant{})) // Remove all extensions. fmt.Println(language.Compose(de, []language.Extension{})) fmt.Println(language.Compose(fi, []language.Extension{})) // Remove all variants and extensions. fmt.Println(language.Compose(de.Raw())) // An error is gobbled or returned if non-nil. fmt.Println(language.Compose(language.ParseRegion("ZA"))) fmt.Println(language.Compose(language.ParseRegion("HH"))) // Compose uses the same Default canonicalization as Make. fmt.Println(language.Compose(language.Raw.Parse("en-Latn-UK"))) // Call compose on a different CanonType for different results. fmt.Println(language.All.Compose(language.Raw.Parse("en-Latn-UK"))) // Output: // nl-US <nil> // nl-x-piglatin <nil> // ja-US <nil> // nl-US-u-nu-arabic <nil> // nl-1901-u-co-phonebk <nil> // nl-1901-u-nu-arabic <nil> // und-1901-u-co-phonebk <nil> // de-u-co-phonebk <nil> // de-1901 <nil> // fi <nil> // de <nil> // und-ZA <nil> // und language: subtag "HH" is well-formed but unknown // en-Latn-GB <nil> // en-GB <nil> }
// DetectLang3 returns the best guess 3-letter language code for a given text. func DetectLang3(text string) (string, error) { c := cld2.Detect(text) b, err := language.ParseBase(c) if err != nil { return "", err } return b.ISO3(), nil }
// Languages returns a list of language in 3-letter format. func (article *Article) Languages() []string { set := container.NewStringSet() for _, cm := range article.Front.Article.CustomMetaGroup.CustomMeta { if cm.Name.Value == "lang" { base, err := language.ParseBase(cm.Value.Value) if err == nil { set.Add(base.ISO3()) } } } return set.Values() }