示例#1
0
文件: maketables.go 项目: 7ukey/text
// parseRegistry reads the IANA language subtag registry and fills b.registry
// with the entries it finds.
//
// The input is consumed as a stream of whitespace-separated words. Each
// iteration treats the current word as a field name and the following word as
// its value. A "Type:" field starts a new entry; "Subtag:" and "Tag:" register
// the current entry under the given tag, or under every tag of an "a..b"
// range; the remaining recognized fields are stored on the current entry.
// Unrecognized words are skipped one at a time, so the word that was read as
// a value is re-examined as a possible field name on the next iteration.
//
// It panics via log.Panic if the scanner reports an error.
func (b *builder) parseRegistry() {
	src := gen.OpenIANAFile("assignments/language-subtag-registry")
	defer src.Close()
	b.registry = make(map[string]*ianaEntry)

	sc := bufio.NewScanner(src)
	sc.Split(bufio.ScanWords)

	var entry *ianaEntry
	ok := sc.Scan()
	for ok {
		field := sc.Text()
		ok = sc.Scan()
		val := sc.Text()

		switch field {
		case "Type:":
			entry = &ianaEntry{typ: val}
		case "Subtag:", "Tag:":
			bounds := strings.SplitN(val, "..", 2)
			if len(bounds) <= 1 {
				b.addToRegistry(val, entry)
			} else {
				// A range such as "aa..zz": register every tag in between.
				for tag := bounds[0]; tag <= bounds[1]; tag = inc(tag) {
					b.addToRegistry(tag, entry)
				}
			}
		case "Suppress-Script:":
			entry.suppressScript = val
		case "Added:":
			entry.added = val
		case "Deprecated:":
			entry.deprecated = val
		case "Macrolanguage:":
			entry.macro = val
		case "Preferred-Value:":
			entry.preferred = val
		case "Prefix:":
			entry.prefix = append(entry.prefix, val)
		case "Scope:":
			entry.scope = val
		case "Description:":
			// A description spans several words. Collect them until a word
			// that starts with '%' or ends with ':' shows up; that word is
			// left as the scanner's current token so the outer loop can
			// interpret it as the next field name.
			desc := []byte(val)
			for ok = sc.Scan(); ok; ok = sc.Scan() {
				word := sc.Bytes()
				if first, last := word[0], word[len(word)-1]; first == '%' || last == ':' {
					break
				}
				desc = append(append(desc, ' '), word...)
			}
			entry.description = append(entry.description, string(desc))
			continue
		default:
			// Not a known field: re-examine the value word as a field name.
			continue
		}
		ok = sc.Scan()
	}
	if err := sc.Err(); err != nil {
		log.Panic(err)
	}
}
示例#2
0
文件: maketables.go 项目: 7ukey/text
// writeRegion builds the region lookup tables and emits them through the
// builder's write helpers. In order, it writes: the region constants,
// "isoRegionOffset", "regionTypes", "regionISO", "altRegionISO3",
// "altRegionIDs", "regionOldMap", "m49", "m49Index" and "fromM49".
//
// It reads b.region, b.registry and b.supp, and downloads the IANA root zone
// database page to detect ccTLDs. It panics (log.Panicf) when a region is
// assigned a second numeric code, and exits (log.Fatalf) when the number of
// regions does not fit in regionBits bits.
func (b *builder) writeRegion() {
	b.writeConsts(b.region.index, regionConsts...)

	// Index of "AA" in the region list. regionISO is the part of the list
	// starting here; the entries at indices 1..isoOffset-1 are parsed as
	// numeric codes near the end of this function.
	isoOffset := b.region.index("AA")
	// m49map maps a region index to its parsed numeric code.
	m49map := make([]int16, len(b.region.slice()))
	// fromM49map maps a parsed numeric code back to a region index.
	fromM49map := make(map[int16]int)
	// altRegionISO3 is the concatenation of the 3-letter codes that cannot be
	// stored inline in regionISO; altRegionIDs holds the matching region
	// indices (see update below).
	altRegionISO3 := ""
	altRegionIDs := []uint16{}

	b.writeConst("isoRegionOffset", isoOffset)

	// 2-letter region lookup and mapping to numeric codes.
	regionISO := b.region.clone()
	regionISO.s = regionISO.s[isoOffset:]
	regionISO.sorted = false

	// One flag byte per region, indexed by the region's index in b.region.
	regionTypes := make([]byte, len(b.region.s))

	// Is the region valid BCP 47?
	// Only registry keys of length 2 that are unchanged by strings.ToUpper are
	// considered. A description containing "Private use" overwrites the flags
	// with iso3166UserAssgined before bcp47Region is OR-ed in.
	for s, e := range b.registry {
		if len(s) == 2 && s == strings.ToUpper(s) {
			i := b.region.index(s)
			for _, d := range e.description {
				if strings.Contains(d, "Private use") {
					regionTypes[i] = iso3166UserAssgined
				}
			}
			regionTypes[i] |= bcp47Region
		}
	}

	// Is the region a valid ccTLD?
	r := gen.OpenIANAFile("domains/root/db")
	defer r.Close()

	buf, err := ioutil.ReadAll(r)
	failOnError(err)
	// Every quoted link to a two-letter domain page in the downloaded document
	// marks the upper-cased code as a ccTLD.
	re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`)
	for _, m := range re.FindAllSubmatch(buf, -1) {
		i := b.region.index(strings.ToUpper(string(m[1])))
		regionTypes[i] |= ccTLD
	}

	b.writeSlice("regionTypes", regionTypes)

	// iso3Set remembers how each 3-letter code was stored: -1 when it was
	// stored inline, otherwise its byte offset in altRegionISO3.
	iso3Set := make(map[string]int)
	// update appends the encoding of iso3 to the regionISO entry of iso2:
	//   - if iso3 has not been seen before and shares its first letter with
	//     iso2, the last two letters of iso3 are appended inline;
	//   - else, if iso3 was already placed in altRegionISO3, a 0 byte and the
	//     existing offset are appended;
	//   - otherwise iso3 is appended to altRegionISO3, a 0 byte and the new
	//     offset are appended, and the region index is added to altRegionIDs.
	update := func(iso2, iso3 string) {
		i := regionISO.index(iso2)
		if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] {
			regionISO.s[i] += iso3[1:]
			iso3Set[iso3] = -1
		} else {
			if ok && j >= 0 {
				regionISO.s[i] += string([]byte{0, byte(j)})
			} else {
				iso3Set[iso3] = len(altRegionISO3)
				regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))})
				altRegionISO3 += iso3
				altRegionIDs = append(altRegionIDs, uint16(isoOffset+i))
			}
		}
	}
	// Record the numeric code of every territory and build the reverse map.
	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
		i := regionISO.index(tc.Type) + isoOffset
		if d := m49map[i]; d != 0 {
			log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d)
		}
		m49 := parseM49(tc.Numeric)
		m49map[i] = m49
		// A zero lookup result is treated as "no region recorded yet".
		if r := fromM49map[m49]; r == 0 {
			fromM49map[m49] = i
		} else if r != i {
			// Two regions share this code. Replace the recorded region only
			// if it is deprecated and the new one is in the registry and is
			// either not deprecated or has a greater (string-compared)
			// deprecation value.
			dep := b.registry[regionISO.s[r-isoOffset]].deprecated
			if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) {
				fromM49map[m49] = i
			}
		}
	}
	// Aliases whose type is 3 bytes long and starts with a byte <= '9', and
	// whose replacement is 2 bytes long, supply a reverse mapping for codes
	// that have none yet.
	for _, ta := range b.supp.Metadata.Alias.TerritoryAlias {
		if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 {
			from := parseM49(ta.Type)
			if r := fromM49map[from]; r == 0 {
				fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset
			}
		}
	}
	// Attach the 3-letter code to every territory that has one.
	for _, tc := range b.supp.CodeMappings.TerritoryCodes {
		if len(tc.Alpha3) == 3 {
			update(tc.Type, tc.Alpha3)
		}
	}
	// These entries are not included in territoryCodes. Mostly 3-letter variants
	// of deleted codes and an entry for QU.
	for _, m := range []struct{ iso2, iso3 string }{
		{"CT", "CTE"},
		{"DY", "DHY"},
		{"HV", "HVO"},
		{"JT", "JTN"},
		{"MI", "MID"},
		{"NH", "NHB"},
		{"NQ", "ATN"},
		{"PC", "PCI"},
		{"PU", "PUS"},
		{"PZ", "PCZ"},
		{"RH", "RHO"},
		{"VD", "VDR"},
		{"WK", "WAK"},
		// These three-letter codes are used for others as well.
		{"FQ", "ATF"},
	} {
		update(m.iso2, m.iso3)
	}
	// Entries whose length is not 4 get two spaces appended.
	// NOTE(review): presumably these are 2-letter entries that received no
	// 3-letter code, so that every entry ends up 4 bytes long — confirm.
	for i, s := range regionISO.s {
		if len(s) != 4 {
			regionISO.s[i] = s + "  "
		}
	}
	b.writeConst("regionISO", tag.Index(regionISO.join()))
	b.writeConst("altRegionISO3", altRegionISO3)
	b.writeSlice("altRegionIDs", altRegionIDs)

	// Create list of deprecated regions.
	// TODO: consider inserting SF -> FI. Not included by CLDR, but is the only
	// Transitionally-reserved mapping not included.
	regionOldMap := stringSet{}
	// Include regions in territoryAlias (not all are in the IANA registry!)
	for _, reg := range b.supp.Metadata.Alias.TerritoryAlias {
		if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 {
			regionOldMap.add(reg.Type)
			regionOldMap.updateLater(reg.Type, reg.Replacement)
			// The second result of find is discarded for both lookups.
			i, _ := regionISO.find(reg.Type)
			j, _ := regionISO.find(reg.Replacement)
			// A deprecated region without a numeric code takes the code of
			// its replacement.
			if k := m49map[i+isoOffset]; k == 0 {
				m49map[i+isoOffset] = m49map[j+isoOffset]
			}
		}
	}
	b.writeSortedMap("regionOldMap", &regionOldMap, func(s string) uint16 {
		return uint16(b.region.index(s))
	})
	// 3-digit region lookup, groupings.
	// The region strings before isoOffset (starting at index 1) are themselves
	// parsed as numeric codes and map to their own index.
	for i := 1; i < isoOffset; i++ {
		m := parseM49(b.region.s[i])
		m49map[i] = m
		fromM49map[m] = i
	}
	b.writeSlice("m49", m49map)

	// Each fromM49 entry packs the low searchBits bits of a numeric code above
	// a regionBits-wide region index.
	const (
		searchBits = 7
		regionBits = 9
	)
	if len(m49map) >= 1<<regionBits {
		log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits)
	}
	m49Index := [9]int16{}
	fromM49 := []uint16{}
	// Collect and sort the numeric codes so that fromM49 is emitted in
	// ascending code order.
	m49 := []int{}
	for k, _ := range fromM49map {
		m49 = append(m49, int(k))
	}
	sort.Ints(m49)
	// The smallest key is skipped.
	// NOTE(review): presumably that key is 0, standing for "no code" — confirm.
	for _, k := range m49[1:] {
		val := (k & (1<<searchBits - 1)) << regionBits
		fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)]))
		// Slot (k>>searchBits)+1 of m49Index is set to the number of entries
		// written so far; because keys are visited in ascending order, its
		// final value is the end offset in fromM49 of the codes whose high
		// bits equal k>>searchBits.
		m49Index[1:][k>>searchBits] = int16(len(fromM49))
	}
	b.writeSlice("m49Index", m49Index)
	b.writeSlice("fromM49", fromM49)
}
示例#3
0
文件: gen.go 项目: ChongFeng/beats
// main downloads the IANA character-set registry, and generates mib.go in
// package "identifier": a const block with one MIB constant per registry
// record, each preceded by a comment built from the record's name, MIME name,
// description and references.
//
// The constant name is taken from the record's "cs"-prefixed alias that
// contains no '-'; when several aliases qualify, the last one wins. The
// program exits through log.Fatal/log.Fatalf if the registry cannot be
// decoded, is empty, has an unexpected ID, or contains a record without a
// usable alias.
func main() {
	r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml")
	defer r.Close()

	reg := &registry{}
	if err := xml.NewDecoder(r).Decode(reg); err != nil && err != io.EOF {
		log.Fatalf("Error decoding charset registry: %v", err)
	}
	// Check for an empty registry separately: indexing Registry[0] to build
	// the error message would otherwise panic with an index out of range.
	if len(reg.Registry) == 0 {
		log.Fatal("No registry found in charset registry")
	}
	if id := reg.Registry[0].ID; id != "character-sets-1" {
		log.Fatalf("Unexpected ID %s", id)
	}

	w := &bytes.Buffer{}
	fmt.Fprintf(w, "const (\n")
	for _, rec := range reg.Registry[0].Record {
		constName := ""
		for _, a := range rec.Alias {
			if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 {
				// Some of the constant definitions have comments in them. Strip those.
				constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0])
			}
		}
		if constName == "" {
			switch rec.MIB {
			case "2085":
				constName = "HZGB2312" // Not listed as alias for some reason.
			default:
				log.Fatalf("No cs alias defined for %s.", rec.MIB)
			}
		}
		if rec.MIME != "" {
			rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME)
		}
		fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME)
		if len(rec.Desc.Data) > 0 {
			// The description is an XML fragment. Flatten it into comment
			// lines: text is copied line by line, and the "data" attribute of
			// non-person xref elements is written when the element closes.
			fmt.Fprint(w, "// ")
			d := xml.NewDecoder(strings.NewReader(rec.Desc.Data))
			// inElem suppresses the comment line break for text that follows
			// the start of the description or an xref start tag.
			inElem := true
			attr := ""
			for {
				t, err := d.Token()
				if err != nil {
					if err != io.EOF {
						log.Fatal(err)
					}
					break
				}
				switch x := t.(type) {
				case xml.CharData:
					attr = "" // Don't need attribute info.
					a := bytes.Split([]byte(x), []byte("\n"))
					for i, b := range a {
						if b = bytes.TrimSpace(b); len(b) != 0 {
							// Start a new comment line for every non-empty
							// source line after the first one of a text run.
							if !inElem && i > 0 {
								fmt.Fprint(w, "\n// ")
							}
							inElem = false
							fmt.Fprintf(w, "%s ", string(b))
						}
					}
				case xml.StartElement:
					if x.Name.Local == "xref" {
						inElem = true
						// The data attribute is only kept if a preceding type
						// attribute was something other than "person".
						use := false
						for _, a := range x.Attr {
							if a.Name.Local == "type" {
								use = use || a.Value != "person"
							}
							if a.Name.Local == "data" && use {
								attr = a.Value + " "
							}
						}
					}
				case xml.EndElement:
					inElem = false
					fmt.Fprint(w, attr)
				}
			}
			fmt.Fprint(w, "\n")
		}
		// Only RFC and URI cross-references are listed; RFC names are
		// upper-cased.
		for _, x := range rec.Xref {
			switch x.Type {
			case "rfc":
				fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data))
			case "uri":
				fmt.Fprintf(w, "// Reference: %s\n", x.Data)
			}
		}
		fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB)
		fmt.Fprintln(w)
	}
	fmt.Fprintln(w, ")")

	gen.WriteGoFile("mib.go", "identifier", w.Bytes())
}