func (b *builder) parseRegistry() { r := gen.OpenIANAFile("assignments/language-subtag-registry") defer r.Close() b.registry = make(map[string]*ianaEntry) scan := bufio.NewScanner(r) scan.Split(bufio.ScanWords) var record *ianaEntry for more := scan.Scan(); more; { key := scan.Text() more = scan.Scan() value := scan.Text() switch key { case "Type:": record = &ianaEntry{typ: value} case "Subtag:", "Tag:": if s := strings.SplitN(value, "..", 2); len(s) > 1 { for a := s[0]; a <= s[1]; a = inc(a) { b.addToRegistry(a, record) } } else { b.addToRegistry(value, record) } case "Suppress-Script:": record.suppressScript = value case "Added:": record.added = value case "Deprecated:": record.deprecated = value case "Macrolanguage:": record.macro = value case "Preferred-Value:": record.preferred = value case "Prefix:": record.prefix = append(record.prefix, value) case "Scope:": record.scope = value case "Description:": buf := []byte(value) for more = scan.Scan(); more; more = scan.Scan() { b := scan.Bytes() if b[0] == '%' || b[len(b)-1] == ':' { break } buf = append(buf, ' ') buf = append(buf, b...) } record.description = append(record.description, string(buf)) continue default: continue } more = scan.Scan() } if scan.Err() != nil { log.Panic(scan.Err()) } }
func (b *builder) writeRegion() { b.writeConsts(b.region.index, regionConsts...) isoOffset := b.region.index("AA") m49map := make([]int16, len(b.region.slice())) fromM49map := make(map[int16]int) altRegionISO3 := "" altRegionIDs := []uint16{} b.writeConst("isoRegionOffset", isoOffset) // 2-letter region lookup and mapping to numeric codes. regionISO := b.region.clone() regionISO.s = regionISO.s[isoOffset:] regionISO.sorted = false regionTypes := make([]byte, len(b.region.s)) // Is the region valid BCP 47? for s, e := range b.registry { if len(s) == 2 && s == strings.ToUpper(s) { i := b.region.index(s) for _, d := range e.description { if strings.Contains(d, "Private use") { regionTypes[i] = iso3166UserAssgined } } regionTypes[i] |= bcp47Region } } // Is the region a valid ccTLD? r := gen.OpenIANAFile("domains/root/db") defer r.Close() buf, err := ioutil.ReadAll(r) failOnError(err) re := regexp.MustCompile(`"/domains/root/db/([a-z]{2}).html"`) for _, m := range re.FindAllSubmatch(buf, -1) { i := b.region.index(strings.ToUpper(string(m[1]))) regionTypes[i] |= ccTLD } b.writeSlice("regionTypes", regionTypes) iso3Set := make(map[string]int) update := func(iso2, iso3 string) { i := regionISO.index(iso2) if j, ok := iso3Set[iso3]; !ok && iso3[0] == iso2[0] { regionISO.s[i] += iso3[1:] iso3Set[iso3] = -1 } else { if ok && j >= 0 { regionISO.s[i] += string([]byte{0, byte(j)}) } else { iso3Set[iso3] = len(altRegionISO3) regionISO.s[i] += string([]byte{0, byte(len(altRegionISO3))}) altRegionISO3 += iso3 altRegionIDs = append(altRegionIDs, uint16(isoOffset+i)) } } } for _, tc := range b.supp.CodeMappings.TerritoryCodes { i := regionISO.index(tc.Type) + isoOffset if d := m49map[i]; d != 0 { log.Panicf("%s found as a duplicate UN.M49 code of %03d", tc.Numeric, d) } m49 := parseM49(tc.Numeric) m49map[i] = m49 if r := fromM49map[m49]; r == 0 { fromM49map[m49] = i } else if r != i { dep := b.registry[regionISO.s[r-isoOffset]].deprecated if t := b.registry[tc.Type]; t != nil && dep != "" && (t.deprecated == "" || t.deprecated > dep) { fromM49map[m49] = i } } } for _, ta := range b.supp.Metadata.Alias.TerritoryAlias { if len(ta.Type) == 3 && ta.Type[0] <= '9' && len(ta.Replacement) == 2 { from := parseM49(ta.Type) if r := fromM49map[from]; r == 0 { fromM49map[from] = regionISO.index(ta.Replacement) + isoOffset } } } for _, tc := range b.supp.CodeMappings.TerritoryCodes { if len(tc.Alpha3) == 3 { update(tc.Type, tc.Alpha3) } } // This entries are not included in territoryCodes. Mostly 3-letter variants // of deleted codes and an entry for QU. for _, m := range []struct{ iso2, iso3 string }{ {"CT", "CTE"}, {"DY", "DHY"}, {"HV", "HVO"}, {"JT", "JTN"}, {"MI", "MID"}, {"NH", "NHB"}, {"NQ", "ATN"}, {"PC", "PCI"}, {"PU", "PUS"}, {"PZ", "PCZ"}, {"RH", "RHO"}, {"VD", "VDR"}, {"WK", "WAK"}, // These three-letter codes are used for others as well. {"FQ", "ATF"}, } { update(m.iso2, m.iso3) } for i, s := range regionISO.s { if len(s) != 4 { regionISO.s[i] = s + " " } } b.writeConst("regionISO", tag.Index(regionISO.join())) b.writeConst("altRegionISO3", altRegionISO3) b.writeSlice("altRegionIDs", altRegionIDs) // Create list of deprecated regions. // TODO: consider inserting SF -> FI. Not included by CLDR, but is the only // Transitionally-reserved mapping not included. regionOldMap := stringSet{} // Include regions in territoryAlias (not all are in the IANA registry!) for _, reg := range b.supp.Metadata.Alias.TerritoryAlias { if len(reg.Type) == 2 && reg.Reason == "deprecated" && len(reg.Replacement) == 2 { regionOldMap.add(reg.Type) regionOldMap.updateLater(reg.Type, reg.Replacement) i, _ := regionISO.find(reg.Type) j, _ := regionISO.find(reg.Replacement) if k := m49map[i+isoOffset]; k == 0 { m49map[i+isoOffset] = m49map[j+isoOffset] } } } b.writeSortedMap("regionOldMap", ®ionOldMap, func(s string) uint16 { return uint16(b.region.index(s)) }) // 3-digit region lookup, groupings. for i := 1; i < isoOffset; i++ { m := parseM49(b.region.s[i]) m49map[i] = m fromM49map[m] = i } b.writeSlice("m49", m49map) const ( searchBits = 7 regionBits = 9 ) if len(m49map) >= 1<<regionBits { log.Fatalf("Maximum number of regions exceeded: %d > %d", len(m49map), 1<<regionBits) } m49Index := [9]int16{} fromM49 := []uint16{} m49 := []int{} for k, _ := range fromM49map { m49 = append(m49, int(k)) } sort.Ints(m49) for _, k := range m49[1:] { val := (k & (1<<searchBits - 1)) << regionBits fromM49 = append(fromM49, uint16(val|fromM49map[int16(k)])) m49Index[1:][k>>searchBits] = int16(len(fromM49)) } b.writeSlice("m49Index", m49Index) b.writeSlice("fromM49", fromM49) }
func main() { r := gen.OpenIANAFile("assignments/character-sets/character-sets.xml") reg := ®istry{} if err := xml.NewDecoder(r).Decode(®); err != nil && err != io.EOF { log.Fatalf("Error decoding charset registry: %v", err) } if len(reg.Registry) == 0 || reg.Registry[0].ID != "character-sets-1" { log.Fatalf("Unexpected ID %s", reg.Registry[0].ID) } w := &bytes.Buffer{} fmt.Fprintf(w, "const (\n") for _, rec := range reg.Registry[0].Record { constName := "" for _, a := range rec.Alias { if strings.HasPrefix(a, "cs") && strings.IndexByte(a, '-') == -1 { // Some of the constant definitions have comments in them. Strip those. constName = strings.Title(strings.SplitN(a[2:], "\n", 2)[0]) } } if constName == "" { switch rec.MIB { case "2085": constName = "HZGB2312" // Not listed as alias for some reason. default: log.Fatalf("No cs alias defined for %s.", rec.MIB) } } if rec.MIME != "" { rec.MIME = fmt.Sprintf(" (MIME: %s)", rec.MIME) } fmt.Fprintf(w, "// %s is the MIB identifier with IANA name %s%s.\n//\n", constName, rec.Name, rec.MIME) if len(rec.Desc.Data) > 0 { fmt.Fprint(w, "// ") d := xml.NewDecoder(strings.NewReader(rec.Desc.Data)) inElem := true attr := "" for { t, err := d.Token() if err != nil { if err != io.EOF { log.Fatal(err) } break } switch x := t.(type) { case xml.CharData: attr = "" // Don't need attribute info. a := bytes.Split([]byte(x), []byte("\n")) for i, b := range a { if b = bytes.TrimSpace(b); len(b) != 0 { if !inElem && i > 0 { fmt.Fprint(w, "\n// ") } inElem = false fmt.Fprintf(w, "%s ", string(b)) } } case xml.StartElement: if x.Name.Local == "xref" { inElem = true use := false for _, a := range x.Attr { if a.Name.Local == "type" { use = use || a.Value != "person" } if a.Name.Local == "data" && use { attr = a.Value + " " } } } case xml.EndElement: inElem = false fmt.Fprint(w, attr) } } fmt.Fprint(w, "\n") } for _, x := range rec.Xref { switch x.Type { case "rfc": fmt.Fprintf(w, "// Reference: %s\n", strings.ToUpper(x.Data)) case "uri": fmt.Fprintf(w, "// Reference: %s\n", x.Data) } } fmt.Fprintf(w, "%s MIB = %s\n", constName, rec.MIB) fmt.Fprintln(w) } fmt.Fprintln(w, ")") gen.WriteGoFile("mib.go", "identifier", w.Bytes()) }