Beispiel #1
0
func insertCollation(builder *build.Builder, locale string, c *Collation) {
	t := builder.Tailoring(locale)
	for _, r := range c.Rules.Any {
		switch r.XMLName.Local {
		case "reset":
			if r.Before == "" {
				failOnError(t.SetAnchor(r.Value))
			} else {
				failOnError(t.SetAnchorBefore(r.Value))
			}
		case "x":
			var context, extend string
			for _, r1 := range r.Any {
				switch r1.XMLName.Local {
				case "context":
					context = r1.Value
				case "extend":
					extend = r1.Value
				}
			}
			for _, r1 := range r.Any {
				if t := r1.XMLName.Local; t == "context" || t == "extend" {
					continue
				}
				insertTailoring(t, r1, context, extend)
			}
		default:
			insertTailoring(t, r, "", "")
		}
	}
}
Beispiel #2
0
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) {
	r := openReader(*ducet)
	defer r.Close()
	input := bufio.NewReader(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	for i := 1; true; i++ {
		l, prefix, err := input.ReadLine()
		if err == io.EOF {
			break
		}
		Error(err)
		line := string(l)
		if prefix {
			log.Fatalf("%d: buffer overflow", i)
		}
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			if strings.HasPrefix(line[1:], "version ") {
				if v := strings.Split(line[1:], " ")[1]; v != unicode.Version {
					log.Fatalf("incompatible version %s; want %s", v, unicode.Version)
				}
			}
		} else {
			// parse entries
			part := strings.Split(line, " ; ")
			if len(part) != 2 {
				log.Fatalf("%d: production rule without ';': %v", i, line)
			}
			lhs := []rune{}
			for _, v := range strings.Split(part[0], " ") {
				if v != "" {
					lhs = append(lhs, rune(convHex(i, v)))
				}
			}
			vars := []int{}
			rhs := [][]int{}
			for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
				if m[1] == "*" {
					vars = append(vars, i)
				}
				elem := []int{}
				for _, h := range strings.Split(m[2], ".") {
					elem = append(elem, convHex(i, h))
				}
				rhs = append(rhs, elem)
			}
			builder.Add(lhs, rhs, vars)
		}
	}
}
Beispiel #3
0
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) {
	var r io.ReadCloser
	var err error
	if strings.HasSuffix(*root, ".zip") {
		for _, f := range openArchive(root).File {
			if strings.HasSuffix(f.Name, "allkeys_CLDR.txt") {
				r, err = f.Open()
			}
		}
		if r == nil {
			err = fmt.Errorf("file allkeys_CLDR.txt not found in archive %q", *root)
		}
	} else {
		r, err = openReader(root)
	}
	failOnError(err)
	defer r.Close()
	scanner := bufio.NewScanner(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	for i := 1; scanner.Scan(); i++ {
		line := scanner.Text()
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			// parse properties
			switch {
			case strings.HasPrefix(line[1:], "version "):
				a := strings.Split(line[1:], " ")
				if a[1] != unicode.Version {
					log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version)
				}
			case strings.HasPrefix(line[1:], "backwards "):
				log.Fatalf("%d: unsupported option backwards", i)
			default:
				log.Printf("%d: unknown option %s", i, line[1:])
			}
		} else {
			// parse entries
			part := strings.Split(line, " ; ")
			if len(part) != 2 {
				log.Fatalf("%d: production rule without ';': %v", i, line)
			}
			lhs := []rune{}
			for _, v := range strings.Split(part[0], " ") {
				if v == "" {
					continue
				}
				lhs = append(lhs, rune(convHex(i, v)))
			}
			var n int
			var vars []int
			rhs := [][]int{}
			for i, m := range colelem.FindAllStringSubmatch(part[1], -1) {
				n += len(m[0])
				elem := []int{}
				for _, h := range strings.Split(m[2], ".") {
					elem = append(elem, convHex(i, h))
				}
				if m[1] == "*" {
					vars = append(vars, i)
				}
				rhs = append(rhs, elem)
			}
			if len(part[1]) < n+3 || part[1][n+1] != '#' {
				log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
			}
			if *test {
				testInput.add(string(lhs))
			}
			failOnError(builder.Add(lhs, rhs, vars))
		}
	}
	if scanner.Err() != nil {
		log.Fatal(scanner.Err())
	}
}
Beispiel #4
0
// parseUCA parses a Default Unicode Collation Element Table of the format
// specified in http://www.unicode.org/reports/tr10/#File_Format.
// It returns the variable top.
func parseUCA(builder *build.Builder) int {
	maxVar, minNonVar := 0, 1<<30
	r, err := openReader(*ducet)
	failonerror(err)
	defer r.Close()
	input := bufio.NewReader(r)
	colelem := regexp.MustCompile(`\[([.*])([0-9A-F.]+)\]`)
	for i := 1; err == nil; i++ {
		l, prefix, e := input.ReadLine()
		err = e
		line := string(l)
		if prefix {
			log.Fatalf("%d: buffer overflow", i)
		}
		if err != nil && err != io.EOF {
			log.Fatalf("%d: %v", i, err)
		}
		if len(line) == 0 || line[0] == '#' {
			continue
		}
		if line[0] == '@' {
			// parse properties
			switch {
			case strings.HasPrefix(line[1:], "version "):
				a := strings.Split(line[1:], " ")
				if a[1] != unicode.Version {
					log.Fatalf("incompatible version %s; want %s", a[1], unicode.Version)
				}
			case strings.HasPrefix(line[1:], "backwards "):
				log.Fatalf("%d: unsupported option backwards", i)
			default:
				log.Printf("%d: unknown option %s", i, line[1:])
			}
		} else {
			// parse entries
			part := strings.Split(line, " ; ")
			if len(part) != 2 {
				log.Fatalf("%d: production rule without ';': %v", i, line)
			}
			lhs := []rune{}
			for _, v := range strings.Split(part[0], " ") {
				if v == "" {
					continue
				}
				lhs = append(lhs, rune(convHex(i, v)))
			}
			var n int
			rhs := [][]int{}
			for _, m := range colelem.FindAllStringSubmatch(part[1], -1) {
				n += len(m[0])
				elem := []int{}
				for _, h := range strings.Split(m[2], ".") {
					elem = append(elem, convHex(i, h))
				}
				if p := elem[0]; m[1] == "*" {
					if p > maxVar {
						maxVar = p
					}
				} else if p > 0 && p < minNonVar {
					minNonVar = p
				}
				rhs = append(rhs, elem)
			}
			if len(part[1]) < n+3 || part[1][n+1] != '#' {
				log.Fatalf("%d: expected comment; found %s", i, part[1][n:])
			}
			builder.Add(lhs, rhs)
		}
	}
	if maxVar >= minNonVar {
		log.Fatalf("found maxVar > minNonVar (%d > %d)", maxVar, minNonVar)
	}
	return maxVar
}