// listInterfaces returns the "dev" attribute value of every <target> element
// matched by //domain/devices/interface/target in domXML.
//
// Only the "dev" attribute is collected, in the same way listDisks does for
// disk targets. A <target> element may carry further attributes
// (NOTE(review): libvirt documents e.g. "managed" on interface targets -
// confirm against the libvirt version in use); their values are not interface
// names and must not end up in the result.
//
// The returned slice is never nil; it is empty when nothing matches.
func listInterfaces(domXML *etree.Document) []string {
	networkInterfaces := []string{}
	for _, t := range domXML.FindElements("//domain/devices/interface/target") {
		for _, i := range t.Attr {
			// Skip everything except the device name attribute.
			if i.Key == "dev" {
				networkInterfaces = append(networkInterfaces, i.Value)
			}
		}
	}
	return networkInterfaces
}
// listDisks returns the "dev" attribute value of every <target> element
// matched by //domain/devices/disk[@device='disk']/target in domXML.
//
// The returned slice is never nil; it is empty when nothing matches.
func listDisks(domXML *etree.Document) []string {
	const targetPath = "//domain/devices/disk[@device='disk']/target"

	devs := make([]string, 0)
	targets := domXML.FindElements(targetPath)
	for n := range targets {
		attrs := targets[n].Attr
		for k := range attrs {
			// Only the device name attribute is of interest.
			if attrs[k].Key != "dev" {
				continue
			}
			devs = append(devs, attrs[k].Value)
		}
	}
	return devs
}
Example #3
0
// doSpeaker writes the speaker-dependent "##META" lines for speaker to fpout.
//
// Two metadata sources are handled in order: doc with the item list nonfixed
// (used when hasMeta is true) and doc2 with the item list nonfixed2 (used when
// hasMeta2 is true). A source is skipped when its flag is false or its item
// list is empty.
//
// values caches the looked-up item values per speaker. The documents are only
// queried the first time a speaker is seen; on that first call every item's
// XPath (XPath2 for the second source) has "%speaker%" replaced by speaker,
// the matching element texts are trimmed, optionally passed through the item's
// filter function via vm.Run, and stored when non-empty and accepted by
// oktype. Items without any accepted value are reported on stderr unless
// speaker is empty.
//
// For every item one "##META type item = value" line is written per cached
// value, or a single line with an empty value when there is none.
func doSpeaker(speaker string, hasMeta, hasMeta2 bool, values map[string]map[string][]string, doc, doc2 *etree.Document) {
	cached, known := values[speaker]
	if !known {
		cached = make(map[string][]string)
		values[speaker] = cached
	}
	// Only a speaker without a cache entry requires querying the documents.
	needScan := !known

	sources := [2]struct {
		enabled bool
		doc     *etree.Document
		items   []string
	}{
		{hasMeta, doc, nonfixed},
		{hasMeta2, doc2, nonfixed2},
	}

	for idx, src := range sources {
		if !src.enabled || len(src.items) == 0 {
			continue
		}

		if needScan {
			for _, item := range src.items {
				// The second source uses its own XPath expression.
				var template string
				switch idx {
				case 0:
					template = cfg.Items[item].XPath
				default:
					template = cfg.Items[item].XPath2
				}
				query := strings.Replace(template, "%speaker%", speaker, -1)

				matched := false
				for _, el := range src.doc.FindElements(query) {
					v := strings.TrimSpace(el.Text())
					if cfg.Items[item].hasFilter {
						res, err := vm.Run(fmt.Sprintf("fn[%q](%q);", item, v))
						x(err, "\nRunning function for "+item+":\n"+cfg.Items[item].Filter)
						v = res.String()
					}
					if v == "" || !oktype(item, v) {
						continue
					}
					// append allocates the slice on first use.
					cached[item] = append(cached[item], v)
					matched = true
				}

				if speaker != "" && !matched {
					fmt.Fprintf(os.Stderr, "Niet gevonden in %s voor (%s) %s, %q\n", currentfile, cfg.Items[item].Type, item, speaker)
				}
			}
		}

		for _, item := range src.items {
			vals := cached[item]
			if len(vals) == 0 {
				// No value known: emit the item with an empty value.
				fmt.Fprintf(fpout, "##META %s %s =\n", cfg.Items[item].Type, item)
				continue
			}
			for _, v := range vals {
				fmt.Fprintf(fpout, "##META %s %s = %s\n", cfg.Items[item].Type, item, v)
			}
		}
	}
}
Example #4
0
// doFile converts a single XML input file into the line-oriented text format
// written to the package-level writer fpout.
//
// filename is the name of the file and dirname its directory relative to
// cfg.Data_dir (empty for the top level). Files whose name does not end in
// ".xml" are ignored.
//
// The output consists of:
//   - "##META" lines for the file name (when cfg.File_src is set), the
//     directory components (when cfg.File_path is set), the external metadata
//     source (when cfg.Meta_src is set) and the configured metadata items;
//   - one "label|word word ..." line for every <s> or <utt> element that is
//     not skipped, is not nested inside an <s>, and has non-blank text.
//
// When cfg.Output_dir is set, a separate output file
// <Output_dir>/<dirname>/<name>.txt is created for this input file.
//
// Errors are handed to the helper x, which is defined elsewhere
// (NOTE(review): presumably it aborts on a non-nil error - confirm).
func doFile(filename, dirname string) {
	if !strings.HasSuffix(filename, ".xml") {
		return
	}

	fileno++
	lineno := 0

	native_seen = make(map[string]bool)

	if cfg.Output_dir != "" {
		// The ".xml" suffix is guaranteed by the check at the top.
		f := strings.TrimSuffix(filename, ".xml") + ".txt"
		var err error
		fpout, err = os.Create(filepath.Join(cfg.Output_dir, dirname, f))
		x(err)
		defer fpout.Close()
		// A fresh output file has no path levels from a previous input file
		// that would need to be cleared.
		pathlevel = 0
	}

	if cfg.File_src != "" {
		fmt.Fprintf(fpout, "##META text %s = %s\n", cfg.File_src, filename)
	}

	if cfg.File_path != "" {
		// pathlevel holds the number of directory levels emitted for the
		// previous file; levels that no longer apply are emitted with an
		// empty value.
		if dirname == "" {
			for i := 0; i < pathlevel; i++ {
				fmt.Fprintf(fpout, "##META text %s%d =\n", cfg.File_path, i+1)
			}
			pathlevel = 0
		} else {
			parts := strings.Split(dirname, string(os.PathSeparator))
			for i, p := range parts {
				fmt.Fprintf(fpout, "##META text %s%d = %s\n", cfg.File_path, i+1, p)
			}
			for i := len(parts); i < pathlevel; i++ {
				fmt.Fprintf(fpout, "##META text %s%d =\n", cfg.File_path, i+1)
			}
			pathlevel = len(parts)
		}
	}

	filename = filepath.Join(dirname, filename)

	fmt.Fprintln(os.Stderr, ">", filename)

	var doc, doc2 *etree.Document
	// statestack holds one State per open element, on top of an initial
	// zero-value entry. A start tag pushes a modified copy of the top entry,
	// the matching end tag pops it.
	statestack := make([]State, 1, 10)
	// Sentinel that is not expected to equal any real speaker value, so the
	// first emitted sentence always triggers doSpeaker.
	currentspeaker := " oiqoewij doijqowiu98793olj fdowqjoiequ8nf  fke f wf  wejfo  fwoiu92  "
	values := make(map[string]map[string][]string)
	hasMeta := false
	hasMeta2 := false
	fixedDone := false
	fixedDone2 := false
	nativeDone := false

	currentfile = filepath.Join(cfg.Data_dir, filename)
	fpin, err := os.Open(currentfile)
	x(err)
	defer fpin.Close()
	d := xml.NewDecoder(fpin)
	var meta, label string
	text := make([]byte, 0)
	var teller, uttteller uint64
PARSE:
	for {
		// Offset of the token about to be read; for <foreign-data> this is
		// the start of the raw byte range that is re-read below.
		offset1 := d.InputOffset()
		tt, err := d.Token()
		if err == io.EOF {
			break
		}
		x(err)

		if t, ok := tt.(xml.StartElement); ok {

			// Start from a copy of the parent's state.
			state := statestack[len(statestack)-1]

			// pushState is cleared when the end tag of this element has
			// already been consumed, so that no state is pushed without a
			// matching pop.
			pushState := true

			hasClass := false
			id := ""
			for _, e := range t.Attr {
				switch e.Name.Local {
				case "speaker":
					state.speaker = e.Value
				case "id":
					id = e.Value
				case "class":
					hasClass = true
				case "auth":
					// An element with an auth attribute is skipped, together
					// with its descendants (they inherit the state).
					state.inSkip = true
				}
			}

			switch t.Name.Local {
			case "metadata":
				state.inMetadata = true
				var src string
				for _, e := range t.Attr {
					if e.Name.Local == "src" {
						src = e.Value
						break
					}
				}
				if src != "" {

					if cfg.Meta_src != "" {
						fmt.Fprintf(fpout, "##META text %s = %s\n", cfg.Meta_src, src)
					}

					// Candidate locations of the external metadata file,
					// relative to cfg.Meta_dir, tried in this order.
					srcs := make([]string, 0, 4)
					if dirname != "" {
						srcs = append(srcs, filepath.Join(dirname, src+".xml"), filepath.Join(dirname, src))
					}
					srcs = append(srcs, src+".xml", src)

					for i, src := range srcs {
						doc = etree.NewDocument()
						err := doc.ReadFromFile(filepath.Join(cfg.Meta_dir, src))
						if err == nil {
							break
						}
						// Only the failure of the last candidate is reported.
						if i == len(srcs)-1 {
							x(err)
						}
					}

					// Items from the list "fixed" are written once per file,
					// straight from the external metadata document.
					for _, item := range fixed {
						found := false
						for _, t := range doc.FindElements(cfg.Items[item].XPath) {
							value := t.Text()
							if cfg.Items[item].hasFilter {
								val, err := vm.Run(fmt.Sprintf("fn[%q](%q);", item, value))
								x(err, "\nRunning function for "+item+":\n"+cfg.Items[item].Filter)
								value = val.String()
							}
							if value != "" && oktype(item, value) {
								found = true
								fmt.Fprintf(fpout, "##META %s %s = %s\n", cfg.Items[item].Type, item, value)
							}
						}
						if !found {
							fmt.Fprintf(os.Stderr, "Niet gevonden in %s voor (%s) %s\n", currentfile, cfg.Items[item].Type, item)
							fmt.Fprintf(fpout, "##META %s %s =\n", cfg.Items[item].Type, item)
						}
					}
					hasMeta = true
					fixedDone = true
				}
			case "meta":
				meta = ""
				for _, e := range t.Attr {
					if e.Name.Local == "id" {
						meta = e.Value
						break
					}
				}
				state.inMeta = native_use[meta]
			case "foreign-data":
				if !state.inMetadata {
					x(fmt.Errorf("Invalid tag <foreign-data>"))
				}
				// Skip consumes everything up to and including the matching
				// end tag, so the main loop never sees an EndElement for this
				// element and no state may be pushed for it.
				x(d.Skip())
				pushState = false
				offset2 := d.InputOffset()
				// Re-read the raw bytes of the whole <foreign-data> element
				// through a separate file handle, leaving the decoder's
				// reader untouched.
				fp, err := os.Open(currentfile)
				x(err)
				_, err = fp.Seek(offset1, 0)
				if err != nil {
					fp.Close()
					x(err)
				}
				data2 := make([]byte, offset2-offset1)
				_, err = io.ReadFull(fp, data2)
				fp.Close()
				x(err)

				// Parse the element as a stand-alone document.
				doc2 = etree.NewDocument()
				data := []byte("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
				data = append(data, data2...)
				x(doc2.ReadFromBytes(data))
				// Items from the list "fixed2" are written once per file,
				// straight from the embedded metadata document.
				for _, item := range fixed2 {
					found := false
					for _, t := range doc2.FindElements(cfg.Items[item].XPath2) {
						value := t.Text()
						if cfg.Items[item].hasFilter {
							val, err := vm.Run(fmt.Sprintf("fn[%q](%q);", item, value))
							x(err, "\nRunning function for "+item+":\n"+cfg.Items[item].Filter)
							value = val.String()
						}
						if value != "" && oktype(item, value) {
							found = true
							fmt.Fprintf(fpout, "##META %s %s = %s\n", cfg.Items[item].Type, item, value)
						}
					}
					if !found {
						fmt.Fprintf(os.Stderr, "Niet gevonden in %s voor (%s) %s\n", currentfile, cfg.Items[item].Type, item)
						fmt.Fprintf(fpout, "##META %s %s =\n", cfg.Items[item].Type, item)
					}
				}
				hasMeta2 = true
				fixedDone2 = true
			case "s":
				teller++
				if !state.inSkip {
					if state.inS {
						// Nested <s>: skipped unless the input is tokenized.
						if !cfg.Tokenized {
							state.inSkip = true
						}
					} else {
						if id == "" {
							label = fmt.Sprintf("%s.s.%d", filename, teller)
						} else {
							label = id
						}
						text = text[0:0]
						state.inS = true
						state.inW = false
						state.inT = false
					}
				}
			case "utt":
				uttteller++
				if !state.inSkip && !state.inS { // inS, not inUtt
					if id == "" {
						label = fmt.Sprintf("%s.utt.%d", filename, uttteller)
					} else {
						label = id
					}
					text = text[0:0]
					state.inUtt = true
					state.inW = false
					state.inT = false
				}
			case "w":
				state.inW = true
				state.inT = false
			case "t":
				// Only <t> elements without a class attribute provide text.
				if !hasClass {
					state.inT = true
				}
			case "morpheme", "str":
				if cfg.Tokenized {
					state.inSkip = true
				}
			}

			if pushState {
				statestack = append(statestack, state)
			}
		} else if t, ok := tt.(xml.EndElement); ok {
			// Pop the state of the element being closed; the new top of the
			// stack is the state of its parent.
			state := statestack[len(statestack)-1]
			statestack = statestack[0 : len(statestack)-1]
			if !state.inSkip {
				switch t.Name.Local {
				case "w":
					text = append(text, ' ')
				case "s", "utt":
					// Emit only when the parent is not inside an <s> and
					// there is text to emit.
					if !statestack[len(statestack)-1].inS && strings.TrimSpace(string(text)) != "" {
						doFixed(fixedDone, fixedDone2, nativeDone, hasMeta, hasMeta2)
						fixedDone = true
						fixedDone2 = true
						nativeDone = true
						// Speaker metadata is written only when the speaker
						// changes.
						if state.speaker != currentspeaker {
							doSpeaker(state.speaker, hasMeta, hasMeta2, values, doc, doc2)
							currentspeaker = state.speaker
						}
						words := make([]string, 0)
						for _, w := range strings.Fields(string(text)) {
							words = append(words, alpinoEscape(w))
						}
						fmt.Fprintf(fpout, "%s|%s\n", label, strings.Join(words, " "))
						text = text[0:0]
						lineno++
						// Stop after *opt_m emitted lines when that limit is
						// positive.
						if *opt_m > 0 && *opt_m == lineno {
							break PARSE
						}
					}
				}
			}
		} else if t, ok := tt.(xml.CharData); ok {
			state := statestack[len(statestack)-1]
			// Character data of a <meta> element selected by native_use,
			// inside <metadata>, is written as a metadata value.
			if state.inMetadata && state.inMeta {
				item := native_items[meta].Label
				value := string(t)
				if cfg.Items[item].hasFilter {
					val, err := vm.Run(fmt.Sprintf("fn[%q](%q);", item, value))
					x(err, "\nRunning function for "+item+":\n"+cfg.Items[item].Filter)
					value = val.String()
				}
				if value != "" && oktype(item, value) {
					fmt.Fprintf(fpout, "##META %s %s = %s\n", native_items[meta].Type, item, value)
					native_seen[meta] = true
				}
			}
			// Sentence text is collected from <t> elements inside <s>/<utt>:
			// from within <w> for tokenized input, from outside <w>
			// otherwise.
			if !state.inSkip &&
				(state.inS || state.inUtt) &&
				(state.inW && cfg.Tokenized || !state.inW && !cfg.Tokenized) &&
				state.inT {
				text = append(text, t...)
			}
		}
	}
	fmt.Fprintln(fpout)
	if cfg.Output_dir != "" {
		// With one output file per input file, finish this file and reset the
		// shared state.
		// NOTE(review): doEnd and doFixed are defined elsewhere; their exact
		// effect is not visible here.
		doEnd()
		native_seen = make(map[string]bool)
		doFixed(false, false, false, false, false)
	}
}