Beispiel #1
0
// GetAsset fetches the asset referenced by attribute attr of node, resolved
// against base, and checks the response.
//
// The failure is reported through w.Fail (whose result is returned) when the
// attribute value is not a parsable URL, when the GET fails, or when the
// response status is not 200. If the resolved request URI has an entry in
// s.ExpectedAssets, the checksum calcMD5 computes from the body must equal
// that entry: a match is scored with StaticFileScore, a mismatch is reported
// through w.Fail. Assets without an expected checksum are accepted as-is.
func (s *Scenario) GetAsset(w *Worker, base *url.URL, node xml.Node, attr string) error {
	path, err := url.Parse(node.Attr(attr))
	if err != nil {
		return w.Fail(nil, err)
	}

	requestURI := base.ResolveReference(path)

	req, res, err := w.SimpleGet(requestURI.String())
	if err != nil {
		return w.Fail(req, err)
	}
	// Register the close before any early return so the body is released on
	// every path, including the non-200 one.
	defer res.Body.Close()

	if res.StatusCode != 200 {
		return w.Fail(res.Request, fmt.Errorf("Response code should be %d, got %d", 200, res.StatusCode))
	}

	// The body is always consumed, even when no checksum is expected.
	md5sum := calcMD5(res.Body)

	expectedMD5, ok := s.ExpectedAssets[requestURI.RequestURI()]
	if !ok {
		return nil
	}
	if md5sum != expectedMD5 {
		return w.Fail(res.Request, fmt.Errorf("Expected MD5 checksum is miss match %s, got %s", expectedMD5, md5sum))
	}

	w.Success(StaticFileScore)
	return nil
}
Beispiel #2
0
// trNodeToSchedule converts one schedule table row into a ScheduleItem.
//
// The text nodes of the row's td cells are read by position: indexes 1-9 fill
// the mandatory fields and index 10, when present, fills Status. EndStation
// is the second "-"-separated part of Relation and stays empty when Relation
// has fewer than two parts.
//
// An error is returned when the search fails or when the row has fewer than
// ten text cells.
func trNodeToSchedule(scheduleNode xml.Node) (item ScheduleItem, err error) {
	results, err := scheduleNode.Search("./td/text()")
	if err != nil {
		return ScheduleItem{}, err
	}

	// Indexes 1 through 9 are read unconditionally below; guard against
	// short rows instead of panicking with an index out of range.
	const minCells = 10
	if len(results) < minCells {
		return ScheduleItem{}, fmt.Errorf("schedule row has %d text cells, want at least %d", len(results), minCells)
	}

	cell := func(i int) string {
		return strings.TrimSpace(results[i].String())
	}

	item = ScheduleItem{
		TrainNumber:     cell(1),
		Misc:            cell(2),
		Class:           cell(3),
		Relation:        cell(4),
		StartingStation: cell(5),
		CurrentStation:  cell(6),
		ArrivingTime:    cell(7),
		DepartingTime:   cell(8),
		Ls:              cell(9),
	}

	if len(results) > 10 {
		item.Status = cell(10)
	}

	stationParts := strings.FieldsFunc(item.Relation, func(r rune) bool {
		return r == '-'
	})

	// [ANGKE BOGOR] BOGOR is end station. A relation without a second part
	// leaves EndStation empty rather than panicking.
	if len(stationParts) > 1 {
		item.EndStation = stationParts[1]
	}

	return item, nil
}
Beispiel #3
0
// getContent returns the content of the first node that node.Search finds
// for cssQuery, after converting the query with toXpath.
//
// It panics with a descriptive error when the search fails or when it
// matches nothing.
func getContent(node gxtml.Node, cssQuery string) string {
	result, err := node.Search(toXpath(cssQuery))
	if err != nil {
		// Keep the underlying cause in the panic value for diagnosis.
		panic(fmt.Errorf("Failed to find %v node: %v", cssQuery, err))
	}
	if len(result) == 0 {
		// An empty match would otherwise surface as an opaque
		// index-out-of-range runtime panic.
		panic(fmt.Errorf("Failed to find %v node", cssQuery))
	}
	return result[0].Content()
}
Beispiel #4
0
// Link returns "http://www.bso.org" followed by the href attribute of the
// first anchor found below performance.
//
// A search error is printed to standard output. When no anchor is available
// the empty string is returned.
func Link(performance xml.Node) string {
	anchor, err := performance.Search(".//a")
	if err != nil {
		fmt.Println(err)
	}
	// Without this guard a failed or empty search panics on anchor[0].
	if len(anchor) == 0 {
		return ""
	}
	return "http://www.bso.org" + anchor[0].Attr("href")
}
Beispiel #5
0
// extend appends a duplicate of node to doc. The duplicate is renamed to
// "div" unless it is already a "p" or a "div" (compared case-insensitively).
//
// The duplicate is asserted to be an *xml.ElementNode; the assertion panics
// for any other concrete type.
func extend(doc *xml.ElementNode, node xml.Node) {
	dup := node.Duplicate(1).(*xml.ElementNode)
	doc.AddChild(dup)

	// The two comparisons must be joined with &&: with || the condition is
	// true for every name and all nodes would be renamed.
	name := strings.ToLower(dup.Name())
	if name != "p" && name != "div" {
		dup.SetName("div")
	}
}
Beispiel #6
0
// MapRegex applies every pattern in ScrapeRegex to the string form of node
// (with line breaks removed) and collects the non-empty results keyed by the
// ScrapeRegex key.
//
// For each entry the first element of the value is compiled as a POSIX
// regular expression; the leftmost match is taken and rewritten with the "$1"
// template.
//
// It returns an error when node is invalid or when a pattern does not
// compile. It returns (nil, nil) when nothing was scraped or when the entry
// named by ScrapeMeta[IGNOREEMPTY] is missing or empty.
func (s *scrape) MapRegex(node xml.Node) (map[string]string, error) {
	if !node.IsValid() {
		return nil, errors.New("Invalid node")
	}

	// Strip line breaks once up front; the result does not depend on the
	// pattern being applied, so there is no need to redo it per iteration.
	newlines := regexp.MustCompilePOSIX("\r\n|\r|\n")
	inner := newlines.ReplaceAllString(node.String(), "")

	m := make(map[string]string, 1)
	for k, v := range ScrapeRegex {
		// Surface a bad pattern as an error instead of dereferencing a nil
		// *regexp.Regexp.
		reg, err := regexp.CompilePOSIX(v[0])
		if err != nil {
			return nil, err
		}

		// get the real data
		scraped := reg.FindString(inner)
		scraped = reg.ReplaceAllString(scraped, "$1")

		if scraped != "" {
			m[k] = scraped
		}
	}

	// Skip empty and unwanted
	if len(m) == 0 {
		return nil, nil
	}
	if m[ScrapeMeta[IGNOREEMPTY]] == "" {
		return nil, nil
	}
	return m, nil
}
Beispiel #7
0
// parseSource returns the content of the first node that m.Search finds for
// "source", or the empty string when there is none. The search error is
// ignored.
func parseSource(m xml.Node) string {
	matches, _ := m.Search("source")
	if len(matches) == 0 {
		return ""
	}
	return matches[0].Content()
}
Beispiel #8
0
// walk recurses into every child of node first and afterwards, if node itself
// is an element node, passes it to handleNode.
func (self *Formatter) walk(node xml.Node) {
	child := node.FirstChild()
	for child != nil {
		self.walk(child)
		child = child.NextSibling()
	}

	if node.NodeType() != xml.XML_ELEMENT_NODE {
		return
	}
	self.handleNode(node)
}
Beispiel #9
0
// parseRights returns the content of the first node that m.Search finds for
// "rights". The result is empty when nothing matches; the search error is
// ignored.
func parseRights(m xml.Node) string {
	var rights string
	if found, _ := m.Search("rights"); len(found) > 0 {
		rights = found[0].Content()
	}
	return rights
}
Beispiel #10
0
// writeCodeBlock writes node's content, with leading and trailing "\n", "\r"
// and "\v" characters trimmed, to the buffer followed by two line breaks.
// The node's content is cleared in every case; nothing is written when the
// trimmed content is empty.
func (self *Formatter) writeCodeBlock(node xml.Node) {
	code := strings.Trim(node.Content(), "\n\r\v")
	node.SetContent("")

	if code != "" {
		self.buf.Write([]byte(code))
		self.buf.Write([]byte("\n\n"))
	}
}
Beispiel #11
0
// walkElements calls f on node and then, recursively, on each of its
// children in sibling order (so a node is always visited before its
// descendants). The walk stops at the first error returned by f and that
// error is returned; nil is returned when every call succeeds.
func (this *Document) walkElements(node xml.Node, f func(xml.Node) error) error {
	// The callback's error must be checked here; otherwise no error could
	// ever reach the caller, since the recursion only forwards what this
	// call returns.
	if err := f(node); err != nil {
		return err
	}
	for child := node.FirstChild(); child != nil; child = child.NextSibling() {
		if err := this.walkElements(child, f); err != nil {
			return err
		}
	}
	return nil
}
Beispiel #12
0
// parseSubjects collects the content of every node that m.Search finds for
// "subject", in result order. The returned slice is never nil; the search
// error is ignored.
func parseSubjects(m xml.Node) []string {
	nodes, _ := m.Search("subject")

	subjects := make([]string, 0, len(nodes))
	for i := range nodes {
		subjects = append(subjects, nodes[i].Content())
	}
	return subjects
}
Beispiel #13
0
// parseDescription returns the content of the first node that m.Search finds
// for "description", or "" when there is no such node. The search error is
// ignored.
func parseDescription(m xml.Node) string {
	nodes, _ := m.Search("description")
	if len(nodes) == 0 {
		return ""
	}
	return nodes[0].Content()
}
Beispiel #14
0
// parsePublisher returns the content of the first node that m.Search finds
// for "publisher". It returns "" when the search yields nothing; the search
// error is ignored.
func parsePublisher(m xml.Node) string {
	hits, _ := m.Search("publisher")
	if len(hits) < 1 {
		return ""
	}

	first := hits[0]
	return first.Content()
}
Beispiel #15
0
// parseTitles returns the content of every node that m.Search finds for
// "title", in result order. The slice is empty but non-nil when nothing
// matches; the search error is ignored.
func parseTitles(m xml.Node) []string {
	found, _ := m.Search("title")

	titles := make([]string, len(found))
	for i, n := range found {
		titles[i] = n.Content()
	}
	return titles
}
Beispiel #16
0
// parseLanguages gathers the content of each node that m.Search finds for
// "language", preserving result order. A non-nil slice is always returned;
// the search error is ignored.
func parseLanguages(m xml.Node) []string {
	langNodes, _ := m.Search("language")

	languages := make([]string, 0)
	for i := 0; i < len(langNodes); i++ {
		languages = append(languages, langNodes[i].Content())
	}
	return languages
}
Beispiel #17
0
// parseIdentifiers builds one Identifier per node that m.Search finds for
// "identifier": the node content becomes Identifier and its "scheme"
// attribute becomes Scheme. The returned slice is never nil; the search
// error is ignored.
func parseIdentifiers(m xml.Node) []*Identifier {
	nodes, _ := m.Search("identifier")

	identifiers := make([]*Identifier, 0, len(nodes))
	for _, n := range nodes {
		identifiers = append(identifiers, &Identifier{
			Identifier: n.Content(),
			Scheme:     n.Attr("scheme"),
		})
	}
	return identifiers
}
Beispiel #18
0
// Pieces renders the h3 headings inside div.program-media-collapse elements
// below details as an HTML list: a <ul class="works"> with one <li> per
// heading, holding that heading's content verbatim. The search error is
// ignored, so a failed search produces an empty list.
func Pieces(details xml.Node) string {
	headings, _ := details.Search(".//div[@class='program-media-collapse']/h3")

	list := "<ul class=\"works\">"
	for i := range headings {
		list += "<li>" + headings[i].Content() + "</li>"
	}
	return list + "</ul>"
}
Beispiel #19
0
// unmarshalPath walks down an XML structure looking for wanted
// paths, and calls unmarshal on them.
//
// start is the node under consideration and parents is the chain of
// element names already matched on the way down to it. Every element
// field of tinfo whose parent chain begins with parents is examined:
//   - if the chain has the same length as parents and the field name equals
//     start's name, start is unmarshalled into that field of sv and the
//     result of that call is returned;
//   - if the chain is longer and its next entry equals start's name, start
//     is a prefix of a wanted path and its element children are examined
//     recursively with the extended chain.
//
// It returns nil when start is of no interest to sv's fields, and otherwise
// the first error reported by unmarshal or by a recursive call.
func (p *Decoder) unmarshalPath(tinfo *typeInfo, sv reflect.Value, parents []string, start gokoxml.Node) (err error) {
	recurse := false
	name := start.Name() // For speed

Loop:
	for i := range tinfo.fields {
		finfo := &tinfo.fields[i]
		// Skip fields that are not elements or whose parent chain is too
		// short to contain the path walked so far.
		if finfo.flags&fElement == 0 || len(finfo.parents) < len(parents) {
			continue
		}
		// The field's parent chain must start with the path walked so far.
		for j := range parents {
			if parents[j] != finfo.parents[j] {
				continue Loop
			}
		}
		if len(finfo.parents) == len(parents) && finfo.name == name {
			// It's a perfect match, unmarshal the field.
			return p.unmarshal(sv.FieldByIndex(finfo.idx), start)
		}
		if len(finfo.parents) > len(parents) && finfo.parents[len(parents)] == name {
			// It's a prefix for the field. Break and recurse
			// since it's not ok for one field path to be itself
			// the prefix for another field path.
			recurse = true

			// We can reuse the same slice as long as we
			// don't try to append to it.
			parents = finfo.parents[:len(parents)+1]
			break
		}
	}

	if !recurse {
		// We have no business with this element.
		return nil
	}

	// The element is not a perfect match for any field, but one
	// or more fields have the path to this element as a parent
	// prefix. Recurse and attempt to match these.
	for cur_node := start.FirstChild(); cur_node != nil; cur_node = cur_node.NextSibling() {
		// Only element children can continue a path.
		if cur_node.NodeType() != gokoxml.XML_ELEMENT_NODE {
			continue
		}

		if err := p.unmarshalPath(tinfo, sv, parents, cur_node); err != nil {
			return err
		}
	}

	// No more XML Nodes.
	return nil
}
Beispiel #20
0
// linkDensity returns the ratio between the combined byte length of the
// content of the nodes that node.Search finds for "a" and the byte length of
// node's own content.
//
// It returns 0 when the search fails or when node has no content, so the
// result is never NaN or infinite.
//
// NOTE(review): the query "a" is passed to Search unchanged; whether it is
// meant to cover nested anchors as well should be confirmed against the
// search semantics.
func linkDensity(node xml.Node) float64 {
	links, err := node.Search("a")
	if err != nil {
		return 0.0
	}

	tlength := float64(len(node.Content()))
	// Guard the division: an empty node would otherwise yield 0/0 = NaN,
	// which poisons any score computed from it.
	if tlength == 0 {
		return 0.0
	}

	llength := 0.0
	for _, link := range links {
		llength += float64(len(link.Content()))
	}
	return llength / tlength
}
Beispiel #21
0
// writeBlock writes node's whitespace-trimmed content to the buffer as a
// wrapped text block and clears the node's content.
//
// The block starts with prefix; lines started by a wrap are indented with
// len(prefix) spaces instead. Words are separated by single spaces and the
// block is terminated by two line breaks. Nothing is written, not even
// prefix, when the trimmed content is empty.
//
// Which bytes count as word separators is decided by the whitespace lookup,
// which is defined outside this function.
func (self *Formatter) writeBlock(node xml.Node, prefix string) {
	block := []byte(strings.TrimSpace(node.Content()))
	node.SetContent("")

	if len(block) == 0 {
		return
	}
	// Position of last space, line break and max length.
	// max is 79 reduced by the length of the prefix.
	sp, br, max := 0, 0, 79-len(prefix)
	self.buf.WriteString(prefix)

	// block is a byte slice, so i is a byte offset and c a single byte.
	for i, c := range block {
		// Break line if exceeded max length and the position of the last space
		// is greater than the position of the last line break. Don't break very
		// long words.
		if i-br > max && sp > br {
			self.buf.WriteByte('\n')
			br = sp
			// Only the first line is prefixed.
			// Later lines get the same width in spaces.
			for j := 0; j < len(prefix); j++ {
				self.buf.WriteByte(' ')
			}
		}
		if whitespace[c] {
			// The last character was a space, so ignore this one.
			if sp == i {
				sp++
				br++
				continue
			}
			// Write the last word to the buffer, append a space and update
			// the position of the last space.
			// The separating space is only written when the word does not
			// start at the recorded line-break position.
			if sp > br {
				self.buf.WriteByte(' ')
			}
			self.buf.Write(block[sp:i])
			sp = i + 1
		}
	}

	// Write the last word to the buffer.
	if sp < len(block) {
		if sp > br {
			self.buf.WriteByte(' ')
		}
		self.buf.Write(block[sp:])
	}

	// Close block with 2 breaks.
	self.buf.Write([]byte{'\n', '\n'})
}
Beispiel #22
0
/*
 * Invokes a MIME type plugin based on current node's type attribute, passing src attribute's value
 * as argument. The subcommand's standard output is read in full, coerced into a node of doc,
 * appended to the parent of e, and e is then removed. The subcommand's standard error goes to
 * os.Stderr.
 *
 * Returns the first error from creating the pipe, starting or waiting for the subcommand,
 * reading its output, or coercing that output.
 */
func (gen *Generator) handleMIMETypePlugin(e xml.Node, doc *html.HtmlDocument) (err error) {
	src := e.Attribute("src").Value()
	typ := e.Attribute("type").Value()
	cmd := exec.Command(fmt.Sprintf("m%s%s", ZAS_PREFIX, gen.resolveMIMETypePlugin(typ)), src)
	stdout, err := cmd.StdoutPipe()
	if err != nil {
		return err
	}
	cmd.Stderr = os.Stderr
	if err = cmd.Start(); err != nil {
		return err
	}
	// Read synchronously between Start and Wait. All reads from the pipe
	// must finish before Wait, and reading here (rather than in a goroutine
	// started before Start) leaves no goroutine behind when Start fails.
	data, readErr := ioutil.ReadAll(stdout)
	// Always wait, even after a read error, so the process is reaped. A
	// Wait error takes precedence over a read error.
	if err = cmd.Wait(); err != nil {
		return err
	}
	if readErr != nil {
		return readErr
	}
	parent := e.Parent()
	child, err := doc.Coerce(data)
	if err != nil {
		return err
	}
	parent.AddChild(child)
	e.Remove()
	return nil
}
Beispiel #23
0
// parseAppDiv extracts timestamp and blockindex from an appointment div.
//
// The timestamp is the first submatch of idBlockPattern in the div's "id"
// attribute, parsed as a base-10 int64. blockIndex is the match of
// blockIndexPattern in the div's content; it is only set when
// FindStringSubmatch returns exactly one element and stays empty otherwise.
//
// An error is returned when the id does not match idBlockPattern or when the
// captured timestamp is not a valid integer.
func parseAppDiv(div xml.Node) (timestamp int64, blockIndex string, err error) {
	id := div.Attr("id")
	idValues := idBlockPattern.FindStringSubmatch(id)
	// FindStringSubmatch returns nil when there is no match; indexing it
	// unguarded would panic.
	if len(idValues) < 2 {
		return 0, "", fmt.Errorf("appointment div id %q does not match the expected pattern", id)
	}

	timestamp, err = strconv.ParseInt(idValues[1], 10, 64)
	if err != nil {
		return timestamp, "", err
	}

	blockIndexValues := blockIndexPattern.FindStringSubmatch(div.Content())
	if len(blockIndexValues) == 1 {
		blockIndex = blockIndexValues[0]
	}
	return timestamp, blockIndex, nil
}
Beispiel #24
0
// processNode extends row with node's "TEXT" attribute and a "|" separator.
// If node.Search("node") finds further nodes, each of them is processed
// recursively with the extended row; otherwise the extended row is printed.
// A search error is logged and ends processing of this branch.
func processNode(node xml.Node, row string) {
	path := row + node.Attr("TEXT") + "|"

	children, err := node.Search("node")
	if err != nil {
		log.Println("Error searching for node:", err)
		return
	}

	// Leaf: nothing below, so emit the accumulated row.
	if len(children) == 0 {
		fmt.Println(path)
		return
	}

	for _, child := range children {
		processNode(child, path)
	}
}
Beispiel #25
0
// parseLeagues currently always returns an empty slice. When exactly one
// href is found below player, the player's page on sc2ratings.com is
// fetched, parsed and printed, but nothing is extracted from it yet.
//
// Not working because:
//   http://www.sc2ratings.com/players.php?realname=Yang,%20Hee-Soo
// is parsed as:
//   http://www.sc2ratings.com/players.php?realname=Yang, Hee-Soo
func parseLeagues(player xml.Node) []string {
	leagues := []string{}

	hrefs, err := player.Search(".//a/@href")
	errorHandler(err)
	if len(hrefs) != 1 {
		return leagues
	}

	pageURL := "http://www.sc2ratings.com/" + hrefs[0].String()
	pageSource := retrievePageSource(pageURL)

	page, err := gokogiri.ParseHtml(pageSource)
	errorHandler(err)
	defer page.Free()

	fmt.Println(page)
	return leagues
}
Beispiel #26
0
// parseDates builds one Date per node that m.Search finds for "date", using
// the node content as Date and its "event" attribute as Event. If the search
// for meta[@property='dcterms:modified'] finds anything, the first match is
// appended as a Date with Event "modified". The returned slice is never nil;
// search errors are ignored.
func parseDates(m xml.Node) []*Date {
	dateNodes, _ := m.Search("date")

	dates := make([]*Date, 0, len(dateNodes)+1)
	for _, n := range dateNodes {
		dates = append(dates, &Date{Date: n.Content(), Event: n.Attr("event")})
	}

	modified, _ := m.Search("meta[@property='dcterms:modified']")
	if len(modified) == 0 {
		return dates
	}
	return append(dates, &Date{Date: modified[0].Content(), Event: "modified"})
}
Beispiel #27
0
// loadInputs returns one Input per input element found below doc, filled
// from the element's "type", "name" and "value" attributes. Checked is true
// only when the "checked" attribute equals "checked". A search error is
// returned unchanged together with a nil slice; on success the slice is
// never nil.
func loadInputs(doc xml.Node) ([]*Input, error) {
	found, err := doc.Search(".//input")
	if err != nil {
		return nil, err
	}

	inputs := make([]*Input, 0, len(found))
	for idx := range found {
		el := found[idx]
		inputs = append(inputs, &Input{
			Type:    el.Attr("type"),
			Name:    el.Attr("name"),
			Value:   el.Attr("value"),
			Checked: el.Attr("checked") == "checked",
		})
	}
	return inputs, nil
}
Beispiel #28
0
// ProcessField method fetches data from passed document.
//
// f.Selector is compiled as an XPath expression and evaluated against the
// root of d. For the first matching node the result is f.Callback's return
// value when a callback is set, and the node's content otherwise. The empty
// string is returned when the selector cannot be compiled, when the search
// fails, or when nothing matches.
func (f *Field) ProcessField(d *html.HtmlDocument) interface{} {
	selector := xpath.Compile(f.Selector)
	// NOTE(review): assumes the gokogiri xpath API, where Compile returns a
	// nil expression for an unusable selector and a compiled expression
	// must be released with Free.
	if selector == nil {
		return ""
	}
	defer selector.Free()

	result, _ := d.Root().Search(selector)
	if len(result) == 0 {
		return ""
	}

	var node xml.Node = result[0]
	if f.Callback != nil {
		return f.Callback(&node)
	}
	return node.Content()
}
Beispiel #29
0
// removeNonAnchorElements processes the children of node recursively. Every
// element child whose lower-cased name is not "a" is replaced by its own
// children: they are unlinked and inserted before it one by one, and the
// element itself is then removed. Non-element children are left alone.
func removeNonAnchorElements(node xml.Node) {
	cur := node.FirstChild()
	for cur != nil {
		// Remember the successor first: cur may be removed below.
		following := cur.NextSibling()

		if cur.NodeType() == xml.XML_ELEMENT_NODE {
			removeNonAnchorElements(cur)

			if name := strings.ToLower(cur.Name()); name != "a" {
				child := cur.FirstChild()
				for child != nil {
					// Same precaution: child is about to be moved.
					sibling := child.NextSibling()
					child.Unlink()
					cur.InsertBefore(child)
					child = sibling
				}
				cur.Remove()
			}
		}

		cur = following
	}
}
Beispiel #30
0
// parseListApp builds a Booked Appointment from a list-view appointment div.
//
// The div's "id" attribute is matched against idListPattern; the first
// submatch is parsed as a Unix timestamp in seconds and the second as the
// practitioner number, both as base-10 int64 values. The appointment's
// session is left nil.
//
// An error is returned when the id does not match idListPattern with at
// least two submatches or when either captured value is not a valid integer.
func parseListApp(div xml.Node) (*Appointment, error) {
	id := div.Attr("id")
	values := idListPattern.FindStringSubmatch(id)
	// FindStringSubmatch returns nil when there is no match; values[1] and
	// values[2] would panic without this guard.
	if len(values) < 3 {
		return nil, fmt.Errorf("appointment div id %q does not match the expected pattern", id)
	}

	timestamp, err := strconv.ParseInt(values[1], 10, 64)
	if err != nil {
		return nil, err
	}

	practitioner, err := strconv.ParseInt(values[2], 10, 64)
	if err != nil {
		return nil, err
	}

	return &Appointment{
		session:      nil,
		Timestamp:    time.Unix(timestamp, 0),
		Practitioner: Practitioner(practitioner),
		Status:       Booked,
	}, nil
}