Ejemplo n.º 1
0
// writeCodeBlock emits the content of node to the buffer as a code block.
//
// Leading and trailing "\n", "\r" and "\v" characters are stripped from the
// content; everything else (including indentation) is written unchanged and
// followed by two line breaks. The node's content is always cleared, even
// when nothing is written.
func (self *Formatter) writeCodeBlock(node xml.Node) {
	code := strings.Trim(node.Content(), "\n\r\v")
	node.SetContent("")

	// Only non-empty blocks produce output.
	if code != "" {
		self.buf.WriteString(code)
		self.buf.WriteString("\n\n")
	}
}
Ejemplo n.º 2
0
// linkDensity returns the ratio between the byte length of the content of
// the nodes matched by searching node for "a" and the byte length of the
// content of node itself.
//
// It returns 0 when the search fails or when node has no content; the latter
// avoids a 0/0 division, which would otherwise yield NaN.
func linkDensity(node xml.Node) float64 {
	links, err := node.Search("a")
	if err != nil {
		return 0.0
	}

	tlength := float64(len(node.Content()))
	if tlength == 0 {
		return 0.0
	}

	llength := 0.0
	for _, link := range links {
		llength += float64(len(link.Content()))
	}
	return llength / tlength
}
Ejemplo n.º 3
0
// writeBlock writes the content of node to the buffer as a wrapped text
// block.
//
// The content is trimmed of surrounding whitespace and the node's content is
// cleared. If nothing remains, nothing is written. Otherwise prefix is
// written once, followed by the words of the content separated by single
// spaces. Lines are limited to roughly 79 bytes including the prefix;
// continuation lines are indented with len(prefix) spaces instead of the
// prefix itself. The block is terminated with two line breaks.
//
// Positions and lengths are byte offsets into the content, not rune counts.
func (self *Formatter) writeBlock(node xml.Node, prefix string) {
	block := []byte(strings.TrimSpace(node.Content()))
	// The node is emptied regardless of whether anything gets written.
	node.SetContent("")

	if len(block) == 0 {
		return
	}
	// Position of last space, line break and max length.
	// sp is the start offset of the word that has not been written yet, br is
	// the offset at which the current output line started.
	sp, br, max := 0, 0, 79-len(prefix)
	self.buf.WriteString(prefix)

	for i, c := range block {
		// Break line if exceeded max length and the position of the last space
		// is greater than the position of the last line break. Don't break very
		// long words.
		if i-br > max && sp > br {
			self.buf.WriteByte('\n')
			// The pending word (starting at sp) becomes the first word of the
			// new line.
			br = sp
			// Only the first line is prefixed.
			for j := 0; j < len(prefix); j++ {
				self.buf.WriteByte(' ')
			}
		}
		// whitespace is a lookup defined outside this function, indexed by byte.
		if whitespace[c] {
			// The last character was a space, so ignore this one.
			// Both markers advance together so their distance is unchanged.
			if sp == i {
				sp++
				br++
				continue
			}
			// Write the last word to the buffer, append a space and update
			// the position of the last space.
			// The separating space is written before the word, and only when
			// the word is not the first one on its line (sp > br).
			if sp > br {
				self.buf.WriteByte(' ')
			}
			self.buf.Write(block[sp:i])
			sp = i + 1
		}
	}

	// Write the last word to the buffer.
	if sp < len(block) {
		if sp > br {
			self.buf.WriteByte(' ')
		}
		self.buf.Write(block[sp:])
	}

	// Close block with 2 breaks.
	self.buf.Write([]byte{'\n', '\n'})
}
Ejemplo n.º 4
0
// parseAppDiv extracts the timestamp and block index from an appointment div.
//
// The timestamp is the first capture group of idBlockPattern applied to the
// div's "id" attribute, parsed as a base-10 int64. If the attribute does not
// match the pattern, or the captured text is not a valid integer, a
// *strconv.NumError is returned and blockIndex is empty.
//
// blockIndex is taken from blockIndexPattern applied to the div's content and
// is left empty when the pattern yields no usable match.
func parseAppDiv(div xml.Node) (timestamp int64, blockIndex string, err error) {
	id := div.Attr("id")
	idValues := idBlockPattern.FindStringSubmatch(id)
	if len(idValues) < 2 {
		// No match (or no capture group): report it the same way an
		// unparsable number would be reported instead of panicking on
		// idValues[1].
		return 0, "", &strconv.NumError{Func: "ParseInt", Num: id, Err: strconv.ErrSyntax}
	}

	timestamp, err = strconv.ParseInt(idValues[1], 10, 64)
	if err != nil {
		return timestamp, "", err
	}

	// NOTE(review): a submatch slice has length 1 only when the pattern has
	// no capture groups, in which case element 0 is the whole match. If
	// blockIndexPattern defines a group this branch never runs; confirm
	// against the pattern definition.
	blockIndexValues := blockIndexPattern.FindStringSubmatch(div.Content())
	if len(blockIndexValues) == 1 {
		blockIndex = blockIndexValues[0]
	}
	return timestamp, blockIndex, nil
}
Ejemplo n.º 5
0
// ProcessField evaluates the field's selector against the root of d and
// returns a value derived from the first matching node.
//
// When nothing matches, the empty string is returned. Otherwise the result
// is whatever f.Callback returns for a pointer to the node, or the node's
// content when no callback is set.
func (f *Field) ProcessField(d *html.HtmlDocument) interface{} {
	expr := xpath.Compile(f.Selector)
	// The search error is discarded; a failed search is handled like an
	// empty result set below.
	matches, _ := d.Root().Search(expr)
	if len(matches) == 0 {
		return ""
	}

	var node xml.Node = matches[0]
	if f.Callback == nil {
		return node.Content()
	}
	return f.Callback(&node)
}
Ejemplo n.º 6
0
// handleNode processes a single node according to its element name.
//
// Inline elements have their content rewritten in place (emphasis markers,
// link and image references, explicit line breaks), ignored elements are
// emptied, and block-level elements are flushed to the buffer through
// writeBlock or writeCodeBlock. The first matching case wins, so the order of
// the cases is significant. Nodes that match no case are left untouched.
func (self *Formatter) handleNode(node xml.Node) {
	switch tag := node.Name(); {
	case ignore[tag]:
		// Ignored elements lose their content entirely.
		node.SetContent("")
	case tag == "pre":
		// pre elements are emitted verbatim as code blocks.
		self.writeCodeBlock(node)
	case heading[tag]:
		// Headings get a "# " prefix.
		self.writeBlock(node, "# ")
	case tag == "li":
		// List items get a "- " prefix.
		self.writeBlock(node, "- ")
	case tag == "br":
		// Keep explicit line breaks.
		node.SetContent("\n")
	case italic[tag]:
		// Italic text is surrounded by slashes.
		node.SetContent("/" + node.Content() + "/")
	case bold[tag]:
		// Bold text is surrounded by asterisks.
		node.SetContent("*" + node.Content() + "*")
	case tag == "img":
		// Images become "(alt)[n]", where n is the index of their src in
		// the collected links. Images lacking alt or src are left alone.
		alt, src := node.Attr("alt"), node.Attr("src")
		if len(alt) == 0 || len(src) == 0 {
			break
		}
		idx := len(self.links)
		node.SetContent(fmt.Sprintf("(%s)[%d]", alt, idx))
		self.links = append(self.links, src)
	case tag == "a":
		// Anchors become "content[n]", where n is the index of their href
		// in the collected links. Anchors lacking href or content are left
		// alone.
		href, content := node.Attr("href"), node.Content()
		if len(href) == 0 || len(content) == 0 {
			break
		}
		idx := len(self.links)
		node.SetContent(fmt.Sprintf("%s[%d]", content, idx))
		self.links = append(self.links, href)
	case block[tag]:
		// Remaining block elements are written without a prefix.
		self.writeBlock(node, "")
	}
}
Ejemplo n.º 7
0
// unmarshal decodes the XML node start into val.
//
// If start is nil, the root node of the decoder's document is used. A pointer
// value is dereferenced first, allocating a new element when it is nil.
//
// Supported kinds:
//   - []byte, booleans, numeric kinds and strings receive the node's content
//     via copyValue.
//   - other slices are grown by one element, which is decoded from start; on
//     failure the slice length is restored.
//   - structs are filled from the node's name, attributes, character data,
//     inner XML, comments and child elements according to the type's field
//     metadata (getTypeInfo).
//
// Any other kind yields an "unknown type" error. Errors from copyValue on
// individual struct fields (attributes, chardata, innerxml, comments) are
// discarded, as in the original implementation.
func (p *Decoder) unmarshal(val reflect.Value, start gokoxml.Node) error {
	// Find first xml node.
	if start == nil {
		start = p.doc.Root().XmlNode
	}

	// Unpacks a pointer
	if pv := val; pv.Kind() == reflect.Ptr {
		if pv.IsNil() {
			pv.Set(reflect.New(pv.Type().Elem()))
		}
		val = pv.Elem()
	}

	var (
		sv    reflect.Value
		tinfo *typeInfo
		err   error
	)

	switch v := val; v.Kind() {
	default:
		return errors.New("unknown type " + v.Type().String())

		// TODO: Implement this once i understand Skip()
		// case reflect.Interface:
		// 	return p.Skip()

	case reflect.Slice:
		typ := v.Type()
		if typ.Elem().Kind() == reflect.Uint8 {
			// []byte
			if err := copyValue(v, start.Content()); err != nil {
				return err
			}
			break
		}

		// Slice of element values.
		// Grow slice.
		n := v.Len()
		if n >= v.Cap() {
			ncap := 2 * n
			if ncap < 4 {
				ncap = 4
			}
			grown := reflect.MakeSlice(typ, n, ncap)
			reflect.Copy(grown, v)
			v.Set(grown)
		}
		v.SetLen(n + 1)

		// Recur to read element into slice.
		if err := p.unmarshal(v.Index(n), start); err != nil {
			// Drop the half-decoded element again.
			v.SetLen(n)
			return err
		}
		return nil

	case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String:
		if err := copyValue(v, start.Content()); err != nil {
			return err
		}

	case reflect.Struct:
		typ := v.Type()
		if typ == nameType {
			v.Set(reflect.ValueOf(xml.Name{Local: start.Name()}))
			break
		}
		if typ == timeType {
			if err := copyValue(v, start.Content()); err != nil {
				return err
			}
			break
		}

		sv = v
		tinfo, err = getTypeInfo(typ)
		if err != nil {
			return err
		}

		// Validate and assign element name.
		if tinfo.xmlname != nil {
			// var space string
			finfo := tinfo.xmlname
			if finfo.name != "" && finfo.name != start.Name() {
				return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name() + ">")
			}

			fv := sv.FieldByIndex(finfo.idx)
			if _, ok := fv.Interface().(xml.Name); ok {
				fv.Set(reflect.ValueOf(xml.Name{Local: start.Name()}))
			}
		}

		// Destination for comment nodes; only the first comment field of
		// the struct is used.
		var saveComment reflect.Value
		doSaveComment := false

		for i := range tinfo.fields {
			finfo := &tinfo.fields[i]
			switch finfo.flags & fMode {
			case fAttr:
				strv := sv.FieldByIndex(finfo.idx)
				for name, a := range start.Attributes() {
					if name == finfo.name {
						copyValue(strv, a.Content())
					}
				}
			case fCharData:
				strv := sv.FieldByIndex(finfo.idx)
				copyValue(strv, start.Content())

			case fInnerXml:
				strv := sv.FieldByIndex(finfo.idx)
				// TODO: Not sure why i need to call FirstChild() here.
				// An element without children has no first child; use an
				// empty inner XML instead of calling String() on a nil node.
				inner := ""
				if first := start.FirstChild(); first != nil {
					inner = first.String()
				}
				copyValue(strv, inner)

			case fComment:
				if !doSaveComment {
					doSaveComment = true
					saveComment = sv.FieldByIndex(finfo.idx)
				}
			}
		}

		// Walk the direct children: comments go to the comment field (if
		// any), elements are dispatched to the matching struct field, and
		// every other node type is skipped.
		for child := start.FirstChild(); child != nil; child = child.NextSibling() {
			if sv.IsValid() {
				if child.NodeType() != gokoxml.XML_ELEMENT_NODE {
					if doSaveComment && child.NodeType() == gokoxml.XML_COMMENT_NODE {
						copyValue(saveComment, child.Content())
					}
					continue
				}

				err = p.unmarshalPath(tinfo, sv, nil, child)
				if err != nil {
					return err
				}
			}
		}

	} // switch v := val; v.Kind() {

	return nil
}