// Writes code blocks to the buffer. func (self *Formatter) writeCodeBlock(node xml.Node) { block := []byte(strings.Trim(node.Content(), "\n\r\v")) node.SetContent("") if len(block) == 0 { return } self.buf.Write(block) self.buf.Write([]byte{'\n', '\n'}) }
func linkDensity(node xml.Node) float64 { links, err := node.Search("a") if err != nil { return 0.0 } llength := 0.0 for _, link := range links { llength += float64(len(link.Content())) } tlength := float64(len(node.Content())) return llength / tlength }
// Writes text blocks to the buffer. func (self *Formatter) writeBlock(node xml.Node, prefix string) { block := []byte(strings.TrimSpace(node.Content())) node.SetContent("") if len(block) == 0 { return } // Position of last space, line break and max length. sp, br, max := 0, 0, 79-len(prefix) self.buf.WriteString(prefix) for i, c := range block { // Break line if exceeded max length and the position of the last space // is greater than the position of the last line break. Don't break very // long words. if i-br > max && sp > br { self.buf.WriteByte('\n') br = sp // Only the first line is prefixed. for j := 0; j < len(prefix); j++ { self.buf.WriteByte(' ') } } if whitespace[c] { // The last character was a space, so ignore this one. if sp == i { sp++ br++ continue } // Write the last word to the buffer, append a space and update // the position of the last space. if sp > br { self.buf.WriteByte(' ') } self.buf.Write(block[sp:i]) sp = i + 1 } } // Write the last word to the buffer. if sp < len(block) { if sp > br { self.buf.WriteByte(' ') } self.buf.Write(block[sp:]) } // Close block with 2 breaks. self.buf.Write([]byte{'\n', '\n'}) }
// parseAppDiv extracts timestamp and blockindex from an appointment div func parseAppDiv(div xml.Node) (timestamp int64, blockIndex string, err error) { idValues := idBlockPattern.FindStringSubmatch(div.Attr("id")) timestamp, err = strconv.ParseInt(idValues[1], 10, 64) if err != nil { return } blockIndexValues := blockIndexPattern.FindStringSubmatch(div.Content()) if len(blockIndexValues) == 1 { blockIndex = blockIndexValues[0] } return }
// ProcessField method fetches data from passed document func (f *Field) ProcessField(d *html.HtmlDocument) interface{} { var value interface{} var node xml.Node selector := xpath.Compile(f.Selector) result, _ := d.Root().Search(selector) if len(result) > 0 { node = result[0] } else { return "" } if f.Callback != nil { value = f.Callback(&node) } else { value = node.Content() } return value }
// Formats the content of inline elements and writes the content of block // elements to the buffer. func (self *Formatter) handleNode(node xml.Node) { name := node.Name() switch { case ignore[name]: // Remove ignored elements. node.SetContent("") case name == "pre": // Treat pre elements as code blocks. self.writeCodeBlock(node) case heading[name]: // Headings are prefixed with "# ". self.writeBlock(node, "# ") case name == "li": // List items are prefixed with "- ". self.writeBlock(node, "- ") case name == "br": // Preserve explicit line breaks. node.SetContent("\n") case italic[name]: // Wrap italic elements with /. node.SetContent("/" + node.Content() + "/") case bold[name]: // Wrap bold elements with *. node.SetContent("*" + node.Content() + "*") case name == "img": // Collect the src of images and replace them with (alt)[url index] alt, src := node.Attr("alt"), node.Attr("src") if len(alt) > 0 && len(src) > 0 { node.SetContent(fmt.Sprintf("(%s)[%d]", alt, len(self.links))) self.links = append(self.links, src) } case name == "a": // Collect the href and and the url index. href, content := node.Attr("href"), node.Content() if len(href) > 0 && len(content) > 0 { node.SetContent(fmt.Sprintf("%s[%d]", content, len(self.links))) self.links = append(self.links, href) } case block[name]: // Write the content of block elements to the buffer. self.writeBlock(node, "") } }
func (p *Decoder) unmarshal(val reflect.Value, start gokoxml.Node) error { // Find first xml node. if start == nil { start = p.doc.Root().XmlNode } // Unpacks a pointer if pv := val; pv.Kind() == reflect.Ptr { if pv.IsNil() { pv.Set(reflect.New(pv.Type().Elem())) } val = pv.Elem() } var ( sv reflect.Value tinfo *typeInfo err error ) switch v := val; v.Kind() { default: return errors.New("unknown type " + v.Type().String()) // TODO: Implement this once i understand Skip() // case reflect.Interface: // return p.Skip() case reflect.Slice: typ := v.Type() if typ.Elem().Kind() == reflect.Uint8 { // []byte if err := copyValue(v, start.Content()); err != nil { return err } break } // Slice of element values. // Grow slice. n := v.Len() if n >= v.Cap() { ncap := 2 * n if ncap < 4 { ncap = 4 } new := reflect.MakeSlice(typ, n, ncap) reflect.Copy(new, v) v.Set(new) } v.SetLen(n + 1) // Recur to read element into slice. if err := p.unmarshal(v.Index(n), start); err != nil { v.SetLen(n) return err } return nil case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr, reflect.String: if err := copyValue(v, start.Content()); err != nil { return err } case reflect.Struct: typ := v.Type() if typ == nameType { v.Set(reflect.ValueOf(xml.Name{Local: start.Name()})) break } if typ == timeType { if err := copyValue(v, start.Content()); err != nil { return err } break } sv = v tinfo, err = getTypeInfo(typ) if err != nil { return err } // Validate and assign element name. if tinfo.xmlname != nil { // var space string finfo := tinfo.xmlname if finfo.name != "" && finfo.name != start.Name() { return UnmarshalError("expected element type <" + finfo.name + "> but have <" + start.Name() + ">") } fv := sv.FieldByIndex(finfo.idx) if _, ok := fv.Interface().(xml.Name); ok { fv.Set(reflect.ValueOf(xml.Name{Local: start.Name()})) } } var saveComment reflect.Value var doSaveComment = false _ = saveComment for i := range tinfo.fields { finfo := &tinfo.fields[i] switch finfo.flags & fMode { case fAttr: strv := sv.FieldByIndex(finfo.idx) for name, a := range start.Attributes() { if name == finfo.name { copyValue(strv, a.Content()) } } case fCharData: strv := sv.FieldByIndex(finfo.idx) copyValue(strv, start.Content()) case fInnerXml: strv := sv.FieldByIndex(finfo.idx) // TODO: Not sure why i need to call FirstChild() here. copyValue(strv, start.FirstChild().String()) case fComment: if !doSaveComment { doSaveComment = true saveComment = sv.FieldByIndex(finfo.idx) } } } for cur_node := start.FirstChild(); cur_node != nil; cur_node = cur_node.NextSibling() { if sv.IsValid() { if cur_node.NodeType() != gokoxml.XML_ELEMENT_NODE { if doSaveComment && cur_node.NodeType() == gokoxml.XML_COMMENT_NODE { copyValue(saveComment, cur_node.Content()) } continue } err = p.unmarshalPath(tinfo, sv, nil, cur_node) if err != nil { return err } } } } // switch v := val; v.Kind() { return nil }