Example #1
0
// CompareNode checks originalNode against expectedNode and returns an *Error
// describing the first mismatch found. The returned Error always carries the
// rendered form of both nodes in Got and Expected.
//
// Checks are performed in this order: node type, node data, every attribute
// of expectedNode (looked up by key in originalNode), and the attribute
// count. "class" and "style" values that are not byte-equal are compared with
// equalWithSeparator using " " and ";" respectively. When all checks pass,
// the result of CompareNodes on the two nodes' children is returned.
func CompareNode(originalNode, expectedNode *html.Node) *Error {
	mismatch := &Error{
		Got:      h5.RenderNodesToString([]*html.Node{originalNode}),
		Expected: h5.RenderNodesToString([]*html.Node{expectedNode}),
	}

	switch {
	case originalNode.Type != expectedNode.Type:
		mismatch.Description = "Node type does not match"
		return mismatch
	case originalNode.Data != expectedNode.Data:
		mismatch.Description = "Nodes data does not match"
		return mismatch
	}

	for _, want := range expectedNode.Attr {
		found, sameValue := false, false
		for _, have := range originalNode.Attr {
			if have.Key != want.Key {
				continue
			}
			found = true
			switch {
			case have.Val == want.Val:
				sameValue = true
			case want.Key == "class":
				sameValue = equalWithSeparator(have.Val, want.Val, " ")
			case want.Key == "style":
				sameValue = equalWithSeparator(have.Val, want.Val, ";")
			}
		}
		switch {
		case !found:
			mismatch.Description = fmt.Sprintf("Attribute %v not found in node", want.Key)
			return mismatch
		case !sameValue:
			mismatch.Description = fmt.Sprintf("Attribute %v value is different", want.Key)
			return mismatch
		}
	}

	// Every expected attribute matched; any surplus must be extra attributes
	// on the original node.
	if len(originalNode.Attr) != len(expectedNode.Attr) {
		mismatch.Description = "Different number of node attributes"
		return mismatch
	}

	return CompareNodes(h5.Children(originalNode), h5.Children(expectedNode))
}
Example #2
0
// getTagValueAsString returns the Data of the first child of the first node
// under node that matches the CSS selector.
//
// It returns "" when the selector cannot be compiled, when nothing matches,
// or when the first match has no children.
func getTagValueAsString(node *html.Node, selector string) string {
	chain, err := css.Selector(selector)
	if err != nil || chain == nil {
		// There is no error return on this helper, so an unusable selector is
		// reported the same way as "no match" instead of being used blindly.
		return ""
	}

	matches := chain.Find(node)
	if len(matches) == 0 {
		return ""
	}

	children := h5.Children(matches[0])
	if len(children) == 0 {
		return ""
	}
	return children[0].Data
}
Example #3
0
// extractNodeString returns the text held by n. For a text node this is its
// Data; for any other node it is the concatenation, in child order, of
// extractNodeString applied recursively to each child.
func extractNodeString(n *html.Node) string {
	if n.Type == html.TextNode {
		return n.Data
	}

	var text strings.Builder
	for _, child := range h5.Children(n) {
		text.WriteString(extractNodeString(child))
	}
	return text.String()
}
Example #4
0
func rewriteBody(containerSelector string, dest io.Writer, body string) (err error) {
	if containerSelector == "" {
		dest.Write([]byte(body))
		return
	}

	var chain *selector.Chain
	var document *h5.Tree

	if document, err = h5.NewFromString(body); err != nil {
		err = fmt.Errorf("invalid html document: %v", err)
		return
	}

	var titleNode string
	if titleNode, err = getTitleNode(document); err != nil {
		return
	}

	if chain, err = selector.Selector(containerSelector); err != nil {
		err = fmt.Errorf("invalid css: %v", containerSelector)
		return
	}

	if matches := chain.Find(document.Top()); len(matches) > 0 {
		match := matches[0:1] // Take only the first match
		newBody := h5.RenderNodesToString(h5.Children(match[0]))

		fmt.Printf("data: %v", h5.Data(match[0]))

		dest.Write([]byte(titleNode))
		dest.Write([]byte(newBody))
		return
	}

	err = fmt.Errorf("container not found")
	return
}
Example #5
0
// Import builds the list of shows from the rows matched by
// "#concert_schedule tr" under root.
//
// The first matched row is treated as the header: the positions of the
// "日程" (date), "会場" (venue), "開場" (doors open) and "開演" (start)
// columns are taken from it. A header label that is absent leaves its index
// at 0. Every following row yields one event.Show.
//
// After all rows are read, each show's YAKeyword is set to
// "<month>/<day> <group>". When several shows share the same date string the
// keyword also carries a time-of-day marker: "朝" (start hour before 12),
// "夜" (start hour after 17) or "昼" (otherwise).
//
// It returns (nil, nil) when no rows match, and an error when the selector
// cannot be compiled or a row has fewer columns than the header requires.
func (importer *HPEventImporter) Import(root *html.Node) ([]*event.Show, error) {
	groupName := getGroupKeyword(root)
	chain, err := selector.Selector("#concert_schedule tr")
	if err != nil {
		return nil, err
	}
	rows := chain.Find(root)
	if len(rows) == 0 {
		return nil, nil
	}
	var idxDate, idxVenue, idxOpen, idxStart int

	dateBucket := make(map[string][]*event.Show)
	shows := make([]*event.Show, 0, len(rows)-1)
	for i, row := range rows {
		columns := h5.Children(row)
		if i == 0 { // header row
			for j, col := range columns {
				switch extractNodeString(col) {
				case "日程":
					idxDate = j
				case "会場":
					idxVenue = j
				case "開場":
					idxOpen = j
				case "開演":
					idxStart = j
				}
			}
			continue
		}

		// Guard against rows that are shorter than the header so that a
		// malformed table produces an error instead of an index panic.
		for _, idx := range []int{idxDate, idxVenue, idxOpen, idxStart} {
			if idx >= len(columns) {
				return nil, fmt.Errorf(
					"schedule row %d has %d columns, column index %d is required",
					i, len(columns), idx,
				)
			}
		}

		show := &event.Show{}
		show.VenueName = parseVenue(extractNodeString(columns[idxVenue]))
		datestr := extractNodeString(columns[idxDate])
		show.OpenAt = parseDate(datestr, extractNodeString(columns[idxOpen]))
		show.StartAt = parseDate(datestr, extractNodeString(columns[idxStart]))
		show.CreatedAt = time.Now()
		show.UpdatedAt = show.CreatedAt
		shows = append(shows, show)
		// Group by the raw date string so same-day shows can be told apart
		// when keywords are assigned below.
		dateBucket[datestr] = append(dateBucket[datestr], show)
	}

	// Set Keywords
	for _, bucket := range dateBucket {
		if len(bucket) == 1 {
			startAt := bucket[0].StartAt
			bucket[0].YAKeyword = fmt.Sprintf(
				"%d/%d %s",
				startAt.Month(),
				startAt.Day(),
				groupName,
			)
			continue
		}
		for _, show := range bucket {
			startAt := show.StartAt
			var timeKey string
			switch hour := startAt.Hour(); {
			case hour < 12:
				timeKey = "朝"
			case hour > 17:
				timeKey = "夜"
			default:
				timeKey = "昼"
			}
			show.YAKeyword = fmt.Sprintf(
				"%d/%d %s %s",
				startAt.Month(),
				startAt.Day(),
				timeKey,
				groupName,
			)
		}
	}
	return shows, nil
}