func CompareNode(originalNode, expectedNode *html.Node) *Error { err := &Error{ Got: h5.RenderNodesToString([]*html.Node{originalNode}), Expected: h5.RenderNodesToString([]*html.Node{expectedNode}), } if originalNode.Type != expectedNode.Type { err.Description = "Node type does not match" return err } if originalNode.Data != expectedNode.Data { err.Description = "Nodes data does not match" return err } for _, attr := range expectedNode.Attr { attrFound := false attrValueSame := false for _, originalAttr := range originalNode.Attr { if originalAttr.Key == attr.Key { attrFound = true if originalAttr.Val == attr.Val { attrValueSame = true } else { if attr.Key == "class" { attrValueSame = equalWithSeparator(originalAttr.Val, attr.Val, " ") } if attr.Key == "style" { attrValueSame = equalWithSeparator(originalAttr.Val, attr.Val, ";") } } } } if !attrFound { err.Description = fmt.Sprintf("Attribute %v not found in node", attr.Key) return err } if !attrValueSame { err.Description = fmt.Sprintf("Attribute %v value is different", attr.Key) return err } } if len(originalNode.Attr) != len(expectedNode.Attr) { err.Description = "Different number of node attributes" return err } return CompareNodes(h5.Children(originalNode), h5.Children(expectedNode)) }
func getTagValueAsString(node *html.Node, selector string) string { s, _ := css.Selector(selector) nodes := s.Find(node) if len(nodes) > 0 { if children := h5.Children(nodes[0]); len(children) > 0 { return children[0].Data } } return "" }
func extractNodeString(n *html.Node) string { if n.Type == html.TextNode { return n.Data } else { nodes := h5.Children(n) buff := make([]string, len(nodes)) for i, n := range nodes { buff[i] = extractNodeString(n) } return strings.Join(buff, "") } }
func rewriteBody(containerSelector string, dest io.Writer, body string) (err error) { if containerSelector == "" { dest.Write([]byte(body)) return } var chain *selector.Chain var document *h5.Tree if document, err = h5.NewFromString(body); err != nil { err = fmt.Errorf("invalid html document: %v", err) return } var titleNode string if titleNode, err = getTitleNode(document); err != nil { return } if chain, err = selector.Selector(containerSelector); err != nil { err = fmt.Errorf("invalid css: %v", containerSelector) return } if matches := chain.Find(document.Top()); len(matches) > 0 { match := matches[0:1] // Take only the first match newBody := h5.RenderNodesToString(h5.Children(match[0])) fmt.Printf("data: %v", h5.Data(match[0])) dest.Write([]byte(titleNode)) dest.Write([]byte(newBody)) return } err = fmt.Errorf("container not found") return }
func (importer *HPEventImporter) Import(root *html.Node) ([]*event.Show, error) { groupName := getGroupKeyword(root) s, _ := selector.Selector("#concert_schedule tr") nodes := s.Find(root) if len(nodes) == 0 { return nil, nil } var idxDate, idxVenue, idxOpen, idxStart int dateBucket := make(map[string][]*event.Show) shows := make([]*event.Show, len(nodes)-1) for i, row := range nodes { columns := h5.Children(row) if i == 0 { // header row for j, col := range columns { val := extractNodeString(col) switch val { case "日程": idxDate = j break case "会場": idxVenue = j break case "開場": idxOpen = j break case "開演": idxStart = j break } } } else { // show row s := &event.Show{} s.VenueName = parseVenue(extractNodeString(columns[idxVenue])) datestr := extractNodeString(columns[idxDate]) s.OpenAt = parseDate(datestr, extractNodeString(columns[idxOpen])) s.StartAt = parseDate(datestr, extractNodeString(columns[idxStart])) s.CreatedAt = time.Now() s.UpdatedAt = s.CreatedAt shows[i-1] = s if bucket, ok := dateBucket[datestr]; !ok { dateBucket[datestr] = []*event.Show{s} } else { dateBucket[datestr] = append(bucket, s) } } } // Set Keywords for _, bucket := range dateBucket { numShowsInDay := len(bucket) if numShowsInDay == 1 { startAt := bucket[0].StartAt bucket[0].YAKeyword = fmt.Sprintf( "%d/%d %s", startAt.Month(), startAt.Day(), groupName, ) } else { for _, show := range bucket { startAt := show.StartAt timeKey := "" if startAt.Hour() < 12 { timeKey = "朝" } else if startAt.Hour() > 17 { timeKey = "夜" } else { timeKey = "昼" } show.YAKeyword = fmt.Sprintf( "%d/%d %s %s", startAt.Month(), startAt.Day(), timeKey, groupName, ) } } } return shows, nil }