func main() { // Show Usage info & exit if len(os.Args) == 1 || os.Args[1] == "-h" || os.Args[1] == "--help" { usage() os.Exit(0) } log.SetPrefix("html2md: ") src := "-" if len(os.Args) > 1 { src = os.Args[1] } dst := "-" if len(os.Args) > 2 { src = os.Args[2] } input, err := readerFrom(src) if err != nil { log.Fatal("Error while opening <in_file>", err) } output, err := writerFrom(dst) if err != nil { log.Fatal("Error while opening <out_file>", err) } // Read from input html, err := ioutil.ReadAll(input) if err != nil { log.Fatal("Error while reading <in_file>", err) } // Convert from HTML to MarkDown markdown := html2md.Convert(string(html)) markdown = normalize(markdown) // Write to output _, err = output.Write([]byte(markdown)) if err != nil { log.Fatal("Error while writing markdown to <out_file>", err) } }
// ParseOneProject 处理单个 project func (ProjectLogic) ParseOneProject(projectUrl string) error { if !strings.HasPrefix(projectUrl, "http") { projectUrl = OsChinaDomain + projectUrl } var ( doc *goquery.Document err error ) // 加上 ?fromerr=xfwefs,否则页面有 js 重定向 if doc, err = goquery.NewDocument(projectUrl + "?fromerr=xfwefs"); err != nil { return errors.New("goquery fetch " + projectUrl + " error:" + err.Error()) } // 标题 category := strings.TrimSpace(doc.Find(".Project .name").Text()) name := strings.TrimSpace(doc.Find(".Project .name u").Text()) if category == "" && name == "" { return errors.New("projectUrl:" + projectUrl + " category and name are empty") } tmpIndex := strings.LastIndex(category, name) if tmpIndex != -1 { category = category[:tmpIndex] } // uri uri := projectUrl[strings.LastIndex(projectUrl, "/")+1:] project := &model.OpenProject{} _, err = MasterDB.Where("uri=?", uri).Get(project) // 已经存在 if project.Id != 0 { logger.Infoln("url", projectUrl, "has exists!") return nil } logoSelection := doc.Find(".Project .PN img") if logoSelection.AttrOr("title", "") != "" { project.Logo = logoSelection.AttrOr("src", "") if !strings.HasPrefix(project.Logo, "http") { project.Logo = OsChinaDomain + project.Logo } project.Logo, err = DefaultUploader.TransferUrl(nil, project.Logo, ProjectLogoPrefix) if err != nil { logger.Errorln("project logo upload error:", err) } } // 获取项目相关链接 doc.Find("#Body .urls li").Each(func(i int, liSelection *goquery.Selection) { aSelection := liSelection.Find("a") uri := util.FetchRealUrl(OsChinaDomain + aSelection.AttrOr("href", "")) switch aSelection.Text() { case "软件首页": project.Home = uri case "软件文档": project.Doc = uri case "软件下载": project.Download = uri } }) ctime := time.Now() doc.Find("#Body .attrs li").Each(func(i int, liSelection *goquery.Selection) { aSelection := liSelection.Find("a") txt := aSelection.Text() if i == 0 { project.Licence = txt if txt == "未知" { project.Licence = "其他" } } else if i == 1 { project.Lang = txt } else if i == 2 { project.Os = txt } else if i == 3 { dtime, err := time.ParseInLocation("2006年01月02日", aSelection.Last().Text(), time.Local) if err != nil { logger.Errorln("parse ctime error:", err) } else { ctime = dtime.Local() } } }) project.Name = name project.Category = category project.Uri = uri project.Repo = strings.TrimSpace(doc.Find("#Body .github-widget").AttrOr("data-repo", "")) project.Src = "https://github.com/" + project.Repo pos := strings.Index(project.Repo, "/") if pos > -1 { project.Author = project.Repo[:pos] } else { project.Author = "网友" } if project.Doc == "" { // TODO:暂时认为一定是 Go 语言 project.Doc = "https://godoc.org/" + project.Src[8:] } desc := "" doc.Find("#Body .detail").Find("p").NextAll().Each(func(i int, domSelection *goquery.Selection) { doc.FindSelection(domSelection).WrapHtml(`<div id="tmp` + strconv.Itoa(i) + `"></div>`) domHtml, _ := doc.Find("#tmp" + strconv.Itoa(i)).Html() if domSelection.Is("pre") { desc += domHtml + "\n\n" } else { desc += html2md.Convert(domHtml) + "\n\n" } }) project.Desc = strings.TrimSpace(desc) project.Username = PresetUsernames[rand.Intn(4)] project.Status = model.ProjectStatusOnline project.Ctime = model.OftenTime(ctime) _, err = MasterDB.Insert(project) if err != nil { return errors.New("insert into open project error:" + err.Error()) } return nil }