Esempio n. 1
0
func main() {

	// Show Usage info & exit
	if len(os.Args) == 1 || os.Args[1] == "-h" || os.Args[1] == "--help" {
		usage()
		os.Exit(0)
	}

	log.SetPrefix("html2md: ")

	src := "-"
	if len(os.Args) > 1 {
		src = os.Args[1]
	}

	dst := "-"
	if len(os.Args) > 2 {
		src = os.Args[2]
	}

	input, err := readerFrom(src)
	if err != nil {
		log.Fatal("Error while opening <in_file>", err)
	}

	output, err := writerFrom(dst)
	if err != nil {
		log.Fatal("Error while opening <out_file>", err)
	}

	// Read from input
	html, err := ioutil.ReadAll(input)
	if err != nil {
		log.Fatal("Error while reading <in_file>", err)
	}

	// Convert from HTML to MarkDown
	markdown := html2md.Convert(string(html))
	markdown = normalize(markdown)

	// Write to output
	_, err = output.Write([]byte(markdown))
	if err != nil {
		log.Fatal("Error while writing markdown to <out_file>", err)
	}
}
Esempio n. 2
0
// ParseOneProject 处理单个 project
func (ProjectLogic) ParseOneProject(projectUrl string) error {
	if !strings.HasPrefix(projectUrl, "http") {
		projectUrl = OsChinaDomain + projectUrl
	}

	var (
		doc *goquery.Document
		err error
	)

	// 加上 ?fromerr=xfwefs,否则页面有 js 重定向
	if doc, err = goquery.NewDocument(projectUrl + "?fromerr=xfwefs"); err != nil {
		return errors.New("goquery fetch " + projectUrl + " error:" + err.Error())
	}

	// 标题
	category := strings.TrimSpace(doc.Find(".Project .name").Text())
	name := strings.TrimSpace(doc.Find(".Project .name u").Text())
	if category == "" && name == "" {
		return errors.New("projectUrl:" + projectUrl + " category and name are empty")
	}

	tmpIndex := strings.LastIndex(category, name)
	if tmpIndex != -1 {
		category = category[:tmpIndex]
	}

	// uri
	uri := projectUrl[strings.LastIndex(projectUrl, "/")+1:]

	project := &model.OpenProject{}

	_, err = MasterDB.Where("uri=?", uri).Get(project)
	// 已经存在
	if project.Id != 0 {
		logger.Infoln("url", projectUrl, "has exists!")
		return nil
	}

	logoSelection := doc.Find(".Project .PN img")
	if logoSelection.AttrOr("title", "") != "" {
		project.Logo = logoSelection.AttrOr("src", "")

		if !strings.HasPrefix(project.Logo, "http") {
			project.Logo = OsChinaDomain + project.Logo
		}

		project.Logo, err = DefaultUploader.TransferUrl(nil, project.Logo, ProjectLogoPrefix)
		if err != nil {
			logger.Errorln("project logo upload error:", err)
		}
	}

	// 获取项目相关链接
	doc.Find("#Body .urls li").Each(func(i int, liSelection *goquery.Selection) {
		aSelection := liSelection.Find("a")
		uri := util.FetchRealUrl(OsChinaDomain + aSelection.AttrOr("href", ""))
		switch aSelection.Text() {
		case "软件首页":
			project.Home = uri
		case "软件文档":
			project.Doc = uri
		case "软件下载":
			project.Download = uri
		}
	})

	ctime := time.Now()
	doc.Find("#Body .attrs li").Each(func(i int, liSelection *goquery.Selection) {
		aSelection := liSelection.Find("a")
		txt := aSelection.Text()
		if i == 0 {
			project.Licence = txt
			if txt == "未知" {
				project.Licence = "其他"
			}
		} else if i == 1 {
			project.Lang = txt
		} else if i == 2 {
			project.Os = txt
		} else if i == 3 {
			dtime, err := time.ParseInLocation("2006年01月02日", aSelection.Last().Text(), time.Local)
			if err != nil {
				logger.Errorln("parse ctime error:", err)
			} else {
				ctime = dtime.Local()
			}
		}
	})

	project.Name = name
	project.Category = category
	project.Uri = uri
	project.Repo = strings.TrimSpace(doc.Find("#Body .github-widget").AttrOr("data-repo", ""))
	project.Src = "https://github.com/" + project.Repo

	pos := strings.Index(project.Repo, "/")
	if pos > -1 {
		project.Author = project.Repo[:pos]
	} else {
		project.Author = "网友"
	}

	if project.Doc == "" {
		// TODO:暂时认为一定是 Go 语言
		project.Doc = "https://godoc.org/" + project.Src[8:]
	}

	desc := ""
	doc.Find("#Body .detail").Find("p").NextAll().Each(func(i int, domSelection *goquery.Selection) {
		doc.FindSelection(domSelection).WrapHtml(`<div id="tmp` + strconv.Itoa(i) + `"></div>`)
		domHtml, _ := doc.Find("#tmp" + strconv.Itoa(i)).Html()
		if domSelection.Is("pre") {
			desc += domHtml + "\n\n"
		} else {
			desc += html2md.Convert(domHtml) + "\n\n"
		}
	})

	project.Desc = strings.TrimSpace(desc)
	project.Username = PresetUsernames[rand.Intn(4)]
	project.Status = model.ProjectStatusOnline
	project.Ctime = model.OftenTime(ctime)

	_, err = MasterDB.Insert(project)
	if err != nil {
		return errors.New("insert into open project error:" + err.Error())
	}

	return nil
}