Example #1
0
// Convert turns an html article into a latex text body.
//
// htmlStr is the html source and articleUrl the address it was fetched
// from; both are handed to generateDoc to build the document that all
// conversion passes operate on. codeLang names the language used to
// highlight code blocks and may be the empty string. When excludeFigures
// is true, figures are left out of the latex source.
func Convert(htmlStr string, articleUrl string, codeLang string, excludeFigures bool) string {
	doc := generateDoc(htmlStr, articleUrl)

	// Preparation passes on the document, run before any latex is generated.
	escapeDocLatexMetaChars(doc)
	convertDocUniCode(doc)
	unwrapDrop(doc)

	// Replace html environments by their latex counterparts. The order of
	// these passes is significant and must not be changed.
	convertImages(doc, excludeFigures)
	convertVideos(doc)
	convertCode(doc, codeLang) // code blocks include latex metachars {,}
	convertFootnotes(doc)
	convertLinks(doc)
	convertHeading(doc)
	convertList(doc)
	convertQuotations(doc)

	// Inline formatting elements become tex macros. Every macro is closed
	// by "}"; line breaks nested inside such an element are dealt with by
	// the helper so that the tex engine handles them gracefully.
	inlineMacros := []struct{ selector, open string }{
		{"em", "\\emph{"},
		{"i", "\\textit{"},
		{"b", "\\textbf{"},
		{"strong", "\\textbf{"},
		{"u", "\\underline{"},
		{"sup", "\\textsuperscript{"},
		{"sub", "\\textsubscript{"},
		{"strike", "\\sout{"},
		{"span[style=\"text-decoration: line-through\"]", "\\sout{"},
	}
	for _, m := range inlineMacros {
		wrapElementsAndDeleteLinebreak(doc, m.selector, m.open, "}")
	}

	// Outside of tex macros a double line break separates paragraphs etc.
	wrapElementsAndKeepLinebreak(doc, "p", "\n\n", "")
	wrapElementsAndKeepLinebreak(doc, "br", "", "\n\n")

	// Taking the text of the document strips the html markup while the
	// latex code inserted above survives.
	body := convertInlineMath(doc.Text())

	// Final clean-up on the plain string.
	body = strtrans.LinebreaksToTwoLinebreaks(body)
	body = strdel.EmptyBrackets(body)
	return deleteSpaceBeforeClosingBrackets(body)
}
Example #2
0
// Text renders the article as a plain-text document with this layout:
//
//	Title
//	=====
//	By Author and Author
//
//	Abstract
//
//	---------------------
//	Content
//	---------------------
//
//	From Journal on Date
//	Source: Url
//
// The byline is left out when the article has no authors, and the abstract
// is left out when it is empty. The assembled text is passed through
// strtrans.LinebreaksToTwoLinebreaks before it is returned.
func (a Article) Text() string {
	// Title, underlined with one "=" per character. Runes are counted
	// instead of bytes so that a title containing multi-byte UTF-8
	// characters does not get an overlong underline.
	title := a.Title + "\n" + strings.Repeat("=", len([]rune(a.Title))) + "\n"

	// Authors, joined by " and ". Without any author no dangling "By "
	// line is written; only the separating line breaks remain.
	var authors string
	if len(a.Authors) > 0 {
		authors = "By " + strings.Join(a.Authors, " and ") + "\n"
	}
	authors += "\n\n"

	// Abstract (optional).
	var abstract string
	if a.Abstract != "" {
		abstract = a.Abstract + "\n\n"
	}

	// The reference line doubles as the measure for the horizontal rules
	// that frame the content; again measured in runes, not bytes.
	origin := "From " + a.Journal + " on " + a.Date
	rule := strings.Repeat("-", len([]rune(origin)))

	// Content, framed by the rules.
	content := rule + "\n"
	content += a.PlainTextContent() + "\n\n"
	content += rule + "\n\n"

	// Reference
	ref := origin + "\n"
	ref += "Source: " + a.Url

	return strtrans.LinebreaksToTwoLinebreaks(title + authors + abstract + content + ref)
}