func main() { if len(os.Args) < 2 { log.Fatalf("Usage: %s TOPIC", os.Args[0]) } url := "http://l1sp.org/" + os.Args[1] request, err := http.NewRequest("GET", url, nil) if err != nil { log.Fatal(err) } client := &http.Client{} resp, err := client.Do(request) if err != nil { log.Fatal(err) } defer resp.Body.Close() contents, err := ioutil.ReadAll(resp.Body) if err != nil { log.Fatal(err) } text, err := html2text.FromString(string(contents)) if err != nil { log.Fatal(err) } fmt.Println(text) }
func (bt *berthaTransformer) authorToPerson(a author) (person, error) { uuid := uuid.NewMD5(uuid.UUID{}, []byte(a.TmeIdentifier)).String() plainDescription, err := html2text.FromString(a.Biography) if err != nil { return person{}, err } altIds := alternativeIdentifiers{ UUIDS: []string{uuid}, TME: []string{a.TmeIdentifier}, } p := person{ Uuid: uuid, Name: a.Name, PrefLabel: a.Name, EmailAddress: a.Email, TwitterHandle: a.TwitterHandle, FacebookProfile: a.FacebookProfile, LinkedinProfile: a.LinkedinProfile, Description: plainDescription, DescriptionXML: a.Biography, ImageUrl: a.ImageUrl, AlternativeIdentifiers: altIds, } return p, err }
func (self *Wiki) IndexArticle(path string, article *Article) error { html, err := article.GetHtml() if err != nil { return err } text, err := html2text.FromString(html) if err != nil { return err } self.Index.Index(path, ArticleData{Name: article.Name, Content: text}) return nil }
func completeTags(episode *Episode) { logger.Debug.Println("Tag update : " + episode.Podcast.feedPodcast.Title + " - " + episode.feedEpisode.Title + " : " + episode.file()) tag, err := taglib.Read(episode.file()) if err != nil { logger.Warning.Println("Cannot complete episode tags for "+episode.Podcast.feedPodcast.Title+" - "+episode.feedEpisode.Title, err) return } defer tag.Close() var replaceArtist string if episode.feedEpisode.Author.Name != "" { replaceArtist = episode.feedEpisode.Author.Name } else { replaceArtist = episode.Podcast.feedPodcast.Title } //use the podcast title for now replaceArtist = episode.Podcast.feedPodcast.Title completeTag(taglib.Artist, replaceArtist, tag) completeTag(taglib.Album, episode.Podcast.feedPodcast.Title, tag) plaintextDescription, err := html2text.FromString(episode.feedEpisode.Description) if err == nil { episode.feedEpisode.Description = plaintextDescription } if len(episode.feedEpisode.Description) > maxCommentSize+5 { episode.feedEpisode.Description = episode.feedEpisode.Description[:maxCommentSize] + " ..." } completeTag(taglib.Comments, episode.feedEpisode.Description, tag) completeTag(taglib.Title, episode.feedEpisode.Title+" "+episode.formattedPubDate(dateFormat), tag) completeTag(taglib.Genre, "Podcast", tag) pubdate, err := episode.feedEpisode.ParsedPubDate() if err == nil { completeTag(taglib.Year, strconv.Itoa(pubdate.Year()), tag) } logger.Debug.Println("Tag Write Start for : " + episode.file()) err = tag.Save() //setAlbumArtist(episode.Podcast.feedPodcast.Title, episode.file()) logger.Debug.Println("Tag Write End for : " + episode.file()) if err != nil { logger.Warning.Println(episode.Podcast.feedPodcast.Title+" - "+episode.feedEpisode.Title+" : Cannot save the modified tags", err) } logger.Debug.Println("Tag update END : " + episode.Podcast.feedPodcast.Title + " - " + episode.feedEpisode.Title) }
//formatMessage make into text if contains html func formatMessage(message string) string { isHTML := strings.Contains(message, "<html>") if isHTML { text, err := html2text.FromString(message) if err != nil { log.Println("failed to convert to text " + err.Error()) } return text } return message }
// NewMessageFrom creates new mail message object with custom From header. func NewMessageFrom(to []string, from, subject, htmlBody string) *Message { msg := gomail.NewMessage() msg.SetHeader("From", from) msg.SetHeader("To", to...) msg.SetHeader("Subject", subject) msg.SetDateHeader("Date", time.Now()) body, err := html2text.FromString(htmlBody) if err != nil { log.Error(4, "html2text.FromString: %v", err) msg.SetBody("text/html", htmlBody) } else { msg.SetBody("text/plain", body) if setting.MailService.EnableHTMLAlternative { msg.AddAlternative("text/html", htmlBody) } } return &Message{ Message: msg, } }
func (c *Context) View(i int) { var email protocol.Encrypted request, err := http.NewRequest("GET", fmt.Sprintf("%v/inbox/%v", c.host, i), nil) request.SetBasicAuth("user", c.password) response, err := c.client.Do(request) if err != nil { log.Fatal(err) } if response.StatusCode == http.StatusOK { data, err := ioutil.ReadAll(response.Body) response.Body.Close() if err != nil { log.Fatal(err) } err = proto.Unmarshal(data, &email) if err != nil { log.Fatal(err) } key, err := rsa.DecryptPKCS1v15(rand.Reader, &c.key, email.Key) if err != nil { log.Fatal(err) } cipher, err := aes.NewCipher(key) if err != nil { log.Fatal(err) } cipher.Decrypt(email.Data, email.Data) decrypted := protocol.Email{} err = proto.Unmarshal(email.Data, &decrypted) if err != nil { log.Fatal(err) } process := func(mediaType string, body []byte) { fmt.Println(mediaType) if mediaType == "text/plain" { fmt.Println(string(body)) } else if mediaType == "text/html" { text, err := html2text.FromString(string(body)) if err != nil { log.Fatal(err) } fmt.Println(text) } } message, err := goemail.ParseMessage(strings.NewReader(*decrypted.Mail)) if err != nil { log.Fatal(err) } if message.HasBody() { mediaType, _, _ := message.Header.ContentType() process(mediaType, message.Body) } else { for _, part := range message.MessagesAll() { mediaType, _, err := part.Header.ContentType() if err != nil { log.Fatal(err) } process(mediaType, part.Body) } } } }
// ParseMIMEBody parses the body of the message object into a tree of MIMEPart // objects, each of which is aware of its content type, filename and headers. // If the part was encoded in quoted-printable or base64, it is decoded before // being stored in the MIMEPart object. func ParseMIMEBody(mailMsg *mail.Message) (*MIMEBody, error) { mimeMsg := &MIMEBody{ IsTextFromHTML: false, header: mailMsg.Header, } if !IsMultipartMessage(mailMsg) { // Attachment only? if IsBinaryBody(mailMsg) { return binMIME(mailMsg) } // Parse as text only bodyBytes, err := decodeSection(mailMsg.Header.Get("Content-Transfer-Encoding"), mailMsg.Body) if err != nil { return nil, fmt.Errorf("Error decoding text-only message: %v", err) } // Handle plain ASCII text, content-type unspecified mimeMsg.Text = string(bodyBytes) // Process top-level content-type ctype := mailMsg.Header.Get("Content-Type") if ctype != "" { if mediatype, mparams, err := mime.ParseMediaType(ctype); err == nil { if mparams["charset"] != "" { // Convert plain text to UTF8 if content type specified a charset newStr, err := ConvertToUTF8String(mparams["charset"], bodyBytes) if err != nil { return nil, err } mimeMsg.Text = newStr } else if mediatype == "text/html" { // charset is empty, look in HTML body for charset charset, err := charsetFromHTMLString(mimeMsg.Text) if charset != "" && err == nil { newStr, err := ConvertToUTF8String(charset, bodyBytes) if err == nil { mimeMsg.Text = newStr } } } if mediatype == "text/html" { mimeMsg.HTML = mimeMsg.Text // Empty Text will trigger html2text conversion below mimeMsg.Text = "" } } } } else { // Parse top-level multipart ctype := mailMsg.Header.Get("Content-Type") mediatype, params, err := mime.ParseMediaType(ctype) if err != nil { return nil, fmt.Errorf("Unable to parse media type: %v", err) } if !strings.HasPrefix(mediatype, "multipart/") { return nil, fmt.Errorf("Unknown mediatype: %v", mediatype) } boundary := params["boundary"] if boundary == "" { return nil, fmt.Errorf("Unable to locate boundary param in Content-Type header") } // Root Node of our tree root := NewMIMEPart(nil, mediatype) mimeMsg.Root = root err = parseParts(root, mailMsg.Body, boundary) if err != nil { return nil, err } // Locate text body if mediatype == "multipart/altern" { match := BreadthMatchFirst(root, func(p MIMEPart) bool { return p.ContentType() == "text/plain" && p.Disposition() != "attachment" }) if match != nil { if match.Charset() != "" { newStr, err := ConvertToUTF8String(match.Charset(), match.Content()) if err != nil { return nil, err } mimeMsg.Text += newStr } else { mimeMsg.Text += string(match.Content()) } } } else { // multipart is of a mixed type match := DepthMatchAll(root, func(p MIMEPart) bool { return p.ContentType() == "text/plain" && p.Disposition() != "attachment" }) for i, m := range match { if i > 0 { mimeMsg.Text += "\n--\n" } if m.Charset() != "" { newStr, err := ConvertToUTF8String(m.Charset(), m.Content()) if err != nil { return nil, err } mimeMsg.Text += newStr } else { mimeMsg.Text += string(m.Content()) } } } // Locate HTML body match := BreadthMatchFirst(root, func(p MIMEPart) bool { return p.ContentType() == "text/html" && p.Disposition() != "attachment" }) if match != nil { if match.Charset() != "" { newStr, err := ConvertToUTF8String(match.Charset(), match.Content()) if err != nil { return nil, err } mimeMsg.HTML += newStr } else { mimeMsg.HTML = string(match.Content()) } } // Locate attachments mimeMsg.Attachments = BreadthMatchAll(root, func(p MIMEPart) bool { return p.Disposition() == "attachment" || p.ContentType() == "application/octet-stream" }) // Locate inlines mimeMsg.Inlines = BreadthMatchAll(root, func(p MIMEPart) bool { return p.Disposition() == "inline" }) // Locate others parts not handled in "Attachments" and "inlines" mimeMsg.OtherParts = BreadthMatchAll(root, func(p MIMEPart) bool { if strings.HasPrefix(p.ContentType(), "multipart/") { return false } if p.Disposition() != "" { return false } if p.ContentType() == "application/octet-stream" { return false } return p.ContentType() != "text/plain" && p.ContentType() != "text/html" }) } // Down-convert HTML to text if necessary if mimeMsg.Text == "" && mimeMsg.HTML != "" { mimeMsg.IsTextFromHTML = true var err error if mimeMsg.Text, err = html2text.FromString(mimeMsg.HTML); err != nil { // Fail gently mimeMsg.Text = "" return mimeMsg, err } } return mimeMsg, nil }
func main() { domain := flag.String("domain", "", "mailgun domain") privateKey := flag.String("private-key", "", "secret mailgun api key") publicKey := flag.String("public-key", "", "mailgun public api key") dry := flag.Bool("dry", false, "do not update cache & only print to stdout (no e-mail)") verbose := flag.Bool("verbose", false, "Print detailed output per monitored url.") flag.Parse() if *domain == "" || *privateKey == "" || *publicKey == "" { log.Fatalln("domain, private-key and public-key flags are required") } gun := m.NewMailgun(*domain, *privateKey, *publicKey) urls := loadUrls() for _, url := range urls { body, err := requestURL(url) if err != nil { log.Println("Skipped URL because of error requesting it:", url) continue } filename := getExecFolder() + "/cache/" + getMD5Hash(url) + ".html" cached, err := ioutil.ReadFile(filename) if err != nil { if *dry == false { updateCache(filename, body) } switch err := err.(type) { case *os.PathError: fmt.Printf("This URL will now be monitored: %s\n\n", url) default: log.Fatalf("Fatal errors type %T\n", err) } } else { if reflect.DeepEqual(cached, body) { if *verbose { fmt.Printf("This URL didn't change: %s\n\n", url) } } else { cachedDoc := getGoqueryDoc(cached) currentDoc := getGoqueryDoc(body) cachedContent, _ := cachedDoc.Find("#content").Html() currentContent, _ := currentDoc.Find("#content").Html() if cachedContent == currentContent { if *verbose { fmt.Println("The website changed, but the content stayed the same.") } } else { cachedText, _ := html2text.FromString(cachedContent) currentText, _ := html2text.FromString(currentContent) diff := difflib.Diff(strings.Split(cachedText, "\n"), strings.Split(currentText, "\n")) msg := createMessage(diff, url) htmlMsg := createHTMLMessage(diff, url) if *dry { fmt.Println(msg) } else { sendEmails(gun, msg, htmlMsg) } } if *dry == false { updateCache(filename, body) } } } } }
// ReadMessage reads message from r. // Using Send() on read messages can result in garbage in headers, // make sure to remove unnecessary ones, before sending. func ReadMessage(r io.Reader) (*Message, error) { rawmsg, err := mail.ReadMessage(r) if err != nil { return nil, err } m := new(Message) // MessageID if id := rawmsg.Header.Get("Message-Id"); id != "" { m.ID = id } else { m.ID = makeID() } // Date if date, err := rawmsg.Header.Date(); err == nil { m.Date = date } else { m.Date = time.Now() } // Subject if subject, err := decodeHeader(rawmsg.Header.Get("Subject")); err == nil && subject != "" { m.Subject = subject } else if err != nil { return nil, fmt.Errorf("decode header: %v", err) } else { m.Subject = "No subject" } // Return-Path if h := rawmsg.Header.Get("Return-Path"); h != "" { retpath, err := DecodeAddress(h) if err != nil { return nil, fmt.Errorf("parse return-path: %v", err) } if len(retpath) > 0 { m.ReturnPath = retpath[0] } } // From if h := rawmsg.Header.Get("From"); h != "" { from, err := DecodeAddress(h) if err != nil { return nil, fmt.Errorf("parse from: %v", err) } if len(from) > 0 { m.From = from[0] } } // To if h := rawmsg.Header.Get("To"); h != "" { to, err := DecodeAddress(h) if err != nil { return nil, fmt.Errorf("parse to: %v", err) } m.To = to } else { m.To = make([]string, 0) } // CC if h := rawmsg.Header.Get("Cc"); h != "" { cc, err := DecodeAddress(h) if err != nil { return nil, fmt.Errorf("parse cc: %v", err) } m.CC = cc } else { m.CC = make([]string, 0) } // If return-path is unset, set it using from. if m.ReturnPath == "" { m.ReturnPath = m.From } // Decode rest of the headers. headers := make(map[string]string) for k, v := range rawmsg.Header { switch k { case "Message-Id", "Subject", "Date", "Return-Path", "From", "To", "Cc": continue } for _, w := range v { decoded, err := decodeHeader(w) if err != nil { return nil, fmt.Errorf("decode header: %v", err) } str := headers[k] if str != "" { str += " " } headers[k] = str + decoded } } m.Headers = headers // Decode body. if err := m.decodeBody(rawmsg.Body, textproto.MIMEHeader(rawmsg.Header)); err != nil { return nil, fmt.Errorf("decode body: %v", err) } if len(m.HTML) > 0 { m.Body, err = html2text.FromString(m.HTML) if err != nil { return nil, err } m.IsHTML = true } return m, nil }