// ToSeconds returns total seconds from the time format "01:02:03" func ToSeconds(str dna.String) dna.Int { if str == "" { return 0 } else { intervals := dna.IntArray(str.Split(":").Map(func(val dna.String, idx dna.Int) dna.Int { return val.ToInt() }).([]dna.Int)) switch intervals.Length() { case 3: return intervals[0]*3600 + intervals[1]*60 + intervals[2] case 2: return intervals[0]*60 + intervals[1] case 1: return intervals[0] default: return 0 } } }
// getAPIAlbumSimilars fetches album's similars // with the following url format: // http://www.allmusic.com/album/google-bot-mw0002585207/similar/mobile func getAPIAlbumSimilars(album *APIAlbum) <-chan bool { channel := make(chan bool, 1) go func() { link := "http://www.allmusic.com/album/google-bot-mw" + album.Id.ToFormattedString(10, true) + "/similar/mobile" result, err := http.Get(link) if err == nil { data := &result.Data idsArr := data.FindAllString(`<a href=".+`, -1) ids := dna.IntArray(idsArr.Map(func(val dna.String, idx dna.Int) dna.Int { idArr := val.FindAllStringSubmatch(`mw([0-9]+)`, -1) if len(idArr) > 0 { return idArr[0][1].ToInt() } else { return 0 } }).([]dna.Int)).Filter(func(val dna.Int, idx dna.Int) dna.Bool { if val > 0 { return true } else { return false } }) if ids.Length() > 0 { album.Similars = ids } } channel <- true }() return channel }
// SelectMissingIds accepts a table name as an input and a list of ids as a source. // It returns a new list of ids that does not exist in the destination table // // * tblName : a table name // * srcIds : a source ids // * db : a pointer to connected databased // * Returns a new list of ids which are not from the specified table // // The format of sql statement is: // WITH dna (id) AS (VALUES (5),(6),(7),(8),(9)) // SELECT id FROM dna WHERE NOT EXISTS // (SELECT 1 from ziartists WHERE id=dna.id) func SelectMissingIds(tblName dna.String, srcIds *dna.IntArray, db *sqlpg.DB) (*dna.IntArray, error) { if srcIds.Length() > 0 { val := dna.StringArray(srcIds.Map(func(val dna.Int, idx dna.Int) dna.String { return "(" + val.ToString() + ")" }).([]dna.String)) selectStmt := "with dna (id) as (values " + val.Join(",") + ") \n" selectStmt += "SELECT id FROM dna WHERE NOT EXISTS\n (SELECT 1 from " + tblName + " WHERE id=dna.id)" ids := &[]dna.Int{} err := db.Select(ids, selectStmt) switch { case err != nil: return nil, err case err == nil && ids != nil: slice := dna.IntArray(*ids) return &slice, nil case err == nil && ids == nil: return &dna.IntArray{}, nil default: panic("Default case triggered. Case is not expected. Cannot select non existed ids") } } else { return nil, errors.New("Empty input array") } }
// UpdateSongFreaks gets lastest songs,albums,artists and videos from songfreaks.com // The update process goes through 4 steps: // Step 1: Initalizing db connection, loading site config and state handler. // Step 2: Finding new songs, insert new albums,artists and videos if found. // Step 3: Updating found new albums in Step 2. // Step 4: Recovering failed sql statements in Step 2. func UpdateSongFreaks() { db, err := sqlpg.Connect(sqlpg.NewSQLConfig(SqlConfigPath)) dna.PanicError(err) siteConf, err := LoadSiteConfig("sf", SiteConfigPath) siteConf.NConcurrent = 20 dna.PanicError(err) // Update new songs state := NewStateHandler(new(sf.APISongFreaksTrack), siteConf, db) state.TableName = "sfsongs" Update(state) // Update "ratings", "songids", "review_author", "review" of song ids := &[]dna.Int{} query := dna.Sprintf("SELECT id FROM sfalbums where checktime > '%v' AND array_length(songids, 1) is NULL", time.Now().Format("2006-01-02")) // dna.Log(query) err = db.Select(ids, query) if err != nil { dna.PanicError(err) } idsSlice := dna.IntArray(*ids) if idsSlice.Length() > 0 { state = NewStateHandlerWithExtSlice(new(sf.APISongFreaksAlbum), &idsSlice, siteConf, db) Update(state) } else { dna.Log("No new albums found") } // Recover failed sql statements RecoverErrorQueries(SqlErrorLogPath, db) CountDown(3*time.Second, QuittingMessage, EndingMessage) db.Close() }
// This function will decode a cipher string into id. func Decrypt(cipher dna.String) dna.Int { arr := dna.StringArray{cipher[0:2], cipher[2:3], cipher[3:4], cipher[4:6], cipher[6:7], cipher[7:8], cipher[8:10]}.Filter( func(value dna.String, index dna.Int) dna.Bool { return value != "" }) return dna.IntArray(arr.Map(func(v dna.String, i dna.Int) dna.Int { return ns[6-i].IndexOf(v) }).([]dna.Int)).Join("").ToInt() }
// updateEmptyTitles returns an error if there is no missing titles // of songs or videos. func updateEmptyTitles(db *sqlpg.DB, siteConf *SiteConfig, lastId dna.Int) bool { var queryPat dna.String = "select id from %v where id > %v and title = ''" songids := &[]dna.Int{} songQuery := dna.Sprintf(queryPat, "csnsongs", lastId) db.Select(songids, songQuery) videoQuery := dna.Sprintf(queryPat, "csnvideos", lastId) videoids := &[]dna.Int{} db.Select(videoids, videoQuery) ids := dna.IntArray(*songids).Concat(dna.IntArray(*videoids)) if ids.Length() > 0 { dna.Log(ids) state := NewStateHandlerWithExtSlice(new(csn.SongVideoUpdater), &ids, siteConf, db) Update(state) RecoverErrorQueries(SqlErrorLogPath, db) return false } else { // dna.Log("No record needs to be updated.") return true } // return donec }
func GetUsersWithInitialId(initialId dna.Int) (*Users, error) { rids := rand.Perm(UsersMagnitude) ids := dna.NewIntArray(rids) ids = dna.IntArray(ids.Map(func(val dna.Int, idx dna.Int) dna.Int { return val + initialId*dna.Int(UsersMagnitude) }).([]dna.Int)) // dna.Log(ids) apiUsers, err := GetAPIUsers(ids) if err != nil { return nil, err } else { var users []*User for _, apiUser := range apiUsers { user := NewUser() user.Fill(&apiUser) users = append(users, user) } return &Users{initialId, users}, nil } }
// SelectMissingIds accepts a table name as an input and a range as a source. // It returns a new list of ids that does not exist in the destination table // // * tblName : a table name // * head, tail : first and last number defines a range // * db : a pointer to connected databased // * Returns a new list of ids which are not from the specified table // // The format of sql statement is: // SELECT id FROM generate_series(5,9) id // WHERE NOT EXISTS (SELECT 1 from ziartists where id = id.id) func SelectMissingIdsWithRange(tblName dna.String, head, tail dna.Int, db *sqlpg.DB) (*dna.IntArray, error) { if head > tail { panic("Cannot create range: head has to be less than tail") } selectStmt := dna.Sprintf("SELECT id FROM generate_series(%v,%v) id \n", head, tail) selectStmt += "WHERE NOT EXISTS (SELECT 1 from " + tblName + " where id = id.id)" ids := &[]dna.Int{} err := db.Select(ids, selectStmt) switch { case err != nil: return nil, err case err == nil && ids != nil: slice := dna.IntArray(*ids) return &slice, nil case err == nil && ids == nil: return &dna.IntArray{}, nil default: panic("Default case triggered. Case is not expected. Cannot select non existed ids") } }
// getAPIAlbumFromMainPage returns album from main page func getAPIAlbumFromMainPage(album *APIAlbum) <-chan bool { channel := make(chan bool, 1) go func() { link := "http://www.allmusic.com/album/google-bot-mw" + album.Id.ToFormattedString(10, true) // dna.Log(link) result, err := http.Get(link) if err == nil { data := &result.Data artistsArr := data.FindAllString(`(?mis)<h3 class="album-artist".+?</h3>`, 1) if artistsArr.Length() > 0 { // Getting Artists album.Artists = artistsArr[0].RemoveHtmlTags("").Trim().Split(" / ") // Getting Artistids idsArr := artistsArr[0].FindAllString(`mn[0-9]+`, -1) album.Artistids = dna.IntArray(idsArr.Map(func(val dna.String, idx dna.Int) dna.Int { idArr := val.FindAllStringSubmatch(`mn([0-9]+)`, -1) if len(idArr) > 0 { return idArr[0][1].ToInt() } else { return 0 } }).([]dna.Int)) } coverartArr := data.FindAllString(`<meta name="image".+`, 1) if coverartArr.Length() > 0 { album.Coverart = coverartArr[0].GetTagAttributes("content").Trim() } // Getting Title titleArr := data.FindAllString(`(?mis)<h2 class="album-title".+?</h2>`, 1) if titleArr.Length() > 0 { album.Title = titleArr[0].RemoveHtmlTags("").Trim().DecodeHTML() } // Getting Review reviewArr := data.FindAllStringSubmatch(`(?mis)<div class="text" itemprop="reviewBody">(.+?)</div>`, 1) if len(reviewArr) > 0 { album.Review = reviewArr[0][1].Trim().ReplaceWithRegexp(`^<p>`, ``).ReplaceWithRegexp(`</p>$`, ``).Trim().ReplaceWithRegexp(`^<div class="text" itemprop="reviewBody">`, "").ReplaceWithRegexp(`</div>$`, "").Trim().ReplaceWithRegexp(`^<p>`, "").ReplaceWithRegexp(`</p>$`, "").Trim() } // Getting site rating ratingArr := data.FindAllStringSubmatch(`<div class="allmusic-rating.+([0-9]+)"`, 1) if len(ratingArr) > 0 { siteRating := ratingArr[0][1].ToInt() if siteRating > 0 { album.Ratings[0] = siteRating + 1 } } // Getting Duration durationArr := data.FindAllStringSubmatch(`(?mis)<h4>Duration</h4>.+?<span>(.+?)</span>`, 1) if len(durationArr) > 0 { album.Duration = ToSeconds(durationArr[0][1]) } // Getting DateReleased dateReleasedArr := data.FindAllStringSubmatch(`(?mis)<h4>Release Date</h4>.+?<span>(.+?)</span>`, 1) if len(dateReleasedArr) > 0 { // dna.Log(dateReleasedArr[0][1].String()) if dateReleasedArr[0][1].Trim().Match(`^[0-9]{4}$`) == true { album.DateReleased, _ = time.Parse(`2006`, dateReleasedArr[0][1].String()) } else { // dna.Log(dateReleasedArr[0][1]) album.DateReleased, _ = time.Parse(`January 02, 2006`, dateReleasedArr[0][1].String()) } if album.DateReleased.IsZero() == true { album.DateReleased, _ = time.Parse(`January 2, 2006`, dateReleasedArr[0][1].String()) } // dna.Log(dna.Sprintf("%v", album.DateReleased)) } // Getting Discographies discoArr := data.FindAllString(`(?mis)<li class="album">.+?</li>`, -1) discos := discoArr.Map(func(val dna.String, idx dna.Int) APIDiscography { var id dna.Int var title dna.String titleArr := val.FindAllStringSubmatch(`title="(.+?)" style="`, 1) if len(titleArr) > 0 { title = titleArr[0][1].Trim() } href := val.GetTagAttributes("href") coverart := val.GetTagAttributes("src") idArr := href.FindAllStringSubmatch(`mw([0-9]+)`, 1) if len(idArr) > 0 { id = idArr[0][1].ToInt() } else { id = 0 } return APIDiscography{id, title, coverart} }).([]APIDiscography) if len(discos) > 0 { bDisco, err := json.Marshal(discos) if err == nil { album.Discographies = dna.String(string(bDisco)) } } // Getting Genres, Moods, Styles and Themes album.Genres = getTSGM(data, "genres") album.Moods = getTSGM(data, "moods") album.Styles = getTSGM(data, "styles") album.Themes = getTSGM(data, "themes") // Getting Songs songTitleArr := data.FindAllString(`(?mis)<tr class="track.+?</tr>`, -1) songs := songTitleArr.Map(func(track dna.String, idx dna.Int) APISong { var id, songDuration dna.Int = 0, 0 var title dna.String = "" var composers, performers = []Person{}, []Person{} // Getting song's title and id titleArr := track.FindAllString(`(?mis)<div class="title" itemprop="name">.+?</div>`, 1) if titleArr.Length() > 0 { title = titleArr[0].RemoveHtmlTags("").Trim().DecodeHTML() idArr := titleArr[0].FindAllStringSubmatch(`m[a-z]([0-9]+)`, 1) if len(idArr) > 0 { id = idArr[0][1].ToInt() } } // Getting song's duration durationArr := track.FindAllString(`(?mis)<td class="time">.+?</td>`, 1) if durationArr.Length() > 0 { songDuration = ToSeconds(durationArr[0].RemoveHtmlTags("").Trim()) } // Getting composers composerArr := track.FindAllString(`(?mis)<div class="composer">.+?</div>`, 1) if composerArr.Length() > 0 { composers = composerArr[0].Split(" / ").Map(func(val dna.String, idx dna.Int) Person { var cid dna.Int = 0 name := val.RemoveHtmlTags("").Trim() performerIdArr := val.FindAllStringSubmatch(`mn([0-9]+)`, 1) if len(performerIdArr) > 0 { cid = performerIdArr[0][1].ToInt() } return Person{cid, name} }).([]Person) } // Getting artists performerArr := track.FindAllString(`(?mis)<td class="performer".+?</td>`, 1) if performerArr.Length() > 0 { perList := performerArr[0].FindAllString(`<a href=.+?</a>`, -1) if perList.Length() > 0 { performers = perList.Map(func(val dna.String, idx dna.Int) Person { var cid dna.Int = 0 /// performer name /// does not handle feat: seperator /// LOOK at Unmarshal song // panic("LOOK AGAIN!!!!!!!!! :(") name := val.RemoveHtmlTags("").Trim() artistIdArr := val.FindAllStringSubmatch(`mn([0-9]+)`, 1) if len(artistIdArr) > 0 { cid = artistIdArr[0][1].ToInt() } return Person{cid, name} }).([]Person) } } return APISong{id, title, performers, composers, songDuration} }).([]APISong) if len(songs) > 0 { bSongs, derr := json.Marshal(songs) if derr == nil { album.Songs = dna.String(string(bSongs)) } } // Getting Ratings } channel <- true }() return channel }
func getMovieFromPage(movie *Movie) <-chan bool { channel := make(chan bool, 1) go func() { link := "http://mmovie.hdviet.com/hdviet." + movie.Id.ToString() + ".html" result, err := http.Get(link) if err == nil { // Renew ACCESS_TOKEN_KEY if AccessTokenKeyRenewable == false { keyArr := result.Data.FindAllString(`_strLinkPlay.+`, 1) if keyArr.Length() > 0 { ACCESS_TOKEN_KEY = keyArr[0].Replace(`';`, "").ReplaceWithRegexp(`^.+;`, "").String() if len(ACCESS_TOKEN_KEY) != 32 { panic("Wrong ACCESS_TOKEN_KEY: Length has to be 32") } } AccessTokenKeyRenewable = true } // Finding main data mainDat := result.Data.FindAllString(`(?mis)<div class="main_decs">.+?<div class="clear">`, 1) if len(mainDat) > 0 { main := mainDat[0] if main.GetTags("h4").Length() > 0 { movie.Title = main.GetTags("h4")[0].RemoveHtmlTags("").DecodeHTML().Trim() switch movie.Title.Split(" - ").Length() { case 2: movie.ForeignTitle = movie.Title.Split(" - ")[0] movie.VnTitle = movie.Title.Split(" - ")[1] case 3: movie.ForeignTitle = movie.Title.Split(" - ")[0] movie.VnTitle = movie.Title.Split(" - ")[1] third := movie.Title.Split(" - ")[2] if third.Match(`Tập`) == true { movie.IsSeries = true movie.CurrentEps = third.Replace("Tập", "").Trim().Split("/")[0].ToInt() movie.MaxEp = third.Replace("Tập", "").Trim().Split("/")[1].ToInt() seasonArr := movie.VnTitle.FindAllStringSubmatch(`Phần ([0-9]+)`, 1) if len(seasonArr) > 0 { movie.SeasonId = seasonArr[0][1].ToInt() } } } movie.Topics = getNames(main, "Danh mục:", "Diễn viên") movie.Actors = getNames(main, "Diễn viên:", "Đạo diễn") movie.Directors = getNames(main, "Đạo diễn:", "<p>Quốc gia") movie.Countries = getNames(main, "<p>Quốc gia:", "Năm khởi chiếu") descArr := main.FindAllStringSubmatch(`(?mis)</h4>(.+?)Danh mục:`, 1) if len(descArr) > 0 { movie.Description = descArr[0][1].RemoveHtmlTags("").DecodeHTML().Trim() } yearArr := main.FindAllStringSubmatch(`(?mis)Năm khởi chiếu:(.+?)</p>`, 1) if len(yearArr) > 0 { movie.YearReleased = yearArr[0][1].RemoveHtmlTags("").DecodeHTML().Trim().ToInt() } ratingArr := main.FindAllStringSubmatch(`(?mis)Đánh giá IMDB:(.+?)<p>`, 1) if len(ratingArr) > 0 { if ratingArr[0][1].GetTags("span").Length() > 0 { rating := ratingArr[0][1].GetTags("span")[0].RemoveHtmlTags("").Replace(".", "").ToInt() movie.IMDBRating.Push(rating) } nvotes := ratingArr[0][1].FindAllStringSubmatch(`\((.+) phiếu bình chọn`, 1) if len(nvotes) > 0 { movie.IMDBRating.Push(nvotes[0][1].Replace(",", "").ToInt()) } } } } seasonsArr := result.Data.FindAllStringSubmatch(`(?mis)Season:(.+?)<div class="main_decs">`, 1) if len(seasonsArr) > 0 { movie.Seasons = dna.IntArray(seasonsArr[0][1].Split(`</li>`).Map(func(val dna.String, idx dna.Int) dna.Int { idArr := val.GetTagAttributes("href").FindAllStringSubmatch(`/hdviet\.(.+)\.html`, 1) if len(idArr) > 0 { return idArr[0][1].ToInt() } else { return 0 } }).([]dna.Int)).Filter(func(val dna.Int, idx dna.Int) dna.Bool { if val != 0 { return true } else { return false } }) } anotherTitleArr := result.Data.FindAllStringSubmatch(`var _moviename = '(.+)'`, 1) if len(anotherTitleArr) > 0 { movie.AnotherTitle = anotherTitleArr[0][1].Trim() } similarDat := result.Data.FindAllStringSubmatch(`(?mis)<div class="other-films">(.+?)</div>`, 1) if len(similarDat) > 0 { movie.Similars = dna.IntArray(similarDat[0][1].FindAllString(`<a.+</a>`, -1).Map(func(val dna.String, idx dna.Int) dna.Int { href := val.GetTagAttributes("href") midArr := href.FindAllStringSubmatch(`\.([0-9]+)\.html`, 1) if len(midArr) > 0 { return midArr[0][1].ToInt() } else { return 0 } }).([]dna.Int)) } thumbArr := result.Data.FindAllString(`<div class="fd-poster">[\n\t\r]+.+`, 1) if thumbArr.Length() > 0 { movie.Thumbnail = thumbArr[0].GetTagAttributes("src") } if result.Data.Match(`http://mmovie.hdviet.com/images/720.png`) == true { movie.MaxResolution = 720 } if result.Data.Match(`http://mmovie.hdviet.com/images/1080.png`) == true { movie.MaxResolution = 1080 } } channel <- true }() return channel }
// getSongFromMainPage returns album from main page func getAlbumFromMainPage(album *Album) <-chan bool { channel := make(chan bool, 1) go func() { link := "http://mp3.zing.vn/album/google-bot/" + album.Key + ".html" result, err := http.Get(link) // Log(link) // Log(result.Data) if err == nil { data := &result.Data encodedKeyArr := data.FindAllStringSubmatch(`xmlURL=http://mp3.zing.vn/xml/album-xml/(.+)&`, -1) if len(encodedKeyArr) > 0 { album.EncodedKey = encodedKeyArr[0][1] } // playsArr := data.FindAllStringSubmatch(`Lượt nghe:</span>(.+)</p>`, -1) // if len(playsArr) > 0 { // album.Plays = playsArr[0][1].Trim().Replace(".", "").ToInt() // } // yearsArr := data.FindAllStringSubmatch(`Năm phát hành:</span>(.+)</p>`, -1) // if len(yearsArr) > 0 { // album.YearReleased = yearsArr[0][1].Trim() // } nsongsArr := data.FindAllStringSubmatch(`Số bài hát:</span>(.+)</p>`, -1) if len(nsongsArr) > 0 { album.Nsongs = nsongsArr[0][1].Trim().ToInt() } // topicsArr := data.FindAllStringSubmatch(`Thể loại:(.+)`, -1) // if len(topicsArr) > 0 { // album.Topics = topicsArr[0][1].RemoveHtmlTags("").Trim().Split(", ").SplitWithRegexp(` / `).Unique() // } // descArr := data.FindAllStringSubmatch(`(?mis)(<p id="_albumIntro" class="rows2".+#_albumIntro">)Xem toàn bộ</a>`, -1) // if len(descArr) > 0 { // album.Description = descArr[0][1].RemoveHtmlTags("").Trim() // } // titleArr := data.FindAllStringSubmatch(`<h1 class="detail-title">(.+) - <a.+`, -1) // if len(titleArr) > 0 { // album.Title = titleArr[0][1].RemoveHtmlTags("").Trim() // } // artistsArr := data.FindAllStringSubmatch(`<h1 class="detail-title">.+(<a.+)`, -1) // if len(artistsArr) > 0 { // album.Artists = dna.StringArray(artistsArr[0][1].RemoveHtmlTags("").Trim().Split(" ft. ").Unique().Map(func(val dna.String, idx dna.Int) dna.String { // return val.Trim() // }).([]String)) // } covertArr := data.FindAllStringSubmatch(`<span class="album-detail-img">(.+)`, -1) if len(covertArr) > 0 { album.Coverart = covertArr[0][1].GetTagAttributes("src") datecreatedArr := album.Coverart.FindAllStringSubmatch(`_([0-9]+)\..+$`, -1) if len(datecreatedArr) > 0 { // Log(int64(datecreatedArr[0][1].ToInt())) album.DateCreated = time.Unix(int64(datecreatedArr[0][1].ToInt()), 0) } } songidsArr := data.FindAllString(`id="_divPlsLite.+?"`, -1) if songidsArr.Length() > 0 { album.Songids = dna.IntArray(songidsArr.Map(func(val dna.String, idx dna.Int) dna.Int { return GetId(val.FindAllStringSubmatch(`id="_divPlsLite(.+)"`, -1)[0][1]) }).([]dna.Int)) } } channel <- true }() return channel }