func main() { start := time.Now() db := spider.NewDBM("DBM.db") pages := spider.Get30Pages() db.StorePages2(pages) //db.DisplayInvertedTable() //db.GetPages2() //i := 0 //fmt.Printf("-----------------------------------------------\n") pages2 := db.GetPages2() spider.PrintEntireIndex(pages2) // for _, p := range pages2 { // i++ // fmt.Printf("PageID: %v\n", p.PageID) // fmt.Printf("PageSize: %v\n", p.Size) // fmt.Printf("PageTitle: %v\n", p.Title) // fmt.Printf("PageURL: %v\n", p.URL) // fmt.Printf("PageModified: %v\n", p.Modified) // fmt.Printf("PageWord: \n") // for _, word := range p.Words() { // fmt.Printf("%v", word.Word) // for _, pos := range word.Positions() { // fmt.Printf(" %v", pos) // } // } // fmt.Printf("***********************************************\n") // } elapsed := time.Since(start) fmt.Printf("Time spent: %v\n", elapsed) //fmt.Printf("\nnumberofpage:%v", i) db.Close() }
func main() { db := spider.NewDBM("DBM.db") pages2 := db.GetPages2() PrintEntireIndex(pages2) db.Close() }
func main() { db := spider.NewDBM("DBM.db") pages := spider.Get30Pages() db.StorePages2(pages) db.Close() }
func main() { start := time.Now() db := spider.NewDBM("DBM.db") defer db.Close() pages := spider.Get300Pages() db.StorePages2(pages) elapse := time.Since(start) fmt.Printf("Time:%v", elapse) }
func PreCompute() { db := spider.NewDBM(spider.DBMname) invertedindex := db.GetInvertedIndex() allStoredPages = db.GetPages2() //computeAveDocLen(allStoredPages) invertedTable = make(map[int][]int64) for index, temp := range invertedindex { terms := strings.Split(temp, ";") for _, term := range terms { docID, _ := strconv.ParseInt(term, 10, 64) if contain(invertedTable[index], docID) == false { invertedTable[index] = append(invertedTable[index], docID) } } } //compute allPagesWithTFIDF fmt.Printf("Document number in allstoredpage:%v", len(allStoredPages)) for _, page := range allStoredPages { var tempPage SPage tempPage.Page = page for _, word := range page.Words() { var tempWord wordWithTFIDF tempWord.Word = word TF := float64(word.TF()) DF := float64(len(invertedTable[word.WordID])) N := float64(len(allStoredPages)) MaxTF := GetMaxTF(page) //fmt.Printf("TF:%v DF:%v\n", TF, DF) if MaxTF <= 0 { //fmt.Printf("'%v' is word in the db with 0 MAXTF", word.Word) tempWord.TFIDF = 0 } else if DF <= 0 { //fmt.Printf("'%v' is word in the db with 0 DF", word.Word) tempWord.TFIDF = 0 } else { tempWord.TFIDF = (TF / MaxTF) * math.Log2(N/DF) } tempPage.myWord = append(tempPage.myWord, tempWord) } allPagesWithTFIDF = append(allPagesWithTFIDF, tempPage) } db.Close() }
func main() { start := time.Now() db := spider.NewDBM("DBM.db") elapsed := time.Since(start) fmt.Printf("Time spent on GetPage2: %v\n", elapsed) wordN := db.GetWordNumber() docN := db.GetDocumentNumber() fmt.Printf("Documents stored: %v\n", docN) fmt.Printf("Words stored: %v\n", wordN) fmt.Printf("Df of wordid=10: %v\n", db.Getdf(10)) fmt.Printf("Inside stuff 10: ") //pageIds := db.GetDocIdByWordID(10) var words []string words = append(words, "societi") // for _, pageId := range pageIds { // fmt.Printf("pageId: %v TF: %v TFIDF: %v", pageId, int(db.GetTf(10, pageId)), db.GetTfidf(10, pageId)) // fmt.Printf(" CosSim: %v\n", db.CosSim(pageId, words)) // } //ranker.PrintHiHi() pages2 := db.GetPages2() var testingPage *spider.Page for _, page := range pages2 { testingPage = page break } // for _, word := range testingPage.Words() { // fmt.Printf("%v", word) // } db.Close() //ranker.SearchingResult(testingPage, ranker.TFIDF) result, score := ranker.SearchingResult(testingPage) fmt.Printf("result:%v\n", result) fmt.Printf("score:%v\n", score) elapsed = time.Since(start) fmt.Printf("Time spent on main: %v\n", elapsed) // FreqWords := ranker.GetMostFreqWord(testingPage, 5) // for _, word := range FreqWords { // fmt.Printf("Word:%v TF:%v\n", word.Word, word.TF()) // } }