Beispiel #1
0
// main fetches pages with spider.Get30Pages, stores them in DBM.db, reads the
// stored pages back, prints them with spider.PrintEntireIndex and finally
// reports the elapsed wall-clock time.
func main() {
	start := time.Now()

	db := spider.NewDBM("DBM.db")
	// Deferred so the database is closed even if a later step panics.
	defer db.Close()

	pages := spider.Get30Pages()
	db.StorePages2(pages)

	// Print what is read back from the store, not the in-memory crawl result.
	pages2 := db.GetPages2()
	spider.PrintEntireIndex(pages2)

	elapsed := time.Since(start)
	fmt.Printf("Time spent: %v\n", elapsed)
}
Beispiel #2
0
// main opens DBM.db, prints every stored page with PrintEntireIndex and
// closes the database on exit.
func main() {
	db := spider.NewDBM("DBM.db")
	// Deferred so the database is closed even if reading or printing panics.
	defer db.Close()

	pages2 := db.GetPages2()
	PrintEntireIndex(pages2)
}
Beispiel #3
0
// main fetches pages with spider.Get30Pages and stores them in DBM.db.
func main() {
	db := spider.NewDBM("DBM.db")
	// Deferred so the database is closed even if fetching or storing panics.
	defer db.Close()

	pages := spider.Get30Pages()
	db.StorePages2(pages)
}
Beispiel #4
0
// main measures how long it takes to fetch pages with spider.Get300Pages and
// store them in DBM.db, then prints that duration.
func main() {
	began := time.Now()

	store := spider.NewDBM("DBM.db")
	defer store.Close()

	fetched := spider.Get300Pages()
	store.StorePages2(fetched)

	fmt.Printf("Time:%v", time.Since(began))
}
Beispiel #5
0
// PreCompute rebuilds the package-level ranking state from the database named
// by spider.DBMname:
//
//   - allStoredPages is replaced with the result of db.GetPages2.
//   - invertedTable is rebuilt so that every index returned by
//     db.GetInvertedIndex maps to the distinct document IDs parsed from its
//     ";"-separated entry.
//   - allPagesWithTFIDF is rebuilt with one SPage per stored page, each word
//     weighted as (TF / MaxTF) * log2(N / DF), or 0 when MaxTF or DF is not
//     positive.
func PreCompute() {
	db := spider.NewDBM(spider.DBMname)
	// Deferred so the database is closed even if a later step panics.
	defer db.Close()

	invertedindex := db.GetInvertedIndex()
	allStoredPages = db.GetPages2()

	invertedTable = make(map[int][]int64)
	for index, temp := range invertedindex {
		for _, term := range strings.Split(temp, ";") {
			docID, err := strconv.ParseInt(term, 10, 64)
			if err != nil {
				// strings.Split yields empty pieces for an empty entry or a
				// trailing ";". Those (and malformed IDs) would otherwise be
				// recorded as document 0 and inflate the document frequency.
				continue
			}
			if !contain(invertedTable[index], docID) {
				invertedTable[index] = append(invertedTable[index], docID)
			}
		}
	}

	// Start from an empty result so that calling PreCompute again rebuilds the
	// slice instead of appending a second copy of every page, matching how
	// invertedTable and allStoredPages are replaced above.
	allPagesWithTFIDF = nil

	fmt.Printf("Document number in allstoredpage:%v", len(allStoredPages))

	// The collection size is the same for every word, so compute it once.
	n := float64(len(allStoredPages))
	for _, page := range allStoredPages {
		var tempPage SPage
		tempPage.Page = page

		// GetMaxTF depends only on the page, so it is evaluated once per page
		// rather than once per word; pages without words never call it, as
		// before.
		if words := page.Words(); len(words) > 0 {
			maxTF := GetMaxTF(page)
			for _, word := range words {
				var tempWord wordWithTFIDF
				tempWord.Word = word

				tf := float64(word.TF())
				df := float64(len(invertedTable[word.WordID]))

				switch {
				case maxTF <= 0, df <= 0:
					// Avoid dividing by zero for words missing from the
					// inverted table or pages without a usable maximum TF.
					tempWord.TFIDF = 0
				default:
					tempWord.TFIDF = (tf / maxTF) * math.Log2(n/df)
				}
				tempPage.myWord = append(tempPage.myWord, tempWord)
			}
		}

		allPagesWithTFIDF = append(allPagesWithTFIDF, tempPage)
	}
}
Beispiel #6
0
// main is a manual smoke test: it opens DBM.db, prints a few statistics from
// the store, picks the first page yielded by db.GetPages2 and runs
// ranker.SearchingResult on it, printing the result, the score and timings.
func main() {
	start := time.Now()

	db := spider.NewDBM("DBM.db")

	// NOTE(review): the label mentions GetPage2, but only NewDBM has run at
	// this point — confirm what this timing is meant to cover.
	elapsed := time.Since(start)
	fmt.Printf("Time spent on GetPage2: %v\n", elapsed)

	wordN := db.GetWordNumber()
	docN := db.GetDocumentNumber()
	fmt.Printf("Documents stored: %v\n", docN)
	fmt.Printf("Words stored: %v\n", wordN)
	fmt.Printf("Df of wordid=10: %v\n", db.Getdf(10))
	fmt.Printf("Inside stuff 10: ")

	// Take the first page the iteration yields as the query page.
	pages2 := db.GetPages2()
	var testingPage *spider.Page
	for _, page := range pages2 {
		testingPage = page
		break
	}

	// Closed explicitly here rather than deferred, so the database is released
	// before the ranker runs.
	// NOTE(review): presumably the ranker opens the same database itself —
	// confirm before turning this into a defer.
	db.Close()

	// With an empty store there is no page to search with; bail out instead
	// of handing a nil page to the ranker.
	if testingPage == nil {
		fmt.Printf("No pages stored; nothing to search\n")
		return
	}

	result, score := ranker.SearchingResult(testingPage)
	fmt.Printf("result:%v\n", result)
	fmt.Printf("score:%v\n", score)

	elapsed = time.Since(start)
	fmt.Printf("Time spent on main: %v\n", elapsed)
}