Пример #1
0
// GatherData loads the crawler configuration from configurationFile,
// fetches the timeline tweets of every configured Twitter account, and
// then hands each account's tweets to GatherDataFromAccount.
//
// It panics when configurationFile is empty or cannot be loaded.
func GatherData(configurationFile string) {

	if configurationFile == "" {
		panic("I can't work without a configuration file")
	}

	log.Info("Loading config")
	conf, err := config.LoadConfig(configurationFile)
	if err != nil {
		panic(err)
	}
	crawler := NewTwitterCrawler(&conf)

	myTweets := make(map[string]timelinesTweets, len(conf.TwitterAccounts))

	for _, account := range conf.TwitterAccounts {
		// BUG FIX: log.Info does not interpret printf verbs, so "%#v" was
		// emitted literally; Infof actually formats the account name.
		log.Infof("-== Timeline for Account: %#v ==-\n", account)

		// false: don't be strict — collect every hashtag in the timeline,
		// even those outside the interesting range.
		// NOTE(review): crawler was built from &conf, so conf.Number is used
		// here instead of the original crawler.configuration.Number for
		// consistency with the GetTimelinesN arguments below.
		if conf.Number != 0 {
			myTweets[account] = crawler.GetTimelinesN(account, false, conf.Number, conf.Slices)
		} else {
			myTweets[account] = crawler.GetTimelines(account, false)
		}
		log.Infof("-== END TIMELINE for %#v ==-\n", account)

	}

	// Process accounts only after every timeline has been fetched.
	for _, account := range conf.TwitterAccounts {
		GatherDataFromAccount(crawler, account, myTweets[account])
	}

}
Пример #2
0
// GenerateData exports the data previously gathered for each configured
// Twitter account: it reads the per-account database, builds a
// hashtag/user occurrence matrix, writes it to CSV, and emits a graph
// (users linked when they share a sufficiently frequent hashtag) both as
// a JSON file and as a GEXF file for Gephi.
//
// It panics when configurationFile is empty or cannot be loaded.
func GenerateData(configurationFile string) {
	if configurationFile == "" {
		panic("I can't work without a configuration file")
	}

	log.Info("Loading config")
	conf, err := config.LoadConfig(configurationFile)
	if err != nil {
		panic(err)
	}
	fmt.Println(">> Exporting gathered data")

	for _, account := range conf.TwitterAccounts {
		fmt.Println(">> Generating graph for " + account)
		// Fresh graph for every account (the original also built one before
		// the loop that was immediately overwritten here — dead code).
		mygraph := Graph{Nodes: []Node{}, Links: []Link{}, Mode: "static", Defaultedgetype: "undirected"}
		db := nutz.NewStorage(account+".db", 0600, nil)

		nodecount := 0

		myNetwork := db.GetAll(account, MATCHING_HASHTAGS).DataList
		myMatrix := make(map[string][]int)                  // hashtag -> graph node ids that used it
		myNetworkMatrix := make(map[string]map[string]int8) // twitter user id -> hashtag -> occurrences
		myMapNetwork := make(map[int]string)                // graph node id -> twitter user id
		var myCSV [][]string

		HashtagsMap := db.GetAll(account, GENERATED_HASHTAGS)
		Hashtags := make([]string, 0, len(HashtagsMap.DataList)+1)
		Hashtags = append(Hashtags, "UserID") // first column reserved for the user id
		for h := range HashtagsMap.DataList {
			Hashtags = append(Hashtags, string(h))
		}
		myCSV = append(myCSV, Hashtags)

		for k := range myNetwork {
			ki64 := string(k) // twitter user id as string
			myUserNetwork := db.GetAll(account, MATCHING_HASHTAGS, k).DataList

			// One CSV row per user: user id followed by the occurrence count
			// of every known hashtag ("0" when the user never tweeted it).
			userOccurrence := make([]string, 0, len(Hashtags)+1)
			userOccurrence = append(userOccurrence, string(k))
			for _, h := range Hashtags {
				if occurrence, ok := myUserNetwork[h]; ok {
					userOccurrence = append(userOccurrence, string(occurrence))
				} else {
					userOccurrence = append(userOccurrence, "0")
				}
			}
			myCSV = append(myCSV, userOccurrence)

			myNetworkMatrix[ki64] = make(map[string]int8, len(myUserNetwork))
			if len(myUserNetwork) > conf.HashtagCutOff { // cut off people that tweeted too few distinct hashtags
				htagsatisfied := false // becomes true when at least one hashtag passes the cutoff
				for h, o := range myUserNetwork {
					// Parse the stored occurrence count once (the original
					// ran strconv.Atoi twice on the same value).
					occur, _ := strconv.Atoi(string(o))
					if occur > conf.HashtagOccurrenceCutOff { // cut off hashtags tweeted too rarely
						myMatrix[string(h)] = append(myMatrix[string(h)], nodecount) // adjacency map
						htagsatisfied = true
					}
					myNetworkMatrix[ki64][string(h)] = int8(occur) // mirror the db into a map for fast lookups
				}
				if htagsatisfied { // only count nodes that satisfied the cutoff above
					myMapNetwork[nodecount] = ki64 // map graph node id <-> twitter user id
					mygraph.Nodes = append(mygraph.Nodes, Node{Id: nodecount, Name: string(k), Group: 1})
					nodecount++
				}
			}
		}

		fmt.Println(">> Preparing graph for " + account)
		// Link every ordered pair of distinct users that share a hashtag
		// whose occurrence count for the source user passes the cutoff.
		linkscount := 0
		for hashtag, users := range myMatrix {
			for _, userid := range users {
				for _, userid2 := range users {
					if userid2 == userid {
						continue
					}
					weight := myNetworkMatrix[myMapNetwork[userid]][hashtag]
					if int(weight) > conf.HashtagOccurrenceCutOff {
						mygraph.Links = append(mygraph.Links, Link{Id: linkscount, Source: userid, Target: userid2, Value: float32(weight)})
						linkscount++
					}
				}
			}
		}

		fmt.Println(">> Writing matrix to csv")
		utils.WriteCSV(myCSV, account+".csv")

		fmt.Println(">> Writing graph to json file")
		// BUG FIX: the marshal error was silently discarded; skip the write
		// for this account when serialization fails.
		fileJson, err := ffjson.Marshal(&mygraph)
		if err != nil {
			log.Info("ffjson.Marshal ERROR: " + err.Error())
			continue
		}
		if err := ioutil.WriteFile(account+".output", fileJson, 0644); err != nil {
			log.Info("WriteFileJson ERROR: " + err.Error())
		}

		// BUG FIX: the xml.Marshal error was ignored and a possibly-empty
		// payload written out; check it before wrapping in the GEXF envelope.
		out, err := xml.Marshal(mygraph)
		if err != nil {
			log.Info("xml.Marshal ERROR: " + err.Error())
			continue
		}
		out = append([]byte(`<?xml version="1.0" encoding="UTF-8"?><gexf xmlns="http://www.gexf.net/1.2draft" version="1.2"> <meta lastmodifieddate="2009-03-20"> <creator>dark-lab</creator><description>Gephi file</description> </meta>`), out...)
		out = append(out, []byte(`</gexf>`)...)

		if err := ioutil.WriteFile(account+".output.gexf", out, 0644); err != nil {
			// was misleadingly labeled "WriteFileJson" in the original
			log.Info("WriteFileGexf ERROR: " + err.Error())
		}
	}

}