func GatherData(configurationFile string) { if configurationFile == "" { panic("I can't work without a configuration file") } log.Info("Loading config") conf, err := config.LoadConfig(configurationFile) if err != nil { panic(err) } crawler := NewTwitterCrawler(&conf) myTweets := make(map[string]timelinesTweets) for _, account := range conf.TwitterAccounts { log.Info("-== Timeline for Account: %#v ==-\n", account) if crawler.configuration.Number != 0 { myTweets[account] = crawler.GetTimelinesN(account, false, conf.Number, conf.Slices) //false: don't be strict, getting all hashtag in the timeline, also if they are out the interested range } else { myTweets[account] = crawler.GetTimelines(account, false) //false: don't be strict, getting all hashtag in the timeline, also if they are out the interested range } log.Info("-== END TIMELINE for %#v ==-\n", account) } for _, account := range conf.TwitterAccounts { GatherDataFromAccount(crawler, account, myTweets[account]) } }
func GenerateData(configurationFile string) { if configurationFile == "" { panic("I can't work without a configuration file") } log.Info("Loading config") conf, err := config.LoadConfig(configurationFile) if err != nil { panic(err) } //api := GetTwitter(&conf) fmt.Println(">> Exporting gathered data") mygraph := Graph{Nodes: []Node{}, Links: []Link{}, Mode: "static", Defaultedgetype: "undirected"} for _, account := range conf.TwitterAccounts { fmt.Println(">> Generating graph for " + account) mygraph = Graph{Nodes: []Node{}, Links: []Link{}, Mode: "static", Defaultedgetype: "undirected"} db := nutz.NewStorage(account+".db", 0600, nil) nodecount := 0 myNetwork := db.GetAll(account, MATCHING_HASHTAGS).DataList myMatrix := make(map[string][]int) // this is the Matrix Hashtags/ Users ID myNetworkMatrix := make(map[string]map[string]int8) //so we can extract later data easily myMapNetwork := make(map[int]string) //this will be used to resolve User ID of the graph <-> Twitter id var myCSV [][]string HashtagsMap := db.GetAll(account, GENERATED_HASHTAGS) var Hashtags []string Hashtags = append(Hashtags, "UserID") //First column reserved to userid for h, _ := range HashtagsMap.DataList { Hashtags = append(Hashtags, string(h)) } myCSV = append(myCSV, Hashtags) for k, _ := range myNetwork { // ki64, _ := strconv.ParseInt(string(k), 10, 64) ki64 := string(k) //Column name is ki64 myUserNetwork := db.GetAll(account, MATCHING_HASHTAGS, k).DataList var userOccurrence []string userOccurrence = append(userOccurrence, string(k)) //this is the userid for _, h := range Hashtags { if occurence, ok := myUserNetwork[h]; ok { userOccurrence = append(userOccurrence, string(occurence)) } else { userOccurrence = append(userOccurrence, strconv.Itoa(0)) } } myCSV = append(myCSV, userOccurrence) myNetworkMatrix[ki64] = make(map[string]int8) //fmt.Println("User ID is: " + string(k)) if len(myUserNetwork) > conf.HashtagCutOff { //Cutting off people that just tweeted 1 hashtag htagsatisfied := false //cutoff var for h, o := range myUserNetwork { occur, _ := strconv.Atoi(string(o)) //fmt.Println("Hastag: " + string(h) + " saw " + string(o) + " times") if occur > conf.HashtagOccurrenceCutOff { // Cutting off people that just tweeted the hashtag once myMatrix[string(h)] = append(myMatrix[string(h)], nodecount) // Generating adjacient map htagsatisfied = true //cutoff var, setting true to enable node counting } occurrences, _ := strconv.Atoi(string(o)) myNetworkMatrix[ki64][string(h)] = int8(occurrences) // convert the db to a map } if htagsatisfied { //Cutting off also nodes who satisfied the cuttoff above myMapNetwork[nodecount] = ki64 //mapping Graph user id with Tweet user id mygraph.Nodes = append(mygraph.Nodes, Node{Id: nodecount, Name: string(k), Group: 1}) nodecount++ } } } fmt.Println(">> Preparing graph for " + account) linkscount := 0 for hashtag, users := range myMatrix { for _, userid := range users { for _, userid2 := range users { if userid2 != userid { if int(myNetworkMatrix[myMapNetwork[userid]][hashtag]) > conf.HashtagOccurrenceCutOff { mygraph.Links = append(mygraph.Links, Link{Id: linkscount, Source: userid, Target: userid2, Value: float32(myNetworkMatrix[myMapNetwork[userid]][hashtag])}) linkscount++ } } } } } fmt.Println(">> Writing matrix to csv") utils.WriteCSV(myCSV, account+".csv") fmt.Println(">> Writing graph to json file") // // nUniqueMentions, _ := strconv.Atoi(string(unique_mentions.Data)) // nMentions_to_followed, _ := strconv.Atoi(string(mentions_to_followed.Data)) // nTweets, _ := strconv.Atoi(string(tweets.Data)) // nReTweets, _ := strconv.Atoi(string(retweets.Data)) //mygraph.Nodes = append(mygraph.Nodes, Node{Name: account, Group: group}) // for k, v := range myUniqueMentions { // weight, _ := strconv.Atoi(string(v)) // mygraph.Nodes = append(mygraph.Nodes, Node{Name: string(k), Group: group, Thickness: 0.01, Size: 0.01}) // mygraph.Links = append(mygraph.Links, Link{Source: innercount, Target: nodecount, Value: weight}) // innercount++ // } fileJson, _ := ffjson.Marshal(&mygraph) err = ioutil.WriteFile(account+".output", fileJson, 0644) if err != nil { log.Info("WriteFileJson ERROR: " + err.Error()) } out, err := xml.Marshal(mygraph) out = append([]byte(`<?xml version="1.0" encoding="UTF-8"?><gexf xmlns="http://www.gexf.net/1.2draft" version="1.2"> <meta lastmodifieddate="2009-03-20"> <creator>dark-lab</creator><description>Gephi file</description> </meta>`), out...) out = append(out, []byte(`</gexf>`)...) err = ioutil.WriteFile(account+".output.gexf", out, 0644) if err != nil { log.Info("WriteFileJson ERROR: " + err.Error()) } } }