func GenerateData(configurationFile string) { if configurationFile == "" { panic("I can't work without a configuration file") } log.Info("Loading config") conf, err := config.LoadConfig(configurationFile) if err != nil { panic(err) } //api := GetTwitter(&conf) db := nutz.NewStorage(configurationFile+".db", 0600, nil) mygraph := Graph{Nodes: []Node{}, Links: []Link{}} innercount := 0 nodecount := 0 group := 0 for _, account := range conf.TwitterAccounts { tweets := db.Get(account, "tweets") from := db.Get(account, "from") retweets := db.Get(account, "retweets") unique_mentions := db.Get(account, "unique_mentions") total_mentions := db.Get(account, "total_mentions") followers := db.Get(account, "followers") following := db.Get(account, "following") followers_followed := db.Get(account, "followers_followed") mentions_to_followed := db.Get(account, "mentions_to_followed") log.Info("Account: " + account) log.Info("from: " + string(from.Data)) log.Info("Tweets: " + string(tweets.Data)) log.Info("retweets: " + string(retweets.Data)) log.Info("unique_mentions: " + string(unique_mentions.Data)) log.Info("total_mentions: " + string(total_mentions.Data)) log.Info("followers: " + string(followers.Data)) log.Info("following: " + string(following.Data)) log.Info("followers_followed: " + string(followers_followed.Data)) log.Info("mentions_to_followed: " + string(mentions_to_followed.Data)) myUniqueMentions := db.GetAll(account, "map_unique_mentions").DataList nUniqueMentions, _ := strconv.Atoi(string(unique_mentions.Data)) nMentions_to_followed, _ := strconv.Atoi(string(mentions_to_followed.Data)) nTweets, _ := strconv.Atoi(string(tweets.Data)) nReTweets, _ := strconv.Atoi(string(retweets.Data)) om := OutsideMentions(nUniqueMentions, nMentions_to_followed) apt := AnswerPeopleTax(nUniqueMentions, nMentions_to_followed, nTweets, nReTweets) if math.IsNaN(float64(om)) { om = float32(0.01) } if math.IsNaN(float64(apt)) { apt = float32(0.01) } // fmt.Println("\tDemocracy tax: " + FloatToString(di)) fmt.Println("\tOutside of circle mentions: " + FloatToString(om)) fmt.Println("\t of answering to external people: " + FloatToString(apt)) mygraph.Nodes = append(mygraph.Nodes, Node{Name: account, Group: group, Thickness: om, Size: apt}) for k, v := range myUniqueMentions { //id, _ := strconv.ParseInt(k, 10, 64) //User, _ := api.GetUsersShowById(id, nil) //log.Info("[" + User.ScreenName + "]:" + string(v)) // now you can put User.ScreeName in the name of the node weight, _ := strconv.Atoi(string(v)) mygraph.Nodes = append(mygraph.Nodes, Node{Name: string(k), Group: group, Thickness: 0.01, Size: 0.01}) mygraph.Links = append(mygraph.Links, Link{Source: innercount, Target: nodecount, Value: weight}) innercount++ } innercount++ nodecount = innercount group++ } fileJson, _ := json.MarshalIndent(mygraph, "", " ") err = ioutil.WriteFile(configurationFile+".output", fileJson, 0644) if err != nil { log.Info("WriteFileJson ERROR: " + err.Error()) } }
func GenerateData(configurationFile string) { if configurationFile == "" { panic("I can't work without a configuration file") } log.Info("Loading config") conf, err := config.LoadConfig(configurationFile) if err != nil { panic(err) } //api := GetTwitter(&conf) fmt.Println(">> Exporting gathered data") mygraph := Graph{Nodes: []Node{}, Links: []Link{}, Mode: "static", Defaultedgetype: "undirected"} for _, account := range conf.TwitterAccounts { fmt.Println(">> Generating graph for " + account) mygraph = Graph{Nodes: []Node{}, Links: []Link{}, Mode: "static", Defaultedgetype: "undirected"} db := nutz.NewStorage(account+".db", 0600, nil) nodecount := 0 myNetwork := db.GetAll(account, MATCHING_HASHTAGS).DataList myMatrix := make(map[string][]int) // this is the Matrix Hashtags/ Users ID myNetworkMatrix := make(map[string]map[string]int8) //so we can extract later data easily myMapNetwork := make(map[int]string) //this will be used to resolve User ID of the graph <-> Twitter id var myCSV [][]string HashtagsMap := db.GetAll(account, GENERATED_HASHTAGS) var Hashtags []string Hashtags = append(Hashtags, "UserID") //First column reserved to userid for h, _ := range HashtagsMap.DataList { Hashtags = append(Hashtags, string(h)) } myCSV = append(myCSV, Hashtags) for k, _ := range myNetwork { // ki64, _ := strconv.ParseInt(string(k), 10, 64) ki64 := string(k) //Column name is ki64 myUserNetwork := db.GetAll(account, MATCHING_HASHTAGS, k).DataList var userOccurrence []string userOccurrence = append(userOccurrence, string(k)) //this is the userid for _, h := range Hashtags { if occurence, ok := myUserNetwork[h]; ok { userOccurrence = append(userOccurrence, string(occurence)) } else { userOccurrence = append(userOccurrence, strconv.Itoa(0)) } } myCSV = append(myCSV, userOccurrence) myNetworkMatrix[ki64] = make(map[string]int8) //fmt.Println("User ID is: " + string(k)) if len(myUserNetwork) > conf.HashtagCutOff { //Cutting off people that just tweeted 1 hashtag htagsatisfied := false //cutoff var for h, o := range myUserNetwork { occur, _ := strconv.Atoi(string(o)) //fmt.Println("Hastag: " + string(h) + " saw " + string(o) + " times") if occur > conf.HashtagOccurrenceCutOff { // Cutting off people that just tweeted the hashtag once myMatrix[string(h)] = append(myMatrix[string(h)], nodecount) // Generating adjacient map htagsatisfied = true //cutoff var, setting true to enable node counting } occurrences, _ := strconv.Atoi(string(o)) myNetworkMatrix[ki64][string(h)] = int8(occurrences) // convert the db to a map } if htagsatisfied { //Cutting off also nodes who satisfied the cuttoff above myMapNetwork[nodecount] = ki64 //mapping Graph user id with Tweet user id mygraph.Nodes = append(mygraph.Nodes, Node{Id: nodecount, Name: string(k), Group: 1}) nodecount++ } } } fmt.Println(">> Preparing graph for " + account) linkscount := 0 for hashtag, users := range myMatrix { for _, userid := range users { for _, userid2 := range users { if userid2 != userid { if int(myNetworkMatrix[myMapNetwork[userid]][hashtag]) > conf.HashtagOccurrenceCutOff { mygraph.Links = append(mygraph.Links, Link{Id: linkscount, Source: userid, Target: userid2, Value: float32(myNetworkMatrix[myMapNetwork[userid]][hashtag])}) linkscount++ } } } } } fmt.Println(">> Writing matrix to csv") utils.WriteCSV(myCSV, account+".csv") fmt.Println(">> Writing graph to json file") // // nUniqueMentions, _ := strconv.Atoi(string(unique_mentions.Data)) // nMentions_to_followed, _ := strconv.Atoi(string(mentions_to_followed.Data)) // nTweets, _ := strconv.Atoi(string(tweets.Data)) // nReTweets, _ := strconv.Atoi(string(retweets.Data)) //mygraph.Nodes = append(mygraph.Nodes, Node{Name: account, Group: group}) // for k, v := range myUniqueMentions { // weight, _ := strconv.Atoi(string(v)) // mygraph.Nodes = append(mygraph.Nodes, Node{Name: string(k), Group: group, Thickness: 0.01, Size: 0.01}) // mygraph.Links = append(mygraph.Links, Link{Source: innercount, Target: nodecount, Value: weight}) // innercount++ // } fileJson, _ := ffjson.Marshal(&mygraph) err = ioutil.WriteFile(account+".output", fileJson, 0644) if err != nil { log.Info("WriteFileJson ERROR: " + err.Error()) } out, err := xml.Marshal(mygraph) out = append([]byte(`<?xml version="1.0" encoding="UTF-8"?><gexf xmlns="http://www.gexf.net/1.2draft" version="1.2"> <meta lastmodifieddate="2009-03-20"> <creator>dark-lab</creator><description>Gephi file</description> </meta>`), out...) out = append(out, []byte(`</gexf>`)...) err = ioutil.WriteFile(account+".output.gexf", out, 0644) if err != nil { log.Info("WriteFileJson ERROR: " + err.Error()) } } }
func GatherData(configurationFile string) { if configurationFile == "" { panic("I can't work without a configuration file") } log.Info("Loading config") conf, err := config.LoadConfig(configurationFile) if err != nil { panic(err) } myTweets := make(map[string]timelinesTweets) api := GetTwitter(&conf) db := nutz.NewStorage(configurationFile+".db", 0600, nil) retweetRegex, _ := regexp.Compile(`^RT`) // detecting retweets for _, account := range conf.TwitterAccounts { log.Info("-== Timeline for Account: %#v ==-\n", account) myTweets[account] = GetTimelines(api, account, conf.FetchFrom) log.Info("-== END TIMELINE for %#v ==-\n", account) } log.Info("Analyzing && collecting data") for i := range myTweets { var retweets int var mymentions int var mentions struct { Name string Indices []int Screen_name string Id int64 Id_str string } var myUniqueMentions map[int64]int myUniqueMentions = make(map[int64]int) fmt.Println("-== Account: " + i + " ==-") fmt.Println("\tTweets: " + strconv.Itoa(len(myTweets[i]))) for _, t := range myTweets[i] { // detecting retweets if retweetRegex.MatchString(t.Text) == true { retweets++ } else { //detecting mentions outside retweets for _, mentions = range t.Entities.User_mentions { mymentions++ if t.InReplyToUserID != 0 { //we are interested only in replies myUniqueMentions[mentions.Id]++ } } } } if conf.FetchFollow == true { log.Info("-== GetFollowers for Account: %#v ==-\n", i) Followers := GetFollowers(api, i) log.Info("-== GetFollowing for Account: %#v ==-\n", i) Following := GetFollowing(api, i) log.Info("-== End getting Following/Followers for Account: %#v ==-\n", i) var Corrispective []int64 var MentionsWithCorrispective []int64 for _, i := range Following { if _, ok := myUniqueMentions[i]; ok { MentionsWithCorrispective = append(MentionsWithCorrispective, i) } if utils.IntInSlice(i, Followers) == true { Corrispective = append(Corrispective, i) } } fmt.Println("\tFollowers: " + strconv.Itoa(len(Followers))) fmt.Println("\tFollowing: " + strconv.Itoa(len(Following))) fmt.Println("\tFollowers && Following: " + strconv.Itoa(len(Corrispective))) fmt.Println("\tBetween mentions, those are whom the user is following: " + strconv.Itoa(len(MentionsWithCorrispective))) //di := DemocracyIndex(len(myUniqueMentions), len(MentionsWithCorrispective), len(myTweets[i]), retweets) om := OutsideMentions(len(myUniqueMentions), len(MentionsWithCorrispective)) apt := AnswerPeopleTax(len(myUniqueMentions), len(MentionsWithCorrispective), len(myTweets[i]), retweets) // fmt.Println("\tDemocracy tax: " + FloatToString(di)) fmt.Println("\tOutside of circle mentions: " + FloatToString(om)) fmt.Println("\t of answering to external people: " + FloatToString(apt)) db.Create(i, "followers", []byte(strconv.Itoa(len(Followers)))) db.Create(i, "following", []byte(strconv.Itoa(len(Following)))) db.Create(i, "followers_followed", []byte(strconv.Itoa(len(Corrispective)))) db.Create(i, "mentions_to_followed", []byte(strconv.Itoa(len(MentionsWithCorrispective)))) } fmt.Println("\tof wich, there are " + strconv.Itoa(retweets) + " retweets") fmt.Println("\tof wich, there are " + strconv.Itoa(len(myUniqueMentions)) + " unique mentions (not in retweets)") fmt.Println("\tof wich, there are " + strconv.Itoa(mymentions) + " total mentions (not in retweets)") db.Create(i, "from", []byte(conf.Date)) db.Create(i, "tweets", []byte(strconv.Itoa(len(myTweets[i])))) db.Create(i, "retweets", []byte(strconv.Itoa(retweets))) db.Create(i, "unique_mentions", []byte(strconv.Itoa(len(myUniqueMentions)))) db.Create(i, "total_mentions", []byte(strconv.Itoa(mymentions))) for k, v := range myUniqueMentions { db.Create(i, strconv.FormatInt(k, 10), []byte(strconv.Itoa(v)), "map_unique_mentions") } // Visualize example: //http://bl.ocks.org/mbostock/4062045 // Circle size is defined by it's radius (r) : .attr("r", 5) // TOOlTIP: http://bl.ocks.org/Caged/6476579 } }
func GatherDataFromAccount(crawler *TwitterCrawler, account string, timeLine timelinesTweets) { retweetRegex, _ := regexp.Compile(`^RT`) // detecting retweets log.Info(">> Depth look on " + account) retweets := 0 db := nutz.NewStorage(account+".db", 0600, nil) fmt.Println("-== Account: " + account + " ==-") fmt.Println("\tTweets: " + strconv.Itoa(len(timeLine))) var SocialNetwork map[string]struct{} SocialNetwork = make(map[string]struct{}) for _, t := range timeLine { // detecting hashtags for _, tag := range t.Entities.Hashtags { if tag.Text != "" { fmt.Println("\tFound hashtag: " + tag.Text) SocialNetwork[tag.Text] = struct{}{} } } if retweetRegex.MatchString(t.Text) == true { retweets++ } } fmt.Println("\tRetweets " + strconv.Itoa(retweets) + " retweets") fmt.Println("\t" + strconv.Itoa(len(SocialNetwork)) + " hashtags") var memory_network map[string]map[string]int memory_network = make(map[string]map[string]int) //Cycling on hashtags for k, _ := range SocialNetwork { db.Create(account, k, []byte(""), GENERATED_HASHTAGS) db.Create(account, "retweets", []byte(strconv.Itoa(retweets))) fmt.Println("\t Searching hashtag: " + k) var MyTweetsNetwork searchTweets // not searching right before we found an hashtag // storing them to be UNIQUE, then in another phase searching deep further if crawler.configuration.Number != 0 { MyTweetsNetwork = crawler.SearchN("#"+k, crawler.configuration.Number, crawler.configuration.Slices) } else { MyTweetsNetwork = crawler.Search("#" + k) } for _, tweet := range MyTweetsNetwork { if _, exists := memory_network[tweet.User.IdStr]; exists { memory_network[tweet.User.ScreenName][k]++ } else { memory_network[tweet.User.ScreenName] = make(map[string]int) memory_network[tweet.User.ScreenName][k]++ } } } for user, tags := range memory_network { for tag, occurrence := range tags { db.Create(account, tag, []byte( strconv.Itoa(occurrence)), MATCHING_HASHTAGS, user) } } }