// 有标识符UID的demo,保证了客户端链接唯一性 func main() { // 开启Teleport错误日志调试 debug.Debug = true tp := teleport.New().SetUID("C2", "abc") tp.Client("127.0.0.1", ":20125", true) tp.Request("我是短链接客户端,我来报个到", "短链接报到", "shortOne") select {} }
// 新建连接 func newManage() *Manage { m := &Manage{ Teleport: teleport.New(), result: make(chan [2]interface{}, 1), } uid := M_WEB + ":" + fmt.Sprint(time.Now().Unix()) m.SetAPI(newManageApi(m)).SetUID(uid).Client(MANANGE_SOCKET_IP, MANANGE_SOCKET_PORT) return m }
// newLogic returns a Logic wired with the global task configuration, the
// spider menu, a fresh Teleport, task jar, spider queue and crawl pool, and
// with its status set to status.STOPPED.
func newLogic() *Logic {
	logic := &Logic{
		AppConf:   cache.Task,
		Traversal: spider.Menu,
		status:    status.STOPPED,
	}

	// Per-instance collaborators, created in the same order as the fields
	// are declared in the original literal.
	logic.Teleport = teleport.New()
	logic.TaskJar = distribute.NewTaskJar()
	logic.SpiderQueue = crawl.NewSpiderQueue()
	logic.CrawlPool = crawl.NewCrawlPool()

	return logic
}
// 有标识符UID的demo,保证了客户端链接唯一性 func main() { tp := teleport.New().SetUID("C").SetAPI(teleport.API{ "报到": func(receive *teleport.NetData) *teleport.NetData { log.Printf("默认:%v", receive.Body) return nil }, }) tp.Client("127.0.0.1", ":20125") tp.Request("我是客户端,我来报个到", "报到") select {} }
// newPholcus builds a Node from the global task configuration (run mode,
// port and master address) with fresh Teleport, task jar, spider queue and
// crawl pool instances, and with Status set to status.RUN.
func newPholcus() *Node {
	// The port is stored with a leading ":".
	port := ":" + strconv.Itoa(cache.Task.Port)

	node := &Node{
		RunMode:  cache.Task.RunMode,
		Port:     port,
		Master:   cache.Task.Master,
		Teleport: teleport.New(),
		TaskJar:  task.NewTaskJar(),
		Spiders:  spiderqueue.New(),
		Crawls:   crawlpool.New(),
		Status:   status.RUN,
	}
	return node
}
// 无标识符UID的demo func main() { tp := teleport.New() tp.SetAPI(teleport.API{ "报到": func(receive *teleport.NetData) *teleport.NetData { log.Printf("报到:%v", receive.Body) return teleport.ReturnData("我是服务器,我已经收到你的来信") }, }) tp.Server(":20125") select {} }
// NewNode builds a Node for the given run mode, port and master address.
// The port is stored as ":" followed by its decimal form. The node gets fresh
// Teleport, task jar, spider queue and crawl pool instances, and its Status
// is set to status.RUN.
func NewNode(mode int, port int, master string) *Node {
	addr := ":" + strconv.Itoa(port)

	node := &Node{
		RunMode:  mode,
		Port:     addr,
		Master:   master,
		Teleport: teleport.New(),
		TaskJar:  task.NewTaskJar(),
		Spiders:  spiderqueue.New(),
		Crawls:   crawlpool.New(),
		Status:   status.RUN,
	}
	return node
}
func New() App { app := &Logic{ AppConf: cache.Task, Traversal: spider.Menu, Scheduler: scheduler.Sdl, status: status.STOP, Teleport: teleport.New(), TaskJar: distribute.NewTaskJar(), SpiderQueue: crawl.NewSpiderQueue(), CrawlPool: crawl.NewCrawlPool(), } return app }
// 无标识符UID的demo func main() { tp := teleport.New() tp.SetAPI(teleport.API{ "报到": func(receive *teleport.NetData) *teleport.NetData { log.Printf("报到:%v", receive.Body) return teleport.ReturnData("服务器:"+receive.From+"客户端已经报到!", "报到", "C3") }, // 短链接不可以直接转发请求 "短链接报到": func(receive *teleport.NetData) *teleport.NetData { log.Printf("报到:%v", receive.Body) tp.Request("服务器:"+receive.From+"客户端已经报到!", "报到", "C3") return nil }, }).Server(":20125") select {} }
// 使用App前必须进行先Init初始化,SetLog()除外 func (self *Logic) Init(mode int, port int, master string, w ...io.Writer) App { self.canSocketLog = false if len(w) > 0 { self.SetLog(w[0]) } self.LogGoOn() cache.Task.RunMode, cache.Task.Port, cache.Task.Master = mode, port, master self.Traversal = spider.Menu self.RunMode = mode self.Port = ":" + strconv.Itoa(port) self.Master = master self.Teleport = teleport.New() self.TaskJar = distribute.NewTaskJar() self.SpiderQueue = crawl.NewSpiderQueue() self.CrawlPool = crawl.NewCrawlPool() switch self.RunMode { case status.SERVER: if self.checkPort() { logs.Log.SetStealLevel() logs.Log.Informational(" !!当前运行模式为:[ 服务器 ] 模式!!") self.Teleport.SetAPI(distribute.ServerApi(self)).Server(self.Port) } case status.CLIENT: if self.checkAll() { logs.Log.SetStealLevel() logs.Log.Informational(" !!当前运行模式为:[ 客户端 ] 模式!!") self.Teleport.SetAPI(distribute.ClientApi(self)).Client(self.Master, self.Port) } case status.OFFLINE: logs.Log.Informational(" !!当前运行模式为:[ 单机 ] 模式!!") return self default: logs.Log.Warning(" * ——请指定正确的运行模式!——") return self } // 根据RunMode判断是否开启节点间log打印 self.canSocketLog = true go self.socketLog() return self }
// 有标识符UID的demo,保证了客户端链接唯一性 func main() { tp := teleport.New() tp.SetAPI(teleport.API{ "报到": func(receive *teleport.NetData) *teleport.NetData { if receive.Status == teleport.SUCCESS { log.Printf("%v", receive.Body) } if receive.Status == teleport.FAILURE { log.Printf("%v", "请求处理失败!") } return nil }, "非法请求测试": func(receive *teleport.NetData) *teleport.NetData { log.Printf("%v", receive.Body) tp.Close() return nil }, }) tp.Client("127.0.0.1", ":20125") tp.Request("我是客户端,我来报个到", "报到") tp.Request("我是客户端,我来报个到", "非法请求测试") select {} }
func init() { defer func() { // 获取输出方式列表 for out, _ := range Output { OutputLib = append(OutputLib, out) } util.StringsSort(OutputLib) }() /************************ excel 输出 ***************************/ Output["excel"] = func(self *Collector, dataIndex int) { defer func() { if err := recover(); err != nil { Log.Println(err) } }() var file *xlsx.File var sheet *xlsx.Sheet var row *xlsx.Row var cell *xlsx.Cell var err error folder1 := "result/data" folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15时04分05秒") filename := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1])) + ".xlsx" // 创建文件 file = xlsx.NewFile() // 添加分类数据工作表 for Name, Rule := range self.GetRules() { // 跳过不输出的数据 if len(Rule.GetOutFeild()) == 0 { continue } // 添加工作表 sheet = file.AddSheet(util.ExcelSheetNameReplace(Name)) // 写入表头 row = sheet.AddRow() for _, title := range Rule.GetOutFeild() { cell = row.AddCell() cell.Value = title } cell = row.AddCell() cell.Value = "当前链接" cell = row.AddCell() cell.Value = "上级链接" cell = row.AddCell() cell.Value = "下载时间" num := 0 //小计 for _, datacell := range self.DockerQueue.Dockers[dataIndex] { if datacell["RuleName"].(string) == Name { row = sheet.AddRow() for _, title := range Rule.GetOutFeild() { cell = row.AddCell() vd := datacell["Data"].(map[string]interface{}) if v, ok := vd[title].(string); ok || vd[title] == nil { cell.Value = v } else { cell.Value = util.JsonString(vd[title]) } } cell = row.AddCell() cell.Value = datacell["Url"].(string) cell = row.AddCell() cell.Value = datacell["ParentUrl"].(string) cell = row.AddCell() cell.Value = datacell["DownloadTime"].(string) num++ } } // Log.Printf("[任务:%v | 关键词:%v | 小类:%v] 输出 %v 条数据!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num) } // 创建/打开目录 f2, err := os.Stat(folder2) if err != nil || !f2.IsDir() { if err := os.MkdirAll(folder2, 0777); err != nil { Log.Printf("Error: %v\n", 
err) } } // 保存文件 err = file.Save(filename) if err != nil { Log.Println(err) } } /************************ CSV 输出 ***************************/ Output["csv"] = func(self *Collector, dataIndex int) { defer func() { if err := recover(); err != nil { Log.Println(err) } }() folder1 := "result/data" folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15时04分05秒") filenameBase := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1])) // 创建/打开目录 f2, err := os.Stat(folder2) if err != nil || !f2.IsDir() { if err := os.MkdirAll(folder2, 0777); err != nil { Log.Printf("Error: %v\n", err) } } // 按数据分类创建文件 for Name, Rule := range self.GetRules() { // 跳过不输出的数据 if len(Rule.GetOutFeild()) == 0 { continue } file, err := os.Create(filenameBase + " (" + util.FileNameReplace(Name) + ").csv") if err != nil { Log.Println(err) continue } file.WriteString("\xEF\xBB\xBF") // 写入UTF-8 BOM w := csv.NewWriter(file) th := Rule.GetOutFeild() th = append(th, []string{"当前链接", "上级链接", "下载时间"}...) 
w.Write(th) num := 0 //小计 for _, datacell := range self.DockerQueue.Dockers[dataIndex] { if datacell["RuleName"].(string) == Name { row := []string{} for _, title := range Rule.GetOutFeild() { vd := datacell["Data"].(map[string]interface{}) if v, ok := vd[title].(string); ok || vd[title] == nil { row = append(row, v) } else { row = append(row, util.JsonString(vd[title])) } } row = append(row, datacell["Url"].(string)) row = append(row, datacell["ParentUrl"].(string)) row = append(row, datacell["DownloadTime"].(string)) w.Write(row) num++ } } // 发送缓存数据流 w.Flush() // 关闭文件 file.Close() // 输出报告 // Log.Printf("[任务:%v | 关键词:%v | 小类:%v] 输出 %v 条数据!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num) } } /************************ MongoDB 输出 ***************************/ Output["mgo"] = func(self *Collector, dataIndex int) { session, err := mgo.Dial(config.DB_URL) //连接数据库 if err != nil { panic(err) } defer session.Close() session.SetMode(mgo.Monotonic, true) db := session.DB(config.DB_NAME) //数据库名称 collection := db.C(config.DB_COLLECTION) //如果该集合已经存在的话,则直接返回 for i, count := 0, len(self.DockerQueue.Dockers[dataIndex]); i < count; i++ { err = collection.Insert((interface{})(self.DockerQueue.Dockers[dataIndex][i])) if err != nil { panic(err) } } } /************************ HBase 输出 ***************************/ var master = cache.Task.Master var port = ":" + fmt.Sprintf("%v", cache.Task.Port) var hbaseSocket = teleport.New().SetPackHeader("tentinet") var hbaseOnce sync.Once Output["hbase"] = func(self *Collector, dataIndex int) { hbaseOnce.Do(func() { hbaseSocket.Client(master, port) }) for i, count := 0, len(self.DockerQueue.Dockers[dataIndex]); i < count; i++ { hbaseSocket.Request(self.DockerQueue.Dockers[dataIndex][i], "log") } } /************************ Mysql 输出 ***************************/ Output["mysql"] = func(self *Collector, dataIndex int) { db, err := sql.Open("mysql", 
config.MYSQL_USER+":"+config.MYSQL_PW+"@tcp("+config.MYSQL_HOST+")/"+config.MYSQL_DB+"?charset=utf8") if err != nil { fmt.Println(err) } defer db.Close() var newMysql = new(myTable) for Name, Rule := range self.GetRules() { //跳过不输出的数据 if len(Rule.GetOutFeild()) == 0 { continue } newMysql.setTableName("`" + self.Spider.GetName() + "-" + Name + "-" + self.Spider.GetKeyword() + "`") for _, title := range Rule.GetOutFeild() { newMysql.addColumn(title) } newMysql.addColumn("当前连接", "上级链接", "下载时间"). create(db) num := 0 //小计 for _, datacell := range self.DockerQueue.Dockers[dataIndex] { if datacell["RuleName"].(string) == Name { for _, title := range Rule.GetOutFeild() { vd := datacell["Data"].(map[string]interface{}) if v, ok := vd[title].(string); ok || vd[title] == nil { newMysql.addRow(v) } else { newMysql.addRow(util.JsonString(vd[title])) } } newMysql.addRow(datacell["Url"].(string), datacell["ParentUrl"].(string), datacell["DownloadTime"].(string)). update(db) num++ } } newMysql = new(myTable) } } }
session, err := mgo.Dial(config.DB_URL) //连接数据库 if err != nil { panic(err) } defer session.Close() session.SetMode(mgo.Monotonic, true) db := session.DB(config.DB_NAME) //数据库名称 collection := db.C(config.DB_COLLECTION) //如果该集合已经存在的话,则直接返回 for i, count := 0, len(self.DockerQueue.Dockers[dataIndex]); i < count; i++ { err = collection.Insert((interface{})(self.DockerQueue.Dockers[dataIndex][i])) if err != nil { panic(err) } } } /************************ HBase 输出 ***************************/ var master = cache.Task.Master var port = ":" + strconv.Itoa(cache.Task.Port) var hbaseSocket = teleport.New().SetPackHeader("tentinet") var once sync.Once func (self *Collector) hbase(dataIndex int) { once.Do(func() { hbaseSocket.Client(master, port) }) for i, count := 0, len(self.DockerQueue.Dockers[dataIndex]); i < count; i++ { hbaseSocket.Request(self.DockerQueue.Dockers[dataIndex][i], "log") } }
// 有标识符UID的demo,保证了客户端链接唯一性 func main() { tp := teleport.New().SetUID("C2", "abc") tp.Client("127.0.0.1", ":20125", true) tp.Request("我是短链接客户端,我来报个到", "短链接报到") select {} }