Пример #1
0
// Demo of a client that carries an explicit UID, which keeps the client
// connection unique.
func main() {
	const (
		serverIP   = "127.0.0.1"
		serverPort = ":20125"
	)

	// Turn on Teleport's error-log debugging.
	debug.Debug = true

	client := teleport.New().SetUID("C2", "abc")
	// NOTE(review): the trailing `true` presumably selects short-connection
	// mode (the request text calls this a short-link client) — confirm
	// against the teleport API.
	client.Client(serverIP, serverPort, true)
	client.Request("我是短链接客户端,我来报个到", "短链接报到", "shortOne")

	// Block forever so the process keeps running after the request is issued.
	select {}
}
Пример #2
0
// newManage creates a Manage with a fresh Teleport instance and a result
// channel of capacity 1, then registers its API, assigns a UID built from
// M_WEB and the current Unix timestamp, and starts it as a client of the
// management socket.
func newManage() *Manage {
	manage := new(Manage)
	manage.Teleport = teleport.New()
	manage.result = make(chan [2]interface{}, 1)

	// The timestamp suffix distinguishes instances created at different seconds.
	uid := M_WEB + ":" + fmt.Sprint(time.Now().Unix())

	withAPI := manage.SetAPI(newManageApi(manage))
	withUID := withAPI.SetUID(uid)
	withUID.Client(MANANGE_SOCKET_IP, MANANGE_SOCKET_PORT)

	return manage
}
Пример #3
0
// newLogic returns a Logic populated with the cached task configuration, the
// spider menu, a STOPPED status, and freshly created Teleport, task jar,
// spider queue and crawl pool instances.
func newLogic() *Logic {
	logic := new(Logic)
	logic.AppConf = cache.Task
	logic.Traversal = spider.Menu
	logic.status = status.STOPPED
	logic.Teleport = teleport.New()
	logic.TaskJar = distribute.NewTaskJar()
	logic.SpiderQueue = crawl.NewSpiderQueue()
	logic.CrawlPool = crawl.NewCrawlPool()
	return logic
}
Пример #4
0
// Demo of a client that carries an explicit UID, which keeps the client
// connection unique.
func main() {
	// Handler for the "报到" operation: log the received body, send nothing back.
	onCheckIn := func(receive *teleport.NetData) *teleport.NetData {
		log.Printf("默认:%v", receive.Body)
		return nil
	}
	handlers := teleport.API{"报到": onCheckIn}

	client := teleport.New().SetUID("C").SetAPI(handlers)
	client.Client("127.0.0.1", ":20125")
	client.Request("我是客户端,我来报个到", "报到")

	// Block forever so the process keeps running.
	select {}
}
Пример #5
0
// newPholcus returns a Node configured from the cached task settings
// (run mode, port and master address) with fresh Teleport, task jar, spider
// queue and crawl pool instances, and its status set to RUN.
func newPholcus() *Node {
	// The port is stored with a leading colon, e.g. ":1234".
	listenPort := ":" + strconv.Itoa(cache.Task.Port)

	node := new(Node)
	node.RunMode = cache.Task.RunMode
	node.Port = listenPort
	node.Master = cache.Task.Master
	node.Teleport = teleport.New()
	node.TaskJar = task.NewTaskJar()
	node.Spiders = spiderqueue.New()
	node.Crawls = crawlpool.New()
	node.Status = status.RUN
	return node
}
Пример #6
0
// Demo of a server that runs without an explicit UID.
func main() {
	// Handler for the "报到" operation: log the body and reply with a fixed
	// acknowledgement.
	onCheckIn := func(receive *teleport.NetData) *teleport.NetData {
		log.Printf("报到:%v", receive.Body)
		return teleport.ReturnData("我是服务器,我已经收到你的来信")
	}

	server := teleport.New()
	server.SetAPI(teleport.API{"报到": onCheckIn})
	server.Server(":20125")

	// Block forever so the process keeps running.
	select {}
}
Пример #7
0
// NewNode returns a Node for the given run mode, port and master address,
// with fresh Teleport, task jar, spider queue and crawl pool instances and
// its status set to RUN. The port is stored with a leading colon.
func NewNode(mode int, port int, master string) *Node {
	node := new(Node)
	node.RunMode = mode
	node.Port = ":" + strconv.Itoa(port)
	node.Master = master
	node.Teleport = teleport.New()
	node.TaskJar = task.NewTaskJar()
	node.Spiders = spiderqueue.New()
	node.Crawls = crawlpool.New()
	node.Status = status.RUN
	return node
}
Пример #8
0
// New returns an App backed by a Logic that is populated with the cached task
// configuration, the spider menu, the shared scheduler, a STOP status, and
// freshly created Teleport, task jar, spider queue and crawl pool instances.
func New() App {
	return &Logic{
		AppConf:     cache.Task,
		Traversal:   spider.Menu,
		Scheduler:   scheduler.Sdl,
		status:      status.STOP,
		Teleport:    teleport.New(),
		TaskJar:     distribute.NewTaskJar(),
		SpiderQueue: crawl.NewSpiderQueue(),
		CrawlPool:   crawl.NewCrawlPool(),
	}
}
Пример #9
0
// Demo of a server that runs without an explicit UID and exposes two
// check-in operations.
func main() {
	server := teleport.New()

	handlers := teleport.API{
		// Reply through the return value, addressed to operation "报到" on "C3".
		"报到": func(receive *teleport.NetData) *teleport.NetData {
			log.Printf("报到:%v", receive.Body)
			notice := "服务器:" + receive.From + "客户端已经报到!"
			return teleport.ReturnData(notice, "报到", "C3")
		},

		// A short connection cannot forward the request directly, so the
		// notice is sent as a new request instead of being returned.
		"短链接报到": func(receive *teleport.NetData) *teleport.NetData {
			log.Printf("报到:%v", receive.Body)
			notice := "服务器:" + receive.From + "客户端已经报到!"
			server.Request(notice, "报到", "C3")
			return nil
		},
	}

	server.SetAPI(handlers).Server(":20125")

	// Block forever so the process keeps running.
	select {}
}
Пример #10
0
// Init must be called before the App is used (SetLog is the only exception).
// It records the run mode, port and master address both in cache.Task and on
// the receiver, recreates the Teleport, task jar, spider queue and crawl pool,
// and then starts the node as a server or client according to mode.
// If w is non-empty, w[0] is installed as the log writer first.
// The receiver is returned so calls can be chained.
func (self *Logic) Init(mode int, port int, master string, w ...io.Writer) App {
	// Socket log forwarding stays disabled until a networked mode is selected.
	self.canSocketLog = false
	if len(w) != 0 {
		self.SetLog(w[0])
	}
	self.LogGoOn()

	cache.Task.RunMode = mode
	cache.Task.Port = port
	cache.Task.Master = master

	self.Traversal = spider.Menu
	self.RunMode, self.Port, self.Master = mode, ":"+strconv.Itoa(port), master
	self.Teleport = teleport.New()
	self.TaskJar = distribute.NewTaskJar()
	self.SpiderQueue = crawl.NewSpiderQueue()
	self.CrawlPool = crawl.NewCrawlPool()

	switch self.RunMode {
	case status.OFFLINE:
		// Standalone mode: no sockets, so no inter-node log forwarding.
		logs.Log.Informational("                                                                                               !!当前运行模式为:[ 单机 ] 模式!!")
		return self

	case status.SERVER:
		if !self.checkPort() {
			break
		}
		logs.Log.SetStealLevel()
		logs.Log.Informational("                                                                                               !!当前运行模式为:[ 服务器 ] 模式!!")
		self.Teleport.SetAPI(distribute.ServerApi(self)).Server(self.Port)

	case status.CLIENT:
		if !self.checkAll() {
			break
		}
		logs.Log.SetStealLevel()
		logs.Log.Informational("                                                                                               !!当前运行模式为:[ 客户端 ] 模式!!")
		self.Teleport.SetAPI(distribute.ClientApi(self)).Client(self.Master, self.Port)

	default:
		logs.Log.Warning(" *    ——请指定正确的运行模式!——")
		return self
	}

	// Server and client modes enable log printing between nodes. This is
	// reached even when the checkPort/checkAll validation above failed.
	self.canSocketLog = true
	go self.socketLog()
	return self
}
Пример #11
0
// Demo of a client that registers two handlers and then issues one request
// to each operation. No UID is set on this client.
func main() {
	client := teleport.New()

	// Handler for "报到": report the body on success, a fixed message on failure.
	onCheckIn := func(receive *teleport.NetData) *teleport.NetData {
		switch receive.Status {
		case teleport.SUCCESS:
			log.Printf("%v", receive.Body)
		case teleport.FAILURE:
			log.Printf("%v", "请求处理失败!")
		}
		return nil
	}

	// Handler for "非法请求测试": log the body, then close the client.
	onIllegalRequest := func(receive *teleport.NetData) *teleport.NetData {
		log.Printf("%v", receive.Body)
		client.Close()
		return nil
	}

	client.SetAPI(teleport.API{
		"报到":     onCheckIn,
		"非法请求测试": onIllegalRequest,
	})
	client.Client("127.0.0.1", ":20125")

	const greeting = "我是客户端,我来报个到"
	client.Request(greeting, "报到")
	client.Request(greeting, "非法请求测试")

	// Block forever so the process keeps running.
	select {}
}
Пример #12
0
// init registers the built-in output backends ("excel", "csv", "mgo",
// "hbase" and "mysql") in Output. When it returns, the deferred function
// appends every key of Output to OutputLib and passes the list to
// util.StringsSort.
//
// Each backend has the signature func(self *Collector, dataIndex int) and
// writes the records held in self.DockerQueue.Dockers[dataIndex].
func init() {
	defer func() {
		// Collect the list of available output backends.
		for out := range Output {
			OutputLib = append(OutputLib, out)
		}
		util.StringsSort(OutputLib)
	}()

	/************************ excel output ***************************/
	Output["excel"] = func(self *Collector, dataIndex int) {
		// The type assertions below panic on malformed records; log instead of crashing.
		defer func() {
			if err := recover(); err != nil {
				Log.Println(err)
			}
		}()

		var file *xlsx.File
		var sheet *xlsx.Sheet
		var row *xlsx.Row
		var cell *xlsx.Cell
		var err error

		folder1 := "result/data"
		folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15时04分05秒")
		filename := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1])) + ".xlsx"

		// Create the workbook.
		file = xlsx.NewFile()

		// Add one worksheet per rule.
		for Name, Rule := range self.GetRules() {
			// Skip rules that have no output fields.
			if len(Rule.GetOutFeild()) == 0 {
				continue
			}
			// Add the worksheet.
			sheet = file.AddSheet(util.ExcelSheetNameReplace(Name))
			// Write the header row: the rule's fields followed by three fixed columns.
			row = sheet.AddRow()
			for _, title := range Rule.GetOutFeild() {
				cell = row.AddCell()
				cell.Value = title
			}
			cell = row.AddCell()
			cell.Value = "当前链接"
			cell = row.AddCell()
			cell.Value = "上级链接"
			cell = row.AddCell()
			cell.Value = "下载时间"

			num := 0 // subtotal of rows written for this rule
			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				if datacell["RuleName"].(string) == Name {
					row = sheet.AddRow()
					for _, title := range Rule.GetOutFeild() {
						cell = row.AddCell()
						vd := datacell["Data"].(map[string]interface{})
						// Strings are written as-is and nil becomes the empty
						// string; any other value is JSON-encoded.
						if v, ok := vd[title].(string); ok || vd[title] == nil {
							cell.Value = v
						} else {
							cell.Value = util.JsonString(vd[title])
						}
					}
					cell = row.AddCell()
					cell.Value = datacell["Url"].(string)
					cell = row.AddCell()
					cell.Value = datacell["ParentUrl"].(string)
					cell = row.AddCell()
					cell.Value = datacell["DownloadTime"].(string)
					num++
				}
			}

			// Log.Printf("[任务:%v | 关键词:%v | 小类:%v] 输出 %v 条数据!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num)

		}

		// Create the output directory if it does not exist yet.
		f2, err := os.Stat(folder2)
		if err != nil || !f2.IsDir() {
			if err := os.MkdirAll(folder2, 0777); err != nil {
				Log.Printf("Error: %v\n", err)
			}
		}

		// Save the workbook.
		err = file.Save(filename)

		if err != nil {
			Log.Println(err)
		}

	}

	/************************ CSV output ***************************/
	Output["csv"] = func(self *Collector, dataIndex int) {
		// The type assertions below panic on malformed records; log instead of crashing.
		defer func() {
			if err := recover(); err != nil {
				Log.Println(err)
			}
		}()

		folder1 := "result/data"
		folder2 := folder1 + "/" + self.startTime.Format("2006年01月02日 15时04分05秒")
		filenameBase := folder2 + "/" + util.FileNameReplace(self.Spider.GetName()+"_"+self.Spider.GetKeyword()+" "+fmt.Sprintf("%v", self.sum[0])+"-"+fmt.Sprintf("%v", self.sum[1]))

		// Create the output directory if it does not exist yet.
		f2, err := os.Stat(folder2)
		if err != nil || !f2.IsDir() {
			if err := os.MkdirAll(folder2, 0777); err != nil {
				Log.Printf("Error: %v\n", err)
			}
		}

		// Create one file per rule.
		for Name, Rule := range self.GetRules() {
			// Skip rules that have no output fields.
			if len(Rule.GetOutFeild()) == 0 {
				continue
			}

			file, err := os.Create(filenameBase + " (" + util.FileNameReplace(Name) + ").csv")

			if err != nil {
				Log.Println(err)
				continue
			}

			// Run the per-file work in its own function so the deferred Close
			// fires for every file, including when a type assertion panics
			// and the outer recover aborts the remaining rules.
			func() {
				defer file.Close()

				file.WriteString("\xEF\xBB\xBF") // write the UTF-8 BOM
				w := csv.NewWriter(file)

				// Copy the field list before extending it, so the append
				// cannot write into the backing array of the slice returned
				// by GetOutFeild.
				th := append([]string{}, Rule.GetOutFeild()...)
				th = append(th, "当前链接", "上级链接", "下载时间")
				w.Write(th)

				num := 0 // subtotal of rows written for this rule
				for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
					if datacell["RuleName"].(string) == Name {
						row := []string{}
						for _, title := range Rule.GetOutFeild() {
							vd := datacell["Data"].(map[string]interface{})
							// Strings are written as-is and nil becomes the
							// empty string; any other value is JSON-encoded.
							if v, ok := vd[title].(string); ok || vd[title] == nil {
								row = append(row, v)
							} else {
								row = append(row, util.JsonString(vd[title]))
							}
						}

						row = append(row, datacell["Url"].(string))
						row = append(row, datacell["ParentUrl"].(string))
						row = append(row, datacell["DownloadTime"].(string))
						w.Write(row)

						num++
					}
				}
				// Flush buffered rows, then report any error from the
				// preceding Write/Flush calls.
				w.Flush()
				if err := w.Error(); err != nil {
					Log.Println(err)
				}
				// Report:
				// Log.Printf("[任务:%v | 关键词:%v | 小类:%v] 输出 %v 条数据!!!\n", self.Spider.GetName(), self.Spider.GetKeyword(), Name, num)
			}()
		}
	}

	/************************ MongoDB output ***************************/

	// NOTE(review): unlike the excel and csv backends, this one panics on
	// connection or insert errors and has no recover of its own — confirm
	// that the caller handles the panic.
	Output["mgo"] = func(self *Collector, dataIndex int) {
		session, err := mgo.Dial(config.DB_URL) // connect to the database
		if err != nil {
			panic(err)
		}
		defer session.Close()
		session.SetMode(mgo.Monotonic, true)

		db := session.DB(config.DB_NAME)         // database name
		collection := db.C(config.DB_COLLECTION) // collection to insert into

		for i, count := 0, len(self.DockerQueue.Dockers[dataIndex]); i < count; i++ {
			err = collection.Insert((interface{})(self.DockerQueue.Dockers[dataIndex][i]))
			if err != nil {
				panic(err)
			}
		}
	}

	/************************ HBase output ***************************/
	var master = cache.Task.Master
	var port = ":" + fmt.Sprintf("%v", cache.Task.Port)
	var hbaseSocket = teleport.New().SetPackHeader("tentinet")
	var hbaseOnce sync.Once

	Output["hbase"] = func(self *Collector, dataIndex int) {
		// Start the client connection on first use only.
		hbaseOnce.Do(func() { hbaseSocket.Client(master, port) })
		for i, count := 0, len(self.DockerQueue.Dockers[dataIndex]); i < count; i++ {
			hbaseSocket.Request(self.DockerQueue.Dockers[dataIndex][i], "log")
		}
	}

	/************************ Mysql output ***************************/
	Output["mysql"] = func(self *Collector, dataIndex int) {
		db, err := sql.Open("mysql", config.MYSQL_USER+":"+config.MYSQL_PW+"@tcp("+config.MYSQL_HOST+")/"+config.MYSQL_DB+"?charset=utf8")
		if err != nil {
			// sql.Open returns a nil *sql.DB on error; continuing would
			// dereference it in db.Close() and in create/update below.
			Log.Println(err)
			return
		}
		defer db.Close()

		var newMysql = new(myTable)

		for Name, Rule := range self.GetRules() {
			// Skip rules that have no output fields.
			if len(Rule.GetOutFeild()) == 0 {
				continue
			}

			newMysql.setTableName("`" + self.Spider.GetName() + "-" + Name + "-" + self.Spider.GetKeyword() + "`")

			for _, title := range Rule.GetOutFeild() {
				newMysql.addColumn(title)
			}

			// NOTE(review): the first fixed column is spelled "当前连接" here
			// but "当前链接" in the excel/csv backends. It is left unchanged
			// because it is a column name in existing tables.
			newMysql.addColumn("当前连接", "上级链接", "下载时间").
				create(db)

			num := 0 // subtotal of rows written for this rule

			for _, datacell := range self.DockerQueue.Dockers[dataIndex] {
				if datacell["RuleName"].(string) == Name {
					for _, title := range Rule.GetOutFeild() {
						vd := datacell["Data"].(map[string]interface{})
						// Strings are written as-is and nil becomes the empty
						// string; any other value is JSON-encoded.
						if v, ok := vd[title].(string); ok || vd[title] == nil {
							newMysql.addRow(v)
						} else {
							newMysql.addRow(util.JsonString(vd[title]))
						}
					}
					newMysql.addRow(datacell["Url"].(string), datacell["ParentUrl"].(string), datacell["DownloadTime"].(string)).
						update(db)

					num++
				}
			}
			// Start the next rule with a fresh table description.
			newMysql = new(myTable)
		}
	}
}
Пример #13
0
	session, err := mgo.Dial(config.DB_URL) //连接数据库
	if err != nil {
		panic(err)
	}
	defer session.Close()
	session.SetMode(mgo.Monotonic, true)

	db := session.DB(config.DB_NAME)         //数据库名称
	collection := db.C(config.DB_COLLECTION) //如果该集合已经存在的话,则直接返回

	for i, count := 0, len(self.DockerQueue.Dockers[dataIndex]); i < count; i++ {
		err = collection.Insert((interface{})(self.DockerQueue.Dockers[dataIndex][i]))
		if err != nil {
			panic(err)
		}
	}
}

/************************ HBase output ***************************/

// Package-level state for the HBase output: the master address and port taken
// from the cached task settings, the socket used to send records, and the
// Once that guards starting its client connection.
var (
	master      = cache.Task.Master
	port        = ":" + strconv.Itoa(cache.Task.Port)
	hbaseSocket = teleport.New().SetPackHeader("tentinet")
	once        sync.Once
)

// hbase sends every record in self.DockerQueue.Dockers[dataIndex] over the
// shared hbaseSocket as a "log" request. The client connection is started on
// the first call only.
func (self *Collector) hbase(dataIndex int) {
	once.Do(func() {
		hbaseSocket.Client(master, port)
	})

	records := self.DockerQueue.Dockers[dataIndex]
	for idx := range records {
		hbaseSocket.Request(records[idx], "log")
	}
}
Пример #14
0
// Demo of a client that carries an explicit UID, which keeps the client
// connection unique.
func main() {
	const (
		serverIP   = "127.0.0.1"
		serverPort = ":20125"
	)

	client := teleport.New().SetUID("C2", "abc")
	// NOTE(review): the trailing `true` presumably selects short-connection
	// mode (the request text calls this a short-link client) — confirm
	// against the teleport API.
	client.Client(serverIP, serverPort, true)
	client.Request("我是短链接客户端,我来报个到", "短链接报到")

	// Block forever so the process keeps running after the request is issued.
	select {}
}