func appInit() { app.LogicApp = app.New().SetLog(Lsc).AsyncLog(true) spiderMenu = func() (spmenu []map[string]string) { // 获取蜘蛛家族 for _, sp := range app.LogicApp.GetSpiderLib() { spmenu = append(spmenu, map[string]string{"name": sp.GetName(), "description": sp.GetDescription()}) } return spmenu }() }
// [spider frame (golang)] Pholcus(幽灵蛛)是一款纯Go语言编写的高并发、分布式、重量级爬虫软件,支持单机、服务端、客户端三种运行模式,拥有Web、GUI、命令行三种操作界面;规则简单灵活、批量任务并发、输出方式丰富(mysql/mongodb/csv/excel等)、有大量Demo共享;同时她还支持横纵向两种抓取模式,支持模拟登录和任务暂停、取消等一系列高级功能; //(官方QQ群:Go大数据 42731170,欢迎加入我们的讨论)。 // GUI界面版。 package gui import ( "log" "runtime" "github.com/henrylee2cn/pholcus/app" "github.com/henrylee2cn/pholcus/app/spider" "github.com/lxn/walk" ) var LogicApp = app.New().AsyncLog(true) func Run() { // 开启最大核心数运行 runtime.GOMAXPROCS(runtime.NumCPU()) runmodeWindow() } func Init() { LogicApp.Init(Input.RunMode, Input.Port, Input.Master) } func SetTaskConf() { // 纠正协程数 if Input.ThreadNum == 0 { Input.ThreadNum = 1 }
}(conn) for { var req map[string]interface{} if err := ws.JSON.Receive(conn, &req); err != nil { reporter.Printf("websocket接收出错断开 (%v) !", err) return } reporter.Printf("Received from web: %v", req) wsApi[util.Atoa(req["operate"])](conn, req) } } var logicApp = app.New() var spiderMenu = make([]map[string]string, 0) var wsApi = map[string]func(*ws.Conn, map[string]interface{}){} func init() { // 设置log输出目标 logicApp.SetLog(Log) // 初始化运行 wsApi["init"] = func(conn *ws.Conn, req map[string]interface{}) { var mode = util.Atoi(req["mode"]) var port = util.Atoi(req["port"]) var master = util.Atoa(req["ip"]) //服务器(主节点)地址,不含端口 currMode := logicApp.GetRunMode() if currMode == -1 { logicApp.Init(mode, port, master) // 运行模式初始化
import ( "flag" // "bufio" "log" // "os" // "fmt" "runtime" "strconv" "strings" "github.com/henrylee2cn/pholcus/app" "github.com/henrylee2cn/pholcus/spider" ) var LogicApp = app.New() func Run() { // 开启最大核心数运行 runtime.GOMAXPROCS(runtime.NumCPU()) // //运行模式 // modeflag := flag.Int("运行模式", 0, "*运行模式: [0] 单机 [1] 服务端 [2] 客户端\r\n") LogicApp.Init(1, 0, "") // //端口号,非单机模式填写 // portflag := flag.Int("端口号", 0, "端口号: 只填写数字即可,不含冒号\r\n") // //主节点ip,客户端模式填写 // masterflag := flag.String("服务端IP", "127.0.0.1", "主节点IP: 服务端IP地址,不含端口\r\n")
import ( "github.com/henrylee2cn/pholcus/app" "github.com/henrylee2cn/pholcus/app/spider" "github.com/henrylee2cn/pholcus/common/util" "github.com/henrylee2cn/pholcus/config" "github.com/henrylee2cn/pholcus/logs" "github.com/henrylee2cn/pholcus/runtime/status" ws "github.com/henrylee2cn/websocket.google" ) var ( wchan chan interface{} wchanClosed bool isRunning bool logicApp = app.New().SetLog(Log).AsyncLog(true) spiderMenu = func() (spmenu []map[string]string) { // 获取蜘蛛家族 for _, sp := range logicApp.GetSpiderLib() { spmenu = append(spmenu, map[string]string{"name": sp.GetName(), "description": sp.GetDescription()}) } return spmenu }() wsApi = map[string]func(*ws.Conn, map[string]interface{}){} ) func wsHandle(conn *ws.Conn) { wchanClosed = false defer func() { // 连接断开前关闭正在运行的任务
import ( "flag" // "bufio" // "os" // "fmt" "runtime" "strconv" "strings" "github.com/henrylee2cn/pholcus/app" "github.com/henrylee2cn/pholcus/app/spider" "github.com/henrylee2cn/pholcus/logs" "github.com/henrylee2cn/pholcus/runtime/status" ) var LogicApp = app.New().Init(status.OFFLINE, 0, "") func Run() { // 开启最大核心数运行 runtime.GOMAXPROCS(runtime.NumCPU()) // //运行模式 // modeflag := flag.Int("运行模式", 0, "*运行模式: [0] 单机 [1] 服务端 [2] 客户端\r\n") // //端口号,非单机模式填写 // portflag := flag.Int("端口号", 0, "端口号: 只填写数字即可,不含冒号\r\n") // //主节点ip,客户端模式填写 // masterflag := flag.String("服务端IP", "127.0.0.1", "主节点IP: 服务端IP地址,不含端口\r\n") // 蜘蛛列表