//下载图片 func (self *PipelineMySQL) imgProcess(pageitems *robot.PageItems, task robot.Task) string { if img, ok := pageitems.GetItem("img"); ok { filename, _ := utils.DownloadImage(img, self.ImageStore) return filename } return "" }
func (self *PipelineFile) Process(items *robot.PageItems, t robot.Task) { self.file.WriteString("----------------------------------------------------------------------------------------------\n") self.file.WriteString("Crawled url :\t" + items.GetRequest().GetUrl() + "\n") self.file.WriteString("Crawled result : \n") for key, value := range items.GetAll() { self.file.WriteString(key + "\t:\t" + value + "\n") } }
func (self *PipelineConsole) Process(items *robot.PageItems, t robot.Task) { println("----------------------------------------------------------------------------------------------") println("Crawled url :\t" + items.GetRequest().GetUrl() + "\n") println("Crawled result : ") for key, value := range items.GetAll() { println(key + "\t:\t" + value) } }
func (self *PipelineMongo) Process(pageitems *robot.PageItems, task robot.Task) { items := Items{} items.Test1, _ = pageitems.GetItem("test1") items.Test2, _ = pageitems.GetItem("test2") err := self.c.Insert(&items) if err != nil { panic(err) } }
func (self *PipelineMySQL) Process(pageitems *robot.PageItems, task robot.Task) { if code, ok := pageitems.GetItem("code"); ok && code == "0" { firstid := self.firstProcess(pageitems, task) secondid := self.secondProcess(pageitems, task) picname := self.imgProcess(pageitems, task) if novelid, err := self.novelProcess(pageitems, task, firstid, secondid, picname); err == nil { self.contentProcess(pageitems, task, novelid, firstid, secondid) } else { log.Println(err.Error()) } } }
//如果二级分类存在则略过,不存在存储 func (self *PipelineMySQL) secondProcess(pageitems *robot.PageItems, task robot.Task) int64 { if secondname, ok := pageitems.GetItem("second"); ok { o := orm.NewOrm() second := &Second{ Secondname: secondname, Createtime: time.Now(), Updatetime: time.Now(), } //如果数据不存在 则创建 if _, id, err := o.ReadOrCreate(second, "secondname"); err == nil { return id } } return -1 }
//如果一级分类标签存在则略过, //不存在则将一级分类标签插入数据库,并存储一级分类标签的id func (self *PipelineMySQL) firstProcess(pageitems *robot.PageItems, task robot.Task) int64 { if firstname, ok := pageitems.GetItem("first"); ok { o := orm.NewOrm() first := &First{ Firstname: firstname, Createtime: time.Now(), Updatetime: time.Now(), } if _, id, err := o.ReadOrCreate(first, "firstname"); err == nil { return id } } return -1 }
//添加小说表 func (self *PipelineMySQL) novelProcess(pageitems *robot.PageItems, task robot.Task, firstid, secondid int64, picname string) (int64, error) { items := pageitems.GetAll() o := orm.NewOrm() novel := &Novel{ Title: items["title"], Firstid: firstid, Secondid: secondid, Author: items["author"], Introduction: items["introduction"], Picture: picname, Novelsource: items["novelsource"], Createtime: time.Now(), } //如果数据不存在 则创建 _, id, err := o.ReadOrCreate(novel, "novelsource") return id, err }
func (self *PipelineMySQL) contentProcess(pageitems *robot.PageItems, task robot.Task, novelid, firstid, secondid int64) { items := pageitems.GetAll() chapter, _ := strconv.Atoi(items["chapter"]) o := orm.NewOrm() content := &Content{ Novelid: novelid, Title: items["title"], Firstid: firstid, Secondid: secondid, Chapter: chapter, Subtitle: items["subtitle"], Text: items["content"], Contentsource: items["contenturl"], Createtime: time.Now(), } //如果数据不存在 则创建 _, _, err := o.ReadOrCreate(content, "contentsource") if err == nil { log.Println("创建content成功 subtitle:", items["subtitle"]) } }