func (r *PhantomJSRenderer) Do(s *gryffin.Scan) { r.chanResponse = make(chan *gryffin.Scan, 10) r.chanLinks = make(chan *gryffin.Scan, 10) r.done = make(chan string) // Construct the command. // render.js http(s)://<host>[:port][/path] [{"method":"post", "data":"a=1&b=2"}] url := s.Request.URL.String() cookies := make([]string, 0) // ua := s.Request.UserAgent() ua := "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36" for _, c := range s.Cookies { cookies = append(cookies, c.String()) } arg := input{ Method: s.Request.Method, Headers: inputHeaders{ UserAgent: ua, Cookie: strings.Join(cookies, ";"), }, } opt, err := json.Marshal(arg) if err != nil { s.Error("PhantomjsRenderer.Do", err) return } // s.Logmf("PhantomjsRenderer.Do", "Running: render.js %s '%s'", url, string(opt)) s.Logmf("PhantomjsRenderer.Do", "Running: render.js") cmd := exec.Command( os.Getenv("GOPATH")+"/src/github.com/yahoo/gryffin/renderer/resource/render.js", url, string(opt)) stdout, err := cmd.StdoutPipe() if err != nil { s.Error("PhantomjsRenderer.Do", err) return } if err := cmd.Start(); err != nil { s.Error("PhantomjsRenderer.Do", err) return } r.process = cmd.Process // wait until done or timeout. go r.extract(stdout, s) go r.wait(s) cmd.Wait() }
// kill terminates the process stored in r.process.
//
// When the kill succeeds, the given reason is logged through s. A failed
// kill is ignored without logging.
func (r *PhantomJSRenderer) kill(reason string, s *gryffin.Scan) {
	killErr := r.process.Kill()
	if killErr != nil {
		return
	}
	s.Logmf("PhantomjsRenderer.Do", "[%s] Terminating the crawl process.", reason)
}
func (r *PhantomJSRenderer) Do(s *gryffin.Scan) { r.chanResponse = make(chan *gryffin.Scan, 10) r.chanLinks = make(chan *gryffin.Scan, 10) // Construct the command. // render.js http(s)://<host>[:port][/path] [{"method":"post", "data":"a=1&b=2"}] url := s.Request.URL.String() cookies := make([]string, 0) // ua := s.Request.UserAgent() ua := "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.85 Safari/537.36" for _, c := range s.Cookies { cookies = append(cookies, c.String()) } arg := input{ Method: s.Request.Method, Headers: inputHeaders{ UserAgent: ua, Cookie: strings.Join(cookies, ";"), }, } opt, err := json.Marshal(arg) if err != nil { s.Error("PhantomjsRenderer.Do", err) return } // s.Logmf("PhantomjsRenderer.Do", "Running: render.js %s '%s'", url, string(opt)) s.Logmf("PhantomjsRenderer.Do", "Running: render.js") cmd := exec.Command( os.Getenv("GOPATH")+"/src/github.com/yahoo/gryffin/renderer/resource/render.js", url, string(opt)) stdout, err := cmd.StdoutPipe() if err != nil { s.Error("PhantomjsRenderer.Do", err) return } if err := cmd.Start(); err != nil { s.Error("PhantomjsRenderer.Do", err) return } kill := func(reason string) { if err := cmd.Process.Kill(); err != nil { // TODO - forgive "os: process already finished" s.Error("PhantomjsRenderer.Do", err) // log.Printf("error: %s", err) } else { s.Logmf("PhantomjsRenderer.Do", "[%s] Terminating the crawl process.", reason) } } // Kill when timeout _ = time.Second if r.Timeout != 0 { timeout := func() { <-time.After(time.Duration(r.Timeout) * time.Second) kill("Timeout") } go timeout() } crawl := func() { defer close(r.chanResponse) defer close(r.chanLinks) dec := json.NewDecoder(stdout) for { var m message err := dec.Decode(&m) if err == io.EOF { return break } else { if m.responseMessage != nil { m.Response.fill(s) if s.IsDuplicatedPage() { kill("Duplicated") return } s.Logm("PhantomjsRenderer.Do.UniqueCrawl", m.MsgType) r.chanResponse <- s for _, link 
:= range m.Response.Details.Links { if newScan := link.toScan(s); newScan != nil && newScan.IsScanAllowed() { r.chanLinks <- newScan } } } else if m.domMessage != nil { for _, link := range m.domMessage.Links { if newScan := link.toScan(s); newScan != nil && newScan.IsScanAllowed() { r.chanLinks <- newScan } } } } } cmd.Wait() } go crawl() }