// loadTestConfig loads the given test config yaml file. The given path is // assumed to be relative to the `walker/test/` directory, the location of this // test file. func loadTestConfig(filename string) { _, thisname, _, ok := runtime.Caller(0) if !ok { panic("Failed to get location of test source file") } walker.ReadConfigFile(path.Join(path.Dir(thisname), filename)) }
func TestConfigLoadingBadFiles(t *testing.T) { defer func() { // Reset config for the remaining tests loadTestConfig("test-walker.yaml") }() for _, c := range ConfigTestCases { err := walker.ReadConfigFile(c.file) if err == nil { t.Errorf("Expected an error trying to read %v but did not get one", c.file) } else if err.Error() != c.expected { t.Errorf("Reading config %v, expected: %v\nBut got: %v", c.file, c.expected, err) } } }
func TestConfigLoading(t *testing.T) { defer func() { // Reset config for the remaining tests loadTestConfig("test-walker.yaml") }() walker.Config.UserAgent = "Test Agent (set inline)" walker.SetDefaultConfig() expectedAgentInline := "Walker (http://github.com/iParadigms/walker)" if walker.Config.UserAgent != expectedAgentInline { t.Errorf("Failed to reset default config value (user_agent), expected: %v\nBut got: %v", expectedAgentInline, walker.Config.UserAgent) } err := walker.ReadConfigFile("test-walker2.yaml") if err != nil { t.Fatalf(err.Error()) } expectedAgentYaml := "Test Agent (set in yaml)" if walker.Config.UserAgent != expectedAgentYaml { t.Errorf("Failed to set config value (user_agent) via yaml, expected: %v\nBut got: %v", expectedAgentYaml, walker.Config.UserAgent) } }
func init() { walkerCommand := &cobra.Command{ Use: "walker", } var config string walkerCommand.PersistentFlags().StringVarP(&config, "config", "c", "", "path to a config file to load") readConfig := func() { if config != "" { if err := walker.ReadConfigFile(config); err != nil { panic(err.Error()) } } } var noConsole bool = false crawlCommand := &cobra.Command{ Use: "crawl", Short: "start an all-in-one crawler", Run: func(cmd *cobra.Command, args []string) { readConfig() if commander.Datastore == nil { ds, err := walker.NewCassandraDatastore() if err != nil { fatalf("Failed creating Cassandra datastore: %v", err) } commander.Datastore = ds commander.Dispatcher = &walker.CassandraDispatcher{} } if commander.Handler == nil { commander.Handler = &walker.SimpleWriterHandler{} } manager := &walker.FetchManager{ Datastore: commander.Datastore, Handler: commander.Handler, } go manager.Start() if commander.Dispatcher != nil { go func() { err := commander.Dispatcher.StartDispatcher() if err != nil { panic(err.Error()) } }() } if !noConsole { console.Start() } sig := make(chan os.Signal) signal.Notify(sig, syscall.SIGINT) <-sig if commander.Dispatcher != nil { commander.Dispatcher.StopDispatcher() } manager.Stop() }, } crawlCommand.Flags().BoolVarP(&noConsole, "no-console", "C", false, "Do not start the console") walkerCommand.AddCommand(crawlCommand) fetchCommand := &cobra.Command{ Use: "fetch", Short: "start only a walker fetch manager", Run: func(cmd *cobra.Command, args []string) { readConfig() if commander.Datastore == nil { ds, err := walker.NewCassandraDatastore() if err != nil { fatalf("Failed creating Cassandra datastore: %v", err) } commander.Datastore = ds commander.Dispatcher = &walker.CassandraDispatcher{} } if commander.Handler == nil { commander.Handler = &walker.SimpleWriterHandler{} } manager := &walker.FetchManager{ Datastore: commander.Datastore, Handler: commander.Handler, } go manager.Start() sig := make(chan os.Signal) signal.Notify(sig, syscall.SIGINT) <-sig manager.Stop() }, } walkerCommand.AddCommand(fetchCommand) dispatchCommand := &cobra.Command{ Use: "dispatch", Short: "start only a walker dispatcher", Run: func(cmd *cobra.Command, args []string) { readConfig() if commander.Dispatcher == nil { commander.Dispatcher = &walker.CassandraDispatcher{} } go func() { err := commander.Dispatcher.StartDispatcher() if err != nil { panic(err.Error()) } }() sig := make(chan os.Signal) signal.Notify(sig, syscall.SIGINT) <-sig commander.Dispatcher.StopDispatcher() }, } walkerCommand.AddCommand(dispatchCommand) var seedURL string seedCommand := &cobra.Command{ Use: "seed", Short: "add a seed URL to the datastore", Long: `Seed is useful for: - Adding starter links to bootstrap a broad crawl - Adding links when add_new_domains is false - Adding any other link that needs to be crawled soon This command will insert the provided link and also add its domain to the crawl, regardless of the add_new_domains configuration setting.`, Run: func(cmd *cobra.Command, args []string) { readConfig() orig := walker.Config.AddNewDomains defer func() { walker.Config.AddNewDomains = orig }() walker.Config.AddNewDomains = true if seedURL == "" { fatalf("Seed URL needed to execute; add on with --url/-u") } u, err := walker.ParseURL(seedURL) if err != nil { fatalf("Could not parse %v as a url: %v", seedURL, err) } if commander.Datastore == nil { ds, err := walker.NewCassandraDatastore() if err != nil { fatalf("Failed creating Cassandra datastore: %v", err) } commander.Datastore = ds } commander.Datastore.StoreParsedURL(u, nil) }, } seedCommand.Flags().StringVarP(&seedURL, "url", "u", "", "URL to add as a seed") walkerCommand.AddCommand(seedCommand) var outfile string schemaCommand := &cobra.Command{ Use: "schema", Short: "output the walker schema", Long: `Schema prints the walker schema to stdout, substituting schema-relevant configuration items (ex. keyspace, replication factor). Useful for something like: $ <edit walker.yaml as desired> $ walker schema -o schema.cql $ <edit schema.cql further as desired> $ cqlsh -f schema.cql `, Run: func(cmd *cobra.Command, args []string) { readConfig() if outfile == "" { fatalf("An output file is needed to execute; add with --out/-o") } out, err := os.Create(outfile) if err != nil { panic(err.Error()) } defer out.Close() schema, err := walker.GetCassandraSchema() if err != nil { panic(err.Error()) } fmt.Fprint(out, schema) }, } schemaCommand.Flags().StringVarP(&outfile, "out", "o", "", "File to write output to") walkerCommand.AddCommand(schemaCommand) consoleCommand := &cobra.Command{ Use: "console", Short: "Start up the walker console", Run: func(cmd *cobra.Command, args []string) { readConfig() console.Run() }, } walkerCommand.AddCommand(consoleCommand) commander.Command = walkerCommand }