func (w *OcrRpcWorker) handle(deliveries <-chan amqp.Delivery, done chan error) { for d := range deliveries { logg.LogTo( "OCR_WORKER", "got %d byte delivery: [%v]. Routing key: %v Reply to: %v", len(d.Body), d.DeliveryTag, d.RoutingKey, d.ReplyTo, ) ocrResult, err := w.resultForDelivery(d) if err != nil { msg := "Error generating ocr result. Error: %v" logg.LogError(fmt.Errorf(msg, err)) } logg.LogTo("OCR_WORKER", "Sending rpc response: %v", ocrResult) err = w.sendRpcResponse(ocrResult, d.ReplyTo, d.CorrelationId) if err != nil { msg := "Error returning ocr result: %v. Error: %v" logg.LogError(fmt.Errorf(msg, ocrResult, err)) // if we can't send our response, let's just abort done <- err break } } logg.LogTo("OCR_WORKER", "handle: deliveries channel closed") done <- fmt.Errorf("handle: deliveries channel closed") }
func (w *OcrRpcWorker) resultForDelivery(d amqp.Delivery) (OcrResult, error) { ocrRequest := OcrRequest{} ocrResult := OcrResult{Text: "Error"} err := json.Unmarshal(d.Body, &ocrRequest) if err != nil { msg := "Error unmarshaling json: %v. Error: %v" errMsg := fmt.Sprintf(msg, string(d.Body), err) logg.LogError(fmt.Errorf(errMsg)) ocrResult.Text = errMsg return ocrResult, err } ocrEngine := NewOcrEngine(ocrRequest.EngineType) ocrResult, err = ocrEngine.ProcessRequest(ocrRequest) if err != nil { msg := "Error processing image url: %v. Error: %v" errMsg := fmt.Sprintf(msg, ocrRequest.ImgUrl, err) logg.LogError(fmt.Errorf(errMsg)) ocrResult.Text = errMsg return ocrResult, err } return ocrResult, nil }
func (s *OcrHttpMultipartHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { defer req.Body.Close() ocrRequest, err := s.extractParts(req) if err != nil { logg.LogError(err) errStr := fmt.Sprintf("Error extracting multipart/related parts: %v", err) http.Error(w, errStr, 500) return } logg.LogTo("OCR_HTTP", "ocrRequest: %v", ocrRequest) ocrResult, err := HandleOcrRequest(ocrRequest, s.RabbitConfig) if err != nil { msg := "Unable to perform OCR decode. Error: %v" errMsg := fmt.Sprintf(msg, err) logg.LogError(fmt.Errorf(errMsg)) http.Error(w, errMsg, 500) return } logg.LogTo("OCR_HTTP", "ocrResult: %v", ocrResult) fmt.Fprintf(w, ocrResult.Text) }
// Gin middleware to connnect to the Sync Gw database given in the // dbUrl parameter, and set the connection object into the context. // This creates a new connection for each request, which is ultra-conservative // in case the connection object isn't safe to use among multiple goroutines // (and I believe it is). If it becomes a bottleneck, it's easy to create // another middleware that re-uses an existing connection. func DbConnector(dbUrl string) gin.HandlerFunc { return func(c *gin.Context) { // make sure the db url does not have a trailing slash if strings.HasSuffix(dbUrl, "/") { err := errors.New(fmt.Sprintf("dbUrl needs trailing slash: %v", dbUrl)) logg.LogError(err) c.Fail(500, err) return } db, err := couch.Connect(dbUrl) if err != nil { err = errors.New(fmt.Sprintf("Error %v | dbUrl: %v", err, dbUrl)) logg.LogError(err) c.Fail(500, err) return } c.Set(MIDDLEWARE_KEY_DB, db) c.Next() } }
func (s *OcrHttpHandler) ServeHTTP(w http.ResponseWriter, req *http.Request) { logg.LogTo("OCR_HTTP", "serveHttp called") defer req.Body.Close() ocrRequest := OcrRequest{} decoder := json.NewDecoder(req.Body) err := decoder.Decode(&ocrRequest) if err != nil { logg.LogError(err) http.Error(w, "Unable to unmarshal json", 500) return } ocrResult, err := HandleOcrRequest(ocrRequest, s.RabbitConfig) if err != nil { msg := "Unable to perform OCR decode. Error: %v" errMsg := fmt.Sprintf(msg, err) logg.LogError(fmt.Errorf(errMsg)) http.Error(w, errMsg, 500) return } logg.LogTo("OCR_HTTP", "ocrResult: %v", ocrResult) fmt.Fprintf(w, ocrResult.Text) }
// Codereview: de-dupe func (j TrainingJob) recordProcessingError(err error) { logg.LogError(err) db := j.Configuration.DbConnection() if err := j.Failed(db, err); err != nil { errMsg := fmt.Errorf("Error setting training job as failed: %v", err) logg.LogError(errMsg) } }
// Codereview: de-dupe func (d DatasetSplitter) recordProcessingError(err error) { logg.LogError(err) db := d.Configuration.DbConnection() if err := d.Dataset.Failed(db, err); err != nil { errMsg := fmt.Errorf("Error setting dataset as failed: %v", err) logg.LogError(errMsg) } }
func (w *PreprocessorRpcWorker) handleDelivery(d amqp.Delivery) error { ocrRequest := OcrRequest{} err := json.Unmarshal(d.Body, &ocrRequest) if err != nil { msg := "Error unmarshaling json: %v. Error: %v" errMsg := fmt.Sprintf(msg, string(d.Body), err) logg.LogError(fmt.Errorf(errMsg)) return err } logg.LogTo("PREPROCESSOR_WORKER", "ocrRequest before: %v", ocrRequest) routingKey := ocrRequest.nextPreprocessor(w.rabbitConfig.RoutingKey) logg.LogTo("PREPROCESSOR_WORKER", "publishing with routing key %q", routingKey) logg.LogTo("PREPROCESSOR_WORKER", "ocrRequest after: %v", ocrRequest) err = w.preprocessImage(&ocrRequest) if err != nil { msg := "Error preprocessing image: %v. Error: %v" errMsg := fmt.Sprintf(msg, ocrRequest, err) logg.LogError(fmt.Errorf(errMsg)) return err } ocrRequestJson, err := json.Marshal(ocrRequest) if err != nil { return err } logg.LogTo("PREPROCESSOR_WORKER", "sendRpcResponse to: %v", routingKey) if err := w.channel.Publish( w.rabbitConfig.Exchange, // publish to an exchange routingKey, // routing to 0 or more queues false, // mandatory false, // immediate amqp.Publishing{ Headers: amqp.Table{}, ContentType: "text/plain", ContentEncoding: "", Body: []byte(ocrRequestJson), DeliveryMode: amqp.Transient, // 1=non-persistent, 2=persistent Priority: 0, // 0-9 ReplyTo: d.ReplyTo, CorrelationId: d.CorrelationId, // a bunch of application/implementation-specific fields }, ); err != nil { return err } logg.LogTo("PREPROCESSOR_WORKER", "handleDelivery succeeded") return nil }
// Copy the contents of Datafile.Url to CBFS and return the cbfs dest path func (d Datafile) CopyToBlobStore(db couch.Database, blobStore BlobStore) (string, error) { if !d.HasValidId() { errMsg := fmt.Errorf("Datafile: %+v must have an id", d) logg.LogError(errMsg) return "", errMsg } if len(d.Url) == 0 { errMsg := fmt.Errorf("Datafile: %+v must have a non empty url", d) logg.LogError(errMsg) return "", errMsg } logg.LogTo("MODEL", "datafile url: |%v|", d.Url) // figure out dest path to save to on blobStore u, err := url.Parse(d.Url) if err != nil { errMsg := fmt.Errorf("Error parsing: %v. Err %v", d.Url, err) logg.LogError(errMsg) return "", errMsg } urlPath := u.Path _, filename := path.Split(urlPath) destPath := fmt.Sprintf("%v/%v", d.Id, filename) // open input stream to url resp, err := http.Get(d.Url) if err != nil { errMsg := fmt.Errorf("Error opening: %v. Err %v", d.Url, err) logg.LogError(errMsg) return "", errMsg } defer resp.Body.Close() // write to blobStore options := BlobPutOptions{} options.ContentType = resp.Header.Get("Content-Type") if err := blobStore.Put("", destPath, resp.Body, options); err != nil { errMsg := fmt.Errorf("Error writing %v to blobStore: %v", destPath, err) logg.LogError(errMsg) return "", errMsg } logg.LogTo("MODEL", "copied datafile url %v to blobStore: %v", d.Url, destPath) return destPath, nil }
func stressTest(doneChannel chan<- bool) { imageUrls := imageUrls() logg.LogTo("CLI", "imageUrls: %v", imageUrls) logg.LogTo("CLI", "numIterations: %v", *numIterations) openOcrUrl := *ocrUrl client := ocrclient.NewHttpClient(openOcrUrl) for i := 0; i < *numIterations; i++ { index := randomIntInRange(0, numTestImages) imageUrl := imageUrls[index] logg.LogTo("CLI", "OCR decoding: %v. index: %d", imageUrl, index) ocrRequest := ocrclient.OcrRequest{ ImgUrl: imageUrl, EngineType: ocrclient.ENGINE_TESSERACT, } ocrDecoded, err := client.DecodeImageUrl(ocrRequest) if err != nil { logg.LogError(fmt.Errorf("Error decoding image: %v", err)) } else { logg.LogTo("CLI", "OCR decoded: %v", ocrDecoded) } } doneChannel <- true }
func main() { var preprocessor string flagFunc := func() { flag.StringVar( &preprocessor, "preprocessor", "identity", "The preprocessor to use, eg, stroke-width-transform", ) } rabbitConfig := ocrworker.DefaultConfigFlagsOverride(flagFunc) // inifinite loop, since sometimes worker <-> rabbitmq connection // gets broken. see https://github.com/tleyden/open-ocr/issues/4 for { logg.LogTo("PREPROCESSOR_WORKER", "Creating new Preprocessor Worker") preprocessorWorker, err := ocrworker.NewPreprocessorRpcWorker( rabbitConfig, preprocessor, ) if err != nil { logg.LogPanic("Could not create rpc worker: %v", err) } preprocessorWorker.Run() // this happens when connection is closed err = <-preprocessorWorker.Done logg.LogError(fmt.Errorf("Preprocessor Worker failed with error: %v", err)) } }
func NewGameStateFromString(jsonString string) GameState { gameState := &GameState{} jsonBytes := []byte(jsonString) err := json.Unmarshal(jsonBytes, gameState) if err != nil { logg.LogError(err) } return *gameState }
// Read from source tar stream and write training and test to given tar writers func (d DatasetSplitter) transform(source *tar.Reader, train, test *tar.Writer) error { splitter := d.splitter(train, test) for { hdr, err := source.Next() if err == io.EOF { // end of tar archive break } if err != nil { return err } tw := splitter(hdr.Name) if err := tw.WriteHeader(hdr); err != nil { return err } _, err = io.Copy(tw, source) if err != nil { return err } } // close writers if err := train.Close(); err != nil { errMsg := fmt.Errorf("Error closing tar writer: %v", err) logg.LogError(errMsg) return err } if err := test.Close(); err != nil { errMsg := fmt.Errorf("Error closing tar reader: %v", err) logg.LogError(errMsg) return err } return nil }
func (j TrainingJob) getSolver() (*Solver, error) { db := j.Configuration.DbConnection() solver := &Solver{} err := db.Retrieve(j.SolverId, solver) if err != nil { errMsg := fmt.Errorf("Didn't retrieve: %v - %v", j.SolverId, err) logg.LogError(errMsg) return nil, errMsg } solver.Configuration = j.Configuration return solver, nil }
func (game *Game) updateUserGameNumber(gameState GameState) { gameNumberChanged := (game.gameState.Number != gameState.Number) if gameNumberChanged { game.user.GameNumber = gameState.Number newRevision, err := game.db.Edit(game.user) if err != nil { logg.LogError(err) return } logg.LogTo("MAIN", "user update, rev: %v", newRevision) } }
func TestOcrEngineTypeJson(t *testing.T) { testJson := `{"img_url":"foo", "engine":"tesseract"}` ocrRequest := OcrRequest{} err := json.Unmarshal([]byte(testJson), &ocrRequest) if err != nil { logg.LogError(err) } assert.True(t, err == nil) assert.Equals(t, ocrRequest.EngineType, ENGINE_TESSERACT) logg.LogTo("TEST", "ocrRequest: %v", ocrRequest) }
func FakePiece() Piece { jsonString := `{"location":7,"validMoves":[{"locations":[1],"captures":[{"team":1,"piece":11}],"king":true}]}` piecePtr := &Piece{} jsonBytes := []byte(jsonString) err := json.Unmarshal(jsonBytes, piecePtr) if err != nil { logg.LogError(err) } piece := *piecePtr return piece }
func HandleOcrRequest(ocrRequest OcrRequest, rabbitConfig RabbitConfig) (OcrResult, error) { switch ocrRequest.InplaceDecode { case true: // inplace decode: short circuit rabbitmq, and just call // ocr engine directly ocrEngine := NewOcrEngine(ocrRequest.EngineType) ocrResult, err := ocrEngine.ProcessRequest(ocrRequest) if err != nil { msg := "Error processing ocr request. Error: %v" errMsg := fmt.Sprintf(msg, err) logg.LogError(fmt.Errorf(errMsg)) return OcrResult{}, err } return ocrResult, nil default: // add a new job to rabbitmq and wait for worker to respond w/ result ocrClient, err := NewOcrRpcClient(rabbitConfig) if err != nil { logg.LogError(err) return OcrResult{}, err } ocrResult, err := ocrClient.DecodeImage(ocrRequest) if err != nil { logg.LogError(err) return OcrResult{}, err } return ocrResult, nil } }
func (n NsqWorker) HandleEvents() { // not really sure if I need to use channels at all here, // since at the moment there is only one worker channelName := "channel" // pull event off of nsql topic config := nsq.NewConfig() q, _ := nsq.NewConsumer(n.Configuration.NsqdTopic, channelName, config) q.AddHandler(nsq.HandlerFunc(func(message *nsq.Message) error { logg.LogTo("NSQ_WORKER", "Got a message!: %v", string(message.Body)) // create jobDescriptor from json jobDescriptor := JobDescriptor{} err := json.Unmarshal(message.Body, &jobDescriptor) if err != nil { bodyStr := string(message.Body) logg.LogTo("NSQ_WORKER", "Error unmarshalling msg: %v", bodyStr) return err } logg.LogTo("NSQ_WORKER", "Job descriptor: %+v", jobDescriptor) // create job from job descriptor job, err := CreateJob(n.Configuration, jobDescriptor) if err != nil { logg.LogTo("NSQ_WORKER", "Error creating job from: %+v", jobDescriptor) return err } logg.LogTo("NSQ_WORKER", "Job: %+v", job) // run job go job.Run(nil) return nil })) err := q.ConnectToNSQLookupd(n.Configuration.NsqLookupdUrl) if err != nil { errMsg := fmt.Errorf("Error connecting to nsq: %v", err) logg.LogError(errMsg) } logg.LogTo("NSQ_WORKER", "connected to nsq as a consumer") }
func (w *PreprocessorRpcWorker) preprocessImage(ocrRequest *OcrRequest) error { descriptor := w.bindingKey // eg, "stroke-width-transform" preprocessor := w.preprocessorMap[descriptor] logg.LogTo("PREPROCESSOR_WORKER", "Preproces %v via %v", ocrRequest, descriptor) err := preprocessor.preprocess(ocrRequest) if err != nil { msg := "Error doing %s on: %v. Error: %v" errMsg := fmt.Sprintf(msg, descriptor, ocrRequest, err) logg.LogError(fmt.Errorf(errMsg)) return err } return nil }
// TestGetModifiedSolverSpec feeds a sample solver prototxt through
// modifySolverSpec and checks that the call succeeds with non-empty output,
// that the output parses with proto.UnmarshalText into a
// caffe.SolverParameter, and that the parsed Net is "solver-net.prototxt"
// while SnapshotPrefix is still "snapshot".
func TestGetModifiedSolverSpec(t *testing.T) {

	// Sample solver definition whose net value is expected to be rewritten.
	// NOTE(review): this literal sits on a single line in the reviewed source;
	// presumably it originally had one prototxt directive per line -- confirm
	// against version control.
	protoText := ` # The train/test net protocol buffer definition net: "this_should_get_replaced" # test_iter specifies how many forward passes the test should carry out. # In the case of MNIST, we have test batch size 100 and 100 test iterations, # covering the full 10,000 testing images. test_iter: 100 # Carry out testing every 500 training iterations. test_interval: 500 # The base learning rate, momentum and the weight decay of the network. base_lr: 0.01 momentum: 0.9 weight_decay: 0.0005 # The learning rate policy lr_policy: "inv" gamma: 0.0001 power: 0.75 # Display every 100 iterations display: 100 # The maximum number of iterations max_iter: 10000 # snapshot intermediate results snapshot: 5000 snapshot_prefix: "snapshot" # solver mode: CPU or GPU solver_mode: CPU`

	modifiedBytes, err := modifySolverSpec([]byte(protoText))
	if err != nil {
		logg.LogError(err)
	}
	assert.True(t, err == nil)
	assert.True(t, len(modifiedBytes) != 0)
	logg.LogTo("TEST", "modified prototxt: %v", string(modifiedBytes))

	// instantiate proto object based on modified bytes
	solverParam := &caffe.SolverParameter{}
	err = proto.UnmarshalText(string(modifiedBytes), solverParam)
	assert.True(t, err == nil)
	assert.True(t, solverParam.Net != nil)
	// the placeholder net must have been replaced
	assert.Equals(t, *(solverParam.Net), "solver-net.prototxt")
	// the snapshot prefix must match the value given in the input
	assert.Equals(t, *(solverParam.SnapshotPrefix), "snapshot")
}
// - make sure one of the changes is a game, if not, ignore it // - get the latest game document // - if it's not our turn, do nothing // - if it is our turn // - call thinker to calculate next move // - make next move by inserting a new revision of votes doc func (game *Game) handleChanges(changes Changes) { gameDocChanged := game.hasGameDocChanged(changes) if gameDocChanged { gameState, err := game.fetchLatestGameState() if err != nil { logg.LogError(err) return } game.updateUserGameNumber(gameState) game.gameState = gameState if isOurTurn := game.isOurTurn(gameState); !isOurTurn { logg.LogTo("DEBUG", "It's not our turn, ignoring changes") return } bestMove := game.thinker.Think(gameState) game.PostChosenMove(bestMove) } }
func (game *Game) PostChosenMove(validMove ValidMove) { logg.LogTo("MAIN", "post chosen move: %v", validMove) preMoveSleepSeconds := game.calculatePreMoveSleepSeconds() logg.LogTo("MAIN", "sleep %v (s) before posting move", preMoveSleepSeconds) time.Sleep(time.Second * time.Duration(preMoveSleepSeconds)) if len(validMove.Locations) == 0 { logg.LogTo("MAIN", "invalid move, ignoring: %v", validMove) } u4, err := uuid.NewV4() if err != nil { logg.LogPanic("Error generating uuid", err) } votes := &OutgoingVotes{} votes.Id = fmt.Sprintf("vote:%s", u4) votes.Turn = game.gameState.Turn votes.PieceId = validMove.PieceId votes.TeamId = game.ourTeamId votes.GameId = game.gameState.Number // TODO: this is actually a bug, because if there is a // double jump it will only send the first jump move endLocation := validMove.Locations[0] locations := []int{validMove.StartLocation, endLocation} votes.Locations = locations newId, newRevision, err := game.db.Insert(votes) logg.LogTo("MAIN", "newId: %v, newRevision: %v err: %v", newId, newRevision, err) if err != nil { logg.LogError(err) return } }
func (w *PreprocessorRpcWorker) handle(deliveries <-chan amqp.Delivery, done chan error) { for d := range deliveries { logg.LogTo( "PREPROCESSOR_WORKER", "got %d byte delivery: [%v]. Routing key: %s Reply to: %v", len(d.Body), d.DeliveryTag, d.RoutingKey, d.ReplyTo, ) err := w.handleDelivery(d) if err != nil { msg := "Error handling delivery in preprocessor. Error: %v" logg.LogError(fmt.Errorf(msg, err)) } } logg.LogTo("PREPROCESSOR_WORKER", "handle: deliveries channel closed") done <- fmt.Errorf("handle: deliveries channel closed") }
func main() { noOpFlagFunc := ocrworker.NoOpFlagFunction() rabbitConfig := ocrworker.DefaultConfigFlagsOverride(noOpFlagFunc) // inifinite loop, since sometimes worker <-> rabbitmq connection // gets broken. see https://github.com/tleyden/open-ocr/issues/4 for { logg.LogTo("OCR_WORKER", "Creating new OCR Worker") ocrWorker, err := ocrworker.NewOcrRpcWorker(rabbitConfig) if err != nil { logg.LogPanic("Could not create rpc worker") } ocrWorker.Run() // this happens when connection is closed err = <-ocrWorker.Done logg.LogError(fmt.Errorf("OCR Worker failed with error: %v", err)) } }
func (c ChangesListener) handleTrainingJobChange(change couch.Change, doc ElasticThoughtDoc) { logg.LogTo("CHANGES", "got a training job doc: %+v", doc) // create a Training Job doc from the ElasticThoughtDoc trainingJob := &TrainingJob{} if err := c.Database.Retrieve(change.Id, &trainingJob); err != nil { errMsg := fmt.Errorf("Didn't retrieve: %v - %v", change.Id, err) logg.LogError(errMsg) return } // check the state, only schedule if state == pending if trainingJob.ProcessingState != Pending { logg.LogTo("CHANGES", "State != pending: %+v", trainingJob) return } job := NewJobDescriptor(doc.Id) c.JobScheduler.ScheduleJob(*job) }
func (c ChangesListener) handleClassifyJobChange(change couch.Change, doc ElasticThoughtDoc) { logg.LogTo("CHANGES", "got a classify job doc: %+v", doc) // create a Training Job doc from the ElasticThoughtDoc classifyJob := NewClassifyJob(c.Configuration) if err := classifyJob.Find(change.Id); err != nil { errMsg := fmt.Errorf("Could not find: %v - %v", change.Id, err) logg.LogError(errMsg) return } // check the state, only schedule if state == pending if classifyJob.ProcessingState != Pending { logg.LogTo("CHANGES", "State != pending: %+v", classifyJob) return } job := NewJobDescriptor(doc.Id) c.JobScheduler.ScheduleJob(*job) }
func (c ChangesListener) processChanges(changes couch.Changes) { for _, change := range changes.Results { if change.Deleted { logg.LogTo("CHANGES", "change was deleted, skipping") continue } // ignore certain docs, like "_user/*" if strings.HasPrefix(change.Id, "_user") { logg.LogTo("CHANGES", "Ignoring change: %v", change.Id) continue } doc := ElasticThoughtDoc{} err := c.Database.Retrieve(change.Id, &doc) if err != nil { errMsg := fmt.Errorf("Didn't retrieve: %v - %v", change.Id, err) logg.LogError(errMsg) continue } switch doc.Type { case DOC_TYPE_DATAFILE: c.handleDatafileChange(change, doc) case DOC_TYPE_DATASET: c.handleDatasetChange(change, doc) case DOC_TYPE_TRAINING_JOB: c.handleTrainingJobChange(change, doc) case DOC_TYPE_CLASSIFY_JOB: c.handleClassifyJobChange(change, doc) } } }
func (c ChangesListener) handleDatafileChange(change couch.Change, doc ElasticThoughtDoc) { logg.LogTo("CHANGES", "got a datafile doc: %+v", doc) // create a Datafile doc from the ElasticThoughtDoc datafile := NewDatafile(c.Configuration) if err := c.Database.Retrieve(change.Id, &datafile); err != nil { errMsg := fmt.Errorf("Didn't retrieve: %v - %v", change.Id, err) logg.LogError(errMsg) return } logg.LogTo("CHANGES", "convert to datafile: %+v", datafile) // check the state, only schedule if state == pending if datafile.ProcessingState != Pending { logg.LogTo("CHANGES", "Datafile state != pending: %+v", datafile) return } job := NewJobDescriptor(doc.Id) c.JobScheduler.ScheduleJob(*job) }
func main() { var http_port int flagFunc := func() { flag.IntVar( &http_port, "http_port", 8080, "The http port to listen on, eg, 8081", ) } rabbitConfig := ocrworker.DefaultConfigFlagsOverride(flagFunc) // any requests to root, just redirect to main page http.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) { text := `<h1>OpenOCR is running!<h1> Need <a href="http://www.openocr.net">docs</a>?` fmt.Fprintf(w, text) }) http.Handle("/ocr", ocrworker.NewOcrHttpHandler(rabbitConfig)) http.Handle("/ocr-file-upload", ocrworker.NewOcrHttpMultipartHandler(rabbitConfig)) // add a handler to serve up an image from the filesystem. // ignore this, was just something for testing .. http.HandleFunc("/img", func(w http.ResponseWriter, r *http.Request) { http.ServeFile(w, r, "../refactoring.png") }) listenAddr := fmt.Sprintf(":%d", http_port) logg.LogTo("OCR_HTTP", "Starting listener on %v", listenAddr) logg.LogError(http.ListenAndServe(listenAddr, nil)) }