// MergeLogChunks merges all of the individual chunks into a single object and stores it on S3.
// The log chunks are stored in the database, while the merged object is uploaded to S3.
func MergeLogChunks(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error {
	switch artifact.State {
	case model.APPEND_COMPLETE:
		// TODO: Reimplement using GorpDatabase
		// If the file is empty, don't bother creating an object on S3.
		if artifact.Size == 0 {
			artifact.State = model.CLOSED_WITHOUT_DATA
			artifact.S3URL = ""

			// Conversion between *DatabaseError and error is tricky. If we don't do this, a nil
			// *DatabaseError can become a non-nil error (see the typed-nil sketch following
			// this function).
			return db.UpdateArtifact(artifact).GetError()
		}

		// XXX Do we need to commit here or is this handled transparently?
		artifact.State = model.UPLOADING
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		fileName := artifact.DefaultS3URL()
		r := newLogChunkReaderWithReadahead(artifact, db)

		if err := uploadArtifactToS3(s3bucket, fileName, artifact.Size, r); err != nil {
			return err
		}

		// XXX This is a long operation and should probably be asynchronous from the
		// actual HTTP request, and the client should poll to check when it's uploaded.
		artifact.State = model.UPLOADED
		artifact.S3URL = fileName
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		// From this point onwards, we will not send any errors back to the user. If we are
		// unable to delete log chunks, we report the error to Sentry instead.
		if _, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil {
			sentry.ReportError(ctx, err)
			return nil
		}

		return nil

	case model.WAITING_FOR_UPLOAD, model.ERROR, model.APPENDING, model.UPLOADED, model.UPLOADING:
		return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State)

	default:
		return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State)
	}
}
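// What follows is a minimal, self-contained sketch of the typed-nil pitfall that the
// GetError() call in MergeLogChunks works around. It is not part of the store API;
// demoDBError and demoTypedNil are hypothetical names used only for illustration.
// The pitfall: a nil *DatabaseError assigned to a plain error interface compares as
// non-nil, because the interface value still carries the concrete type.

// demoDBError stands in for a concrete error type like *DatabaseError.
type demoDBError struct{ msg string }

func (e *demoDBError) Error() string { return e.msg }

// demoTypedNil returns a nil *demoDBError through the error interface. A caller
// checking `err != nil` sees a non-nil error even though the pointer inside is nil,
// which is exactly why MergeLogChunks calls GetError() instead of returning the
// *DatabaseError directly.
func demoTypedNil() error {
	var e *demoDBError // nil pointer
	return e           // non-nil interface value: (*demoDBError)(nil)
}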
// PutArtifact writes a streamed artifact to S3. The entire file contents are buffered in
// memory and then uploaded to S3. If S3 is not accessible, we don't make any attempt to
// buffer on disk and fail immediately.
func PutArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error {
	if artifact.State != model.WAITING_FOR_UPLOAD {
		return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State)
	}

	// New file being inserted into DB.
	// Mark status change to UPLOADING and start uploading to S3.
	//
	// First, verify that the size of the content being uploaded matches our expected size.
	var fileSize int64
	var err error

	if req.ContentLength != "" {
		fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bit size
		// This should never happen if a sane HTTP client is used. Nonetheless ...
		if err != nil {
			return fmt.Errorf("Invalid Content-Length specified")
		}
	} else {
		// This too should never happen if a sane HTTP client is used. Nonetheless ...
		return fmt.Errorf("Content-Length not specified")
	}

	if fileSize != artifact.Size {
		return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size)
	}

	artifact.State = model.UPLOADING
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	cleanupAndReturn := func(err error) error {
		// TODO: Is there a better way to detect and handle errors?
		// Use a channel to signify upload completion. In defer, check if the channel is empty.
		// If yes, mark error. Else ignore.
		if err != nil {
			// TODO: s/ERROR/WAITING_FOR_UPLOAD/ ?
			sentry.ReportError(ctx, err)
			artifact.State = model.ERROR
			if err2 := db.UpdateArtifact(artifact); err2 != nil {
				log.Printf("Error while handling error: %s", err2.Error())
			}
			return err
		}
		return nil
	}

	b := new(bytes.Buffer)
	// Note: Storing the entire contents of the uploaded artifact in memory can cause OOMs
	// (a streaming alternative is sketched after this function).
	if n, err := io.CopyN(b, req.Body, artifact.Size); err != nil {
		return cleanupAndReturn(fmt.Errorf("Error reading from request body (for artifact %s/%s, bytes (%d/%d) read): %s",
			artifact.BucketId, artifact.Name, n, artifact.Size, err))
	}

	fileName := artifact.DefaultS3URL()
	if err := uploadArtifactToS3(bucket, fileName, artifact.Size, bytes.NewReader(b.Bytes())); err != nil {
		return cleanupAndReturn(err)
	}

	artifact.State = model.UPLOADED
	artifact.S3URL = fileName
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	return nil
}
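// putArtifactStreaming is a minimal sketch, not part of the store API, of the streaming
// alternative mentioned in the OOM note above: the request body is piped straight to S3
// instead of being buffered in memory. It assumes the goamz-style s3.Bucket.PutReader
// signature; the state transitions and cleanupAndReturn handling from PutArtifact would
// still be needed around it. The trade-off is that a client disconnecting mid-request
// then surfaces as an S3 upload error rather than a distinct request-body read error.
func putArtifactStreaming(artifact *model.Artifact, bucket *s3.Bucket, body io.Reader) error {
	fileName := artifact.DefaultS3URL()
	// PutReader streams exactly artifact.Size bytes from body into a single S3 object
	// with the given content type and ACL.
	return bucket.PutReader(fileName, body, artifact.Size, "binary/octet-stream", s3.PublicRead)
}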
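// mergeLogChunksViaPipe is a minimal sketch, not part of the store API, of an alternative
// implementation of the upload step in MergeLogChunks: instead of the readahead reader,
// the chunks are written into an io.Pipe while a goroutine streams the read side to S3
// via the goamz-style PutReader, so the merged object is never held wholly in memory.
// It assumes db.ListLogChunksInArtifact returns the chunks in order and that their
// combined length equals artifact.Size.
func mergeLogChunksViaPipe(artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket, fileName string) error {
	logChunks, err := db.ListLogChunksInArtifact(artifact.Id)
	if err != nil {
		return err
	}

	r, w := io.Pipe()
	errChan := make(chan error, 1) // buffered so the uploader never blocks on send

	// Upload asynchronously: anything written to w is streamed to S3 through r.
	go func() {
		defer r.Close()
		errChan <- s3bucket.PutReader(fileName, r, artifact.Size, "binary/octet-stream", s3.PublicRead)
	}()

	for _, logChunk := range logChunks {
		if _, werr := w.Write([]byte(logChunk.Content)); werr != nil {
			// A write only fails once the uploader has closed the pipe; prefer the
			// underlying upload error if there is one.
			w.CloseWithError(werr)
			if uerr := <-errChan; uerr != nil {
				return uerr
			}
			return werr
		}
	}
	w.Close()

	// Wait for the S3 upload to complete or fail.
	return <-errChan
}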