// Merges all of the individual chunks into a single object and stores it on s3. // The log chunks are stored in the database, while the object is uploaded to s3. func MergeLogChunks(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error { switch artifact.State { case model.APPEND_COMPLETE: // TODO: Reimplement using GorpDatabase // If the file is empty, don't bother creating an object on S3. if artifact.Size == 0 { artifact.State = model.CLOSED_WITHOUT_DATA artifact.S3URL = "" // Conversion between *DatabaseEror and error is tricky. If we don't do this, a nil // *DatabaseError can become a non-nil error. return db.UpdateArtifact(artifact).GetError() } // XXX Do we need to commit here or is this handled transparently? artifact.State = model.UPLOADING if err := db.UpdateArtifact(artifact); err != nil { return err } fileName := artifact.DefaultS3URL() r := newLogChunkReaderWithReadahead(artifact, db) if err := uploadArtifactToS3(s3bucket, fileName, artifact.Size, r); err != nil { return err } // XXX This is a long operation and should probably be asynchronous from the // actual HTTP request, and the client should poll to check when its uploaded. artifact.State = model.UPLOADED artifact.S3URL = fileName if err := db.UpdateArtifact(artifact); err != nil { return err } // From this point onwards, we will not send back any errors back to the user. If we are // unable to delete logchunks, we log it to Sentry instead. if _, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil { sentry.ReportError(ctx, err) return nil } return nil case model.WAITING_FOR_UPLOAD: fallthrough case model.ERROR: fallthrough case model.APPENDING: fallthrough case model.UPLOADED: fallthrough case model.UPLOADING: return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State) default: return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State) } }
func GetArtifactContent(ctx context.Context, r render.Render, req *http.Request, res http.ResponseWriter, db database.Database, s3bucket *s3.Bucket, artifact *model.Artifact) { if artifact == nil { LogAndRespondWithErrorf(ctx, r, http.StatusBadRequest, "No artifact specified") return } switch artifact.State { case model.UPLOADED: // Fetch from S3 url := s3bucket.SignedURL(artifact.S3URL, time.Now().Add(30*time.Minute)) rq, err := http.NewRequest("GET", url, nil) if byteRanges := req.Header.Get("Range"); byteRanges != "" { // If request contains Range: headers, pass them right through to S3. // TODO(anupc): Validation? We're sending user input through to the data store. rq.Header.Add("Range", byteRanges) } resp, err := http.DefaultClient.Do(rq) if err != nil { LogAndRespondWithError(ctx, r, http.StatusInternalServerError, err) return } if resp.StatusCode != http.StatusPartialContent && resp.StatusCode != http.StatusOK { LogAndRespondWithErrorf(ctx, r, http.StatusInternalServerError, fmt.Sprintf("Bad status code %d recieved from S3", resp.StatusCode)) return } contentdisposition.SetFilename(res, filepath.Base(artifact.RelativePath)) res.Header().Add("Content-Length", strconv.Itoa(int(artifact.Size))) if n, err := io.CopyN(res, resp.Body, artifact.Size); err != nil { sentry.ReportError(ctx, fmt.Errorf("Error transferring artifact (for artifact %s/%s, bytes (%d/%d) read): %s", artifact.BucketId, artifact.Name, n, artifact.Size, err)) return } return case model.UPLOADING: // Not done uploading to S3 yet. Error. LogAndRespondWithErrorf(ctx, r, http.StatusNotFound, "Waiting for content to complete uploading") return case model.APPENDING: fallthrough case model.APPEND_COMPLETE: // Pick from log chunks contentdisposition.SetFilename(res, filepath.Base(artifact.RelativePath)) // All written bytes are immutable. So, unless size changes, all previously read contents can be cached. res.Header().Add("ETag", strconv.Itoa(int(artifact.Size))) http.ServeContent(res, req, filepath.Base(artifact.RelativePath), time.Time{}, newLogChunkReaderWithReadahead(artifact, db)) return case model.WAITING_FOR_UPLOAD: // Not started yet. Error LogAndRespondWithErrorf(ctx, r, http.StatusNotFound, "Waiting for content to get uploaded") return } }
// LogAndRespondWithError posts a JSON-serialized error and statuscode on the HTTP response object // (using Martini render). // Log the error message to Sentry. func LogAndRespondWithError(ctx context.Context, render render.Render, code int, err error) { sentry.ReportError(ctx, err) render.JSON(code, map[string]string{"error": err.Error()}) }
// LogAndRespondWithErrorf posts a JSON-serialized error message and statuscode on the HTTP response // object (using Martini render). // Log the error message to Sentry. func LogAndRespondWithErrorf(ctx context.Context, render render.Render, code int, errStr string, params ...interface{}) { msg := fmt.Sprintf(errStr, params...) sentry.ReportError(ctx, errors.New(msg)) render.JSON(code, map[string]string{"error": msg}) }
// PutArtifact writes a streamed artifact to S3. The entire file contents are streamed directly // through to S3. If S3 is not accessible, we don't make any attempt to buffer on disk and fail // immediately. func PutArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error { if artifact.State != model.WAITING_FOR_UPLOAD { return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State) } // New file being inserted into DB. // Mark status change to UPLOADING and start uploading to S3. // // First, verify that the size of the content being uploaded matches our expected size. var fileSize int64 var err error if req.ContentLength != "" { fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bits // This should never happen if a sane HTTP client is used. Nonetheless ... if err != nil { return fmt.Errorf("Invalid Content-Length specified") } } else { // This too should never happen if a sane HTTP client is used. Nonetheless ... return fmt.Errorf("Content-Length not specified") } if fileSize != artifact.Size { return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size) } artifact.State = model.UPLOADING if err := db.UpdateArtifact(artifact); err != nil { return err } cleanupAndReturn := func(err error) error { // TODO: Is there a better way to detect and handle errors? // Use a channel to signify upload completion. In defer, check if the channel is empty. If // yes, mark error. Else ignore. if err != nil { // TODO: s/ERROR/WAITING_FOR_UPLOAD/ ? sentry.ReportError(ctx, err) artifact.State = model.ERROR err2 := db.UpdateArtifact(artifact) if err2 != nil { log.Printf("Error while handling error: %s", err2.Error()) } return err } return nil } b := new(bytes.Buffer) // Note: Storing entire contents of uploaded artifact in memory can cause OOMS. if n, err := io.CopyN(b, req.Body, artifact.Size); err != nil { return cleanupAndReturn(fmt.Errorf("Error reading from request body (for artifact %s/%s, bytes (%d/%d) read): %s", artifact.BucketId, artifact.Name, n, artifact.Size, err)) } fileName := artifact.DefaultS3URL() if err := uploadArtifactToS3(bucket, fileName, artifact.Size, bytes.NewReader(b.Bytes())); err != nil { return cleanupAndReturn(err) } artifact.State = model.UPLOADED artifact.S3URL = fileName if err := db.UpdateArtifact(artifact); err != nil { return err } return nil }