// CloseArtifact closes an artifact for further writes and begins process of merging and uploading // the artifact. This operation is only valid for artifacts which are being uploaded in chunks. // In all other cases, an error is returned. func CloseArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket, failIfAlreadyClosed bool) error { switch artifact.State { case model.UPLOADED: // Already closed. Nothing to do here. fallthrough case model.APPEND_COMPLETE: // This artifact will be eventually shipped to S3. No change required. return nil case model.APPENDING: artifact.State = model.APPEND_COMPLETE if err := db.UpdateArtifact(artifact); err != nil { return err } return MergeLogChunks(ctx, artifact, db, s3bucket) case model.WAITING_FOR_UPLOAD: // Streaming artifact was not uploaded artifact.State = model.CLOSED_WITHOUT_DATA if err := db.UpdateArtifact(artifact); err != nil { return err } return nil default: return fmt.Errorf("Unexpected artifact state: %s", artifact.State) } }
// Merges all of the individual chunks into a single object and stores it on s3. // The log chunks are stored in the database, while the object is uploaded to s3. func MergeLogChunks(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error { switch artifact.State { case model.APPEND_COMPLETE: // TODO: Reimplement using GorpDatabase // If the file is empty, don't bother creating an object on S3. if artifact.Size == 0 { artifact.State = model.CLOSED_WITHOUT_DATA artifact.S3URL = "" // Conversion between *DatabaseEror and error is tricky. If we don't do this, a nil // *DatabaseError can become a non-nil error. return db.UpdateArtifact(artifact).GetError() } // XXX Do we need to commit here or is this handled transparently? artifact.State = model.UPLOADING if err := db.UpdateArtifact(artifact); err != nil { return err } fileName := artifact.DefaultS3URL() r := newLogChunkReaderWithReadahead(artifact, db) if err := uploadArtifactToS3(s3bucket, fileName, artifact.Size, r); err != nil { return err } // XXX This is a long operation and should probably be asynchronous from the // actual HTTP request, and the client should poll to check when its uploaded. artifact.State = model.UPLOADED artifact.S3URL = fileName if err := db.UpdateArtifact(artifact); err != nil { return err } // From this point onwards, we will not send back any errors back to the user. If we are // unable to delete logchunks, we log it to Sentry instead. if _, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil { sentry.ReportError(ctx, err) return nil } return nil case model.WAITING_FOR_UPLOAD: fallthrough case model.ERROR: fallthrough case model.APPENDING: fallthrough case model.UPLOADED: fallthrough case model.UPLOADING: return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State) default: return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State) } }
// CreateArtifact creates a new artifact in a open bucket. // // If an artifact with the same name already exists in the same bucket, we attempt to rename the // artifact by adding a suffix. // If the request specifies a chunked artifact, the size field is ignored and always set to zero. // If the request is for a streamed artifact, size is mandatory. // A relative path field may be specified to preserve the original file name and path. If no path is // specified, the original artifact name is used by default. func CreateArtifact(req createArtifactReq, bucket *model.Bucket, db database.Database) (*model.Artifact, *HttpError) { if len(req.Name) == 0 { return nil, NewHttpError(http.StatusBadRequest, "Artifact name not provided") } if bucket.State != model.OPEN { return nil, NewHttpError(http.StatusBadRequest, "Bucket is already closed") } artifact := new(model.Artifact) artifact.Name = req.Name artifact.BucketId = bucket.Id artifact.DateCreated = time.Now() if req.DeadlineMins == 0 { artifact.DeadlineMins = DEFAULT_DEADLINE } else { artifact.DeadlineMins = req.DeadlineMins } if req.Chunked { artifact.State = model.APPENDING } else { if req.Size == 0 { return nil, NewHttpError(http.StatusBadRequest, "Cannot create a new upload artifact without size.") } else if req.Size > MaxArtifactSizeBytes { return nil, NewHttpError(http.StatusRequestEntityTooLarge, fmt.Sprintf("Entity '%s' (size %d) is too large (limit %d)", req.Name, req.Size, MaxArtifactSizeBytes)) } artifact.Size = req.Size artifact.State = model.WAITING_FOR_UPLOAD } if req.RelativePath == "" { // Use artifact name provided as default relativePath artifact.RelativePath = req.Name } else { artifact.RelativePath = req.RelativePath } // Attempt to insert artifact and retry with a different name if it fails. if err := db.InsertArtifact(artifact); err != nil { for attempt := 1; attempt <= MaxDuplicateFileNameResolutionAttempts; attempt++ { // Unable to create new artifact - if an artifact already exists, the above insert failed // because of a collision. if _, err := db.GetArtifactByName(bucket.Id, artifact.Name); err != nil { // This could be a transient DB error (down/unreachable), in which case we expect the client // to retry. There is no value in attempting alternate artifact names. // // We have no means of verifying there was a name collision - bail with an internal error. return nil, NewHttpError(http.StatusInternalServerError, err.Error()) } // File name collision - attempt to resolve artifact.Name = fmt.Sprintf(DuplicateArtifactNameFormat, req.Name, randString(5)) if err := db.InsertArtifact(artifact); err == nil { return artifact, nil } } return nil, NewHttpError(http.StatusInternalServerError, "Exceeded retry limit avoiding duplicates") } return artifact, nil }
// PutArtifact writes a streamed artifact to S3. The entire file contents are streamed directly // through to S3. If S3 is not accessible, we don't make any attempt to buffer on disk and fail // immediately. func PutArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error { if artifact.State != model.WAITING_FOR_UPLOAD { return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State) } // New file being inserted into DB. // Mark status change to UPLOADING and start uploading to S3. // // First, verify that the size of the content being uploaded matches our expected size. var fileSize int64 var err error if req.ContentLength != "" { fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bits // This should never happen if a sane HTTP client is used. Nonetheless ... if err != nil { return fmt.Errorf("Invalid Content-Length specified") } } else { // This too should never happen if a sane HTTP client is used. Nonetheless ... return fmt.Errorf("Content-Length not specified") } if fileSize != artifact.Size { return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size) } artifact.State = model.UPLOADING if err := db.UpdateArtifact(artifact); err != nil { return err } cleanupAndReturn := func(err error) error { // TODO: Is there a better way to detect and handle errors? // Use a channel to signify upload completion. In defer, check if the channel is empty. If // yes, mark error. Else ignore. if err != nil { // TODO: s/ERROR/WAITING_FOR_UPLOAD/ ? sentry.ReportError(ctx, err) artifact.State = model.ERROR err2 := db.UpdateArtifact(artifact) if err2 != nil { log.Printf("Error while handling error: %s", err2.Error()) } return err } return nil } b := new(bytes.Buffer) // Note: Storing entire contents of uploaded artifact in memory can cause OOMS. if n, err := io.CopyN(b, req.Body, artifact.Size); err != nil { return cleanupAndReturn(fmt.Errorf("Error reading from request body (for artifact %s/%s, bytes (%d/%d) read): %s", artifact.BucketId, artifact.Name, n, artifact.Size, err)) } fileName := artifact.DefaultS3URL() if err := uploadArtifactToS3(bucket, fileName, artifact.Size, bytes.NewReader(b.Bytes())); err != nil { return cleanupAndReturn(err) } artifact.State = model.UPLOADED artifact.S3URL = fileName if err := db.UpdateArtifact(artifact); err != nil { return err } return nil }
func PutArtifact(artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error { if artifact.State != model.WAITING_FOR_UPLOAD { return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State) } // New file being inserted into DB. // Mark status change to UPLOADING and start uploading to S3. // // First, verify that the size of the content being uploaded matches our expected size. var fileSize int64 var err error if req.ContentLength != "" { fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bits // This should never happen if a sane HTTP client is used. Nonetheless ... if err != nil { return fmt.Errorf("Invalid Content-Length specified") } } else { // This too should never happen if a sane HTTP client is used. Nonetheless ... return fmt.Errorf("Content-Length not specified") } if fileSize != artifact.Size { return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size) } // XXX Do we need to commit here or is this handled transparently? artifact.State = model.UPLOADING if err := db.UpdateArtifact(artifact); err != nil { return err } cleanupAndReturn := func(err error) error { // TODO: Is there a better way to detect and handle errors? // Use a channel to signify upload completion. In defer, check if the channel is empty. If // yes, mark error. Else ignore. if err != nil { // TODO: s/ERROR/WAITING_FOR_UPLOAD/ ? log.Printf("Error uploading to S3: %s\n", err) artifact.State = model.ERROR err2 := db.UpdateArtifact(artifact) if err2 != nil { log.Printf("Error while handling error: %s", err2.Error()) } return err } return nil } fileName := artifact.DefaultS3URL() if err := bucket.PutReader(fileName, req.Body, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil { return cleanupAndReturn(fmt.Errorf("Error uploading to S3: %s", err)) } artifact.State = model.UPLOADED artifact.S3URL = fileName if err := db.UpdateArtifact(artifact); err != nil { return err } return nil }
// Merges all of the individual chunks into a single object and stores it on s3. // The log chunks are stored in the database, while the object is uploaded to s3. func MergeLogChunks(artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error { switch artifact.State { case model.APPEND_COMPLETE: // TODO: Reimplement using GorpDatabase // If the file is empty, don't bother creating an object on S3. if artifact.Size == 0 { artifact.State = model.CLOSED_WITHOUT_DATA artifact.S3URL = "" // Conversion between *DatabaseEror and error is tricky. If we don't do this, a nil // *DatabaseError can become a non-nil error. return db.UpdateArtifact(artifact).GetError() } // XXX Do we need to commit here or is this handled transparently? artifact.State = model.UPLOADING if err := db.UpdateArtifact(artifact); err != nil { return err } logChunks, err := db.ListLogChunksInArtifact(artifact.Id) if err != nil { return err } r, w := io.Pipe() errChan := make(chan error) uploadCompleteChan := make(chan bool) fileName := artifact.DefaultS3URL() // Asynchronously upload the object to s3 while reading from the r, w // pipe. Thus anything written to "w" will be sent to S3. go func() { defer close(errChan) defer close(uploadCompleteChan) defer r.Close() if err := s3bucket.PutReader(fileName, r, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil { errChan <- fmt.Errorf("Error uploading to S3: %s", err) return } uploadCompleteChan <- true }() for _, logChunk := range logChunks { w.Write([]byte(logChunk.Content)) } w.Close() // Wait either for S3 upload to complete or for it to fail with an error. // XXX This is a long operation and should probably be asynchronous from the // actual HTTP request, and the client should poll to check when its uploaded. select { case _ = <-uploadCompleteChan: artifact.State = model.UPLOADED artifact.S3URL = fileName if err := db.UpdateArtifact(artifact); err != nil { return err } // From this point onwards, we will not send back any errors back to the user. If we are // unable to delete logchunks, we log it to Sentry instead. if n, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil { // TODO: Send this error to Sentry log.Printf("Error deleting logchunks for artifact %d: %v\n", artifact.Id, err) return nil } else if n != int64(len(logChunks)) { // TODO: Send this error to Sentry log.Printf("Mismatch in number of logchunks while deleting logchunks for artifact %d:"+ "Expected: %d Actual: %d\n", artifact.Id, len(logChunks), n) } return nil case err := <-errChan: return err } case model.WAITING_FOR_UPLOAD: fallthrough case model.ERROR: fallthrough case model.APPENDING: fallthrough case model.UPLOADED: fallthrough case model.UPLOADING: return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State) default: return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State) } }