// CloseArtifact closes an artifact for further writes and begins the process of merging and
// uploading the artifact. This operation is only valid for artifacts which are being uploaded in
// chunks. In all other cases, an error is returned.
func CloseArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket, failIfAlreadyClosed bool) error {
	switch artifact.State {
	case model.UPLOADED:
		// Already closed. Nothing to do here.
		fallthrough
	case model.APPEND_COMPLETE:
		// This artifact will eventually be shipped to S3. No change required.
		return nil

	case model.APPENDING:
		artifact.State = model.APPEND_COMPLETE
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}
		return MergeLogChunks(ctx, artifact, db, s3bucket)

	case model.WAITING_FOR_UPLOAD:
		// Streaming artifact was never uploaded.
		artifact.State = model.CLOSED_WITHOUT_DATA
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}
		return nil

	default:
		return fmt.Errorf("Unexpected artifact state: %s", artifact.State)
	}
}
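// The switch above relies on a small set of artifact states defined in the model package but not
// shown in this listing. The sketch below is illustrative only: the state names are taken from the
// code in this file, while the integer-backed type and the ordering of the constants are
// assumptions (the "%d" in the default error branches of MergeLogChunks suggests an integer type).
type artifactStateSketch int32

const (
	stateAppending         artifactStateSketch = iota // chunked artifact, currently accepting log chunks
	stateAppendComplete                               // closed for writes, waiting to be merged and shipped to S3
	stateWaitingForUpload                             // streamed artifact, waiting for PutArtifact
	stateUploading                                    // upload to S3 in progress
	stateUploaded                                     // object is on S3; terminal success state
	stateClosedWithoutData                            // closed without any bytes ever written
	stateError                                        // upload failed
)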
// MergeLogChunks merges all of the individual chunks into a single object and stores it on S3.
// The log chunks are stored in the database, while the merged object is uploaded to S3.
func MergeLogChunks(ctx context.Context, artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error {
	switch artifact.State {
	case model.APPEND_COMPLETE:
		// TODO: Reimplement using GorpDatabase
		// If the file is empty, don't bother creating an object on S3.
		if artifact.Size == 0 {
			artifact.State = model.CLOSED_WITHOUT_DATA
			artifact.S3URL = ""

			// Conversion between *DatabaseError and error is tricky. If we don't do this, a nil
			// *DatabaseError can become a non-nil error.
			return db.UpdateArtifact(artifact).GetError()
		}

		// XXX Do we need to commit here or is this handled transparently?
		artifact.State = model.UPLOADING
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		fileName := artifact.DefaultS3URL()
		r := newLogChunkReaderWithReadahead(artifact, db)

		if err := uploadArtifactToS3(s3bucket, fileName, artifact.Size, r); err != nil {
			return err
		}

		// XXX This is a long operation and should probably be asynchronous from the actual HTTP
		// request, and the client should poll to check when it has been uploaded.
		artifact.State = model.UPLOADED
		artifact.S3URL = fileName
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		// From this point onwards, we will not send any errors back to the user. If we are unable
		// to delete logchunks, we report the error to Sentry instead.
		if _, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil {
			sentry.ReportError(ctx, err)
			return nil
		}

		return nil

	case model.WAITING_FOR_UPLOAD:
		fallthrough
	case model.ERROR:
		fallthrough
	case model.APPENDING:
		fallthrough
	case model.UPLOADED:
		fallthrough
	case model.UPLOADING:
		return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State)

	default:
		return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State)
	}
}
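// newLogChunkReaderWithReadahead and uploadArtifactToS3 are used above but not shown in this
// listing. The two sketches below are hedged stand-ins, not the real implementations: the reader
// sketch loads all logchunks up front and performs no actual readahead (the real reader presumably
// fetches chunks from the database lazily), and the upload sketch simply wraps bucket.PutReader
// with the same content type and ACL used by the older code further down in this listing.
func newLogChunkReaderSketch(artifact *model.Artifact, db database.Database) (io.Reader, error) {
	// Fetch every logchunk for the artifact; ListLogChunksInArtifact is the same call the older
	// MergeLogChunks implementation uses.
	logChunks, err := db.ListLogChunksInArtifact(artifact.Id)
	if err != nil {
		return nil, err
	}

	readers := make([]io.Reader, 0, len(logChunks))
	for _, logChunk := range logChunks {
		readers = append(readers, bytes.NewReader(logChunk.ContentBytes))
	}

	// Chunks were appended contiguously, so reading them back in order reproduces the artifact.
	return io.MultiReader(readers...), nil
}

func uploadArtifactToS3Sketch(bucket *s3.Bucket, fileName string, size int64, r io.Reader) error {
	if err := bucket.PutReader(fileName, r, size, "binary/octet-stream", s3.PublicRead); err != nil {
		return fmt.Errorf("Error uploading to S3: %s", err)
	}
	return nil
}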
// AppendLogChunk validates that the given logchunk extends the artifact contiguously, then
// inserts it and expands the artifact size.
func AppendLogChunk(db database.Database, artifact *model.Artifact, logChunk *model.LogChunk) *HttpError {
	if artifact.State != model.APPENDING {
		return NewHttpError(http.StatusBadRequest, fmt.Sprintf("Unexpected artifact state: %s", artifact.State))
	}

	if logChunk.Size <= 0 {
		return NewHttpError(http.StatusBadRequest, "Invalid chunk size %d", logChunk.Size)
	}

	if logChunk.Content == "" {
		return NewHttpError(http.StatusBadRequest, "Empty content string")
	}

	if int64(len(logChunk.Content)) != logChunk.Size {
		return NewHttpError(http.StatusBadRequest, "Content length does not match indicated size")
	}

	// Find the previous chunk in the DB - append only.
	if nextByteOffset, err := db.GetLastByteSeenForArtifact(artifact.Id); err != nil {
		return NewHttpError(http.StatusInternalServerError, "Error while checking for previous byte range: %s", err)
	} else if nextByteOffset != logChunk.ByteOffset {
		return NewHttpError(http.StatusBadRequest, "Overlapping ranges detected, expected offset: %d, actual offset: %d", nextByteOffset, logChunk.ByteOffset)
	}

	logChunk.ArtifactId = artifact.Id

	// Expand artifact size - redundant after above change.
	if artifact.Size < logChunk.ByteOffset+logChunk.Size {
		artifact.Size = logChunk.ByteOffset + logChunk.Size
		if err := db.UpdateArtifact(artifact); err != nil {
			return NewHttpError(http.StatusInternalServerError, err.Error())
		}
	}

	if err := db.InsertLogChunk(logChunk); err != nil {
		return NewHttpError(http.StatusBadRequest, "Error inserting log chunk: %s", err)
	}
	return nil
}
// CreateArtifact creates a new artifact in an open bucket.
//
// If an artifact with the same name already exists in the same bucket, we attempt to rename the
// artifact by adding a suffix.
// If the request specifies a chunked artifact, the size field is ignored and always set to zero.
// If the request is for a streamed artifact, size is mandatory.
// A relative path field may be specified to preserve the original file name and path. If no path
// is specified, the original artifact name is used by default.
func CreateArtifact(req createArtifactReq, bucket *model.Bucket, db database.Database) (*model.Artifact, *HttpError) {
	if len(req.Name) == 0 {
		return nil, NewHttpError(http.StatusBadRequest, "Artifact name not provided")
	}

	if bucket.State != model.OPEN {
		return nil, NewHttpError(http.StatusBadRequest, "Bucket is already closed")
	}

	artifact := new(model.Artifact)
	artifact.Name = req.Name
	artifact.BucketId = bucket.Id
	artifact.DateCreated = time.Now()

	if req.DeadlineMins == 0 {
		artifact.DeadlineMins = DEFAULT_DEADLINE
	} else {
		artifact.DeadlineMins = req.DeadlineMins
	}

	if req.Chunked {
		artifact.State = model.APPENDING
	} else {
		if req.Size == 0 {
			return nil, NewHttpError(http.StatusBadRequest, "Cannot create a new upload artifact without size.")
		} else if req.Size > MaxArtifactSizeBytes {
			return nil, NewHttpError(http.StatusRequestEntityTooLarge, fmt.Sprintf("Entity '%s' (size %d) is too large (limit %d)", req.Name, req.Size, MaxArtifactSizeBytes))
		}

		artifact.Size = req.Size
		artifact.State = model.WAITING_FOR_UPLOAD
	}

	if req.RelativePath == "" {
		// Use the artifact name as the default relative path.
		artifact.RelativePath = req.Name
	} else {
		artifact.RelativePath = req.RelativePath
	}

	// Attempt to insert the artifact, and retry with a different name if it fails.
	if err := db.InsertArtifact(artifact); err != nil {
		for attempt := 1; attempt <= MaxDuplicateFileNameResolutionAttempts; attempt++ {
			// The insert above failed - if an artifact with this name already exists, the failure
			// was caused by a name collision.
			if _, err := db.GetArtifactByName(bucket.Id, artifact.Name); err != nil {
				// This could be a transient DB error (down/unreachable), in which case we expect
				// the client to retry. There is no value in attempting alternate artifact names.
				//
				// We have no means of verifying there was a name collision - bail with an internal
				// error.
				return nil, NewHttpError(http.StatusInternalServerError, err.Error())
			}

			// File name collision - attempt to resolve by appending a random suffix.
			artifact.Name = fmt.Sprintf(DuplicateArtifactNameFormat, req.Name, randString(5))
			if err := db.InsertArtifact(artifact); err == nil {
				return artifact, nil
			}
		}

		return nil, NewHttpError(http.StatusInternalServerError, "Exceeded retry limit avoiding duplicates")
	}

	return artifact, nil
}
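// createArtifactReq, DuplicateArtifactNameFormat, and randString are referenced above but not
// defined in this listing. The sketches below are inferred from the call sites and are
// assumptions, not the real definitions: the request struct lists only the fields accessed above
// (the DeadlineMins type is a guess), and the rename format string and character set used for
// duplicate resolution are invented for illustration.
type createArtifactReqSketch struct {
	Name         string // artifact name; required
	Chunked      bool   // true for chunked (APPENDING) artifacts, false for streamed uploads
	Size         int64  // mandatory for streamed artifacts, ignored for chunked ones
	DeadlineMins uint   // 0 means "use DEFAULT_DEADLINE"
	RelativePath string // optional; defaults to Name
}

const duplicateArtifactNameFormatSketch = "%s.dup.%s" // e.g. "build.log.dup.x7k2q"

// randStringSketch returns n random lowercase alphanumeric characters (requires "math/rand").
func randStringSketch(n int) string {
	const letters = "abcdefghijklmnopqrstuvwxyz0123456789"
	b := make([]byte, n)
	for i := range b {
		b[i] = letters[rand.Intn(len(letters))]
	}
	return string(b)
}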
// PutArtifact writes a streamed artifact to S3. The entire request body is read into memory and
// then uploaded to S3 in a single PUT. If S3 is not accessible, we don't make any attempt to
// buffer on disk and fail immediately.
func PutArtifact(ctx context.Context, artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error {
	if artifact.State != model.WAITING_FOR_UPLOAD {
		return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State)
	}

	// New file being inserted into DB.
	// Mark status change to UPLOADING and start uploading to S3.
	//
	// First, verify that the size of the content being uploaded matches our expected size.
	var fileSize int64
	var err error

	if req.ContentLength != "" {
		fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bit size
		// This should never happen if a sane HTTP client is used. Nonetheless ...
		if err != nil {
			return fmt.Errorf("Invalid Content-Length specified")
		}
	} else {
		// This too should never happen if a sane HTTP client is used. Nonetheless ...
		return fmt.Errorf("Content-Length not specified")
	}

	if fileSize != artifact.Size {
		return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size)
	}

	artifact.State = model.UPLOADING
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	cleanupAndReturn := func(err error) error {
		// TODO: Is there a better way to detect and handle errors?
		// Use a channel to signal upload completion. In a defer, check if the channel is empty.
		// If yes, mark an error. Else ignore.
		if err != nil {
			// TODO: s/ERROR/WAITING_FOR_UPLOAD/ ?
			sentry.ReportError(ctx, err)
			artifact.State = model.ERROR
			err2 := db.UpdateArtifact(artifact)
			if err2 != nil {
				log.Printf("Error while handling error: %s", err2.Error())
			}
			return err
		}
		return nil
	}

	b := new(bytes.Buffer)
	// Note: buffering the entire contents of the uploaded artifact in memory can cause OOMs.
	if n, err := io.CopyN(b, req.Body, artifact.Size); err != nil {
		return cleanupAndReturn(fmt.Errorf("Error reading from request body (for artifact %s/%s, bytes (%d/%d) read): %s",
			artifact.BucketId, artifact.Name, n, artifact.Size, err))
	}

	fileName := artifact.DefaultS3URL()
	if err := uploadArtifactToS3(bucket, fileName, artifact.Size, bytes.NewReader(b.Bytes())); err != nil {
		return cleanupAndReturn(err)
	}

	artifact.State = model.UPLOADED
	artifact.S3URL = fileName
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	return nil
}
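// PutArtifactReq is used by both PutArtifact variants in this listing but is not defined here.
// From the call sites (ContentLength is parsed with strconv.ParseInt, Body is consumed by
// io.CopyN and bucket.PutReader), a hedged sketch of its shape might look like the following;
// any additional fields on the real struct are unknown.
type putArtifactReqSketch struct {
	ContentLength string    // raw Content-Length header value, validated against artifact.Size
	Body          io.Reader // request body holding the artifact contents
}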
// AppendLogChunk appends a logchunk to an artifact.
// If the logchunk position does not match the current end of the artifact, an error is returned.
// An exception to this is made when the last seen logchunk is repeated, which is silently ignored
// without an error.
func AppendLogChunk(ctx context.Context, db database.Database, artifact *model.Artifact, logChunkReq *createLogChunkReq) *HttpError {
	if artifact.State != model.APPENDING {
		return NewHttpError(http.StatusBadRequest, fmt.Sprintf("Unexpected artifact state: %s", artifact.State))
	}

	if logChunkReq.Size <= 0 {
		return NewHttpError(http.StatusBadRequest, "Invalid chunk size %d", logChunkReq.Size)
	}

	var contentBytes []byte
	if len(logChunkReq.Bytes) != 0 {
		// If the request sent Bytes, use Bytes.
		if int64(len(logChunkReq.Bytes)) != logChunkReq.Size {
			return NewHttpError(http.StatusBadRequest, "Content length %d does not match indicated size %d", len(logChunkReq.Bytes), logChunkReq.Size)
		}
		contentBytes = logChunkReq.Bytes
	} else {
		// Otherwise, allow Content, for now.
		if len(logChunkReq.Content) == 0 {
			return NewHttpError(http.StatusBadRequest, "Empty content string")
		}
		if int64(len(logChunkReq.Content)) != logChunkReq.Size {
			return NewHttpError(http.StatusBadRequest, "Content length %d does not match indicated size %d", len(logChunkReq.Content), logChunkReq.Size)
		}
		contentBytes = []byte(logChunkReq.Content)
	}

	// Find the expected next byte offset - append only.
	nextByteOffset := artifact.Size
	if nextByteOffset != logChunkReq.ByteOffset {
		// There is a possibility the previous logchunk is being retried - we need to handle cases
		// where a server/proxy timeout caused the client not to get an ACK when it successfully
		// uploaded the previous logchunk, due to which it is retrying.
		//
		// This is a best-effort check - if we encounter DB errors or any mismatch in the chunk
		// contents, we ignore this test and claim that a range mismatch occurred.
		if nextByteOffset != 0 && nextByteOffset == logChunkReq.ByteOffset+logChunkReq.Size {
			if prevLogChunk, err := db.GetLastLogChunkSeenForArtifact(artifact.Id); err == nil {
				if prevLogChunk != nil && prevLogChunk.ByteOffset == logChunkReq.ByteOffset && prevLogChunk.Size == logChunkReq.Size && bytes.Equal(prevLogChunk.ContentBytes, contentBytes) {
					sentry.ReportMessage(ctx, fmt.Sprintf("Received duplicate chunk for artifact %v of size %d at byte %d", artifact.Id, logChunkReq.Size, logChunkReq.ByteOffset))
					return nil
				}
			}
		}

		return NewHttpError(http.StatusBadRequest, "Overlapping ranges detected, expected offset: %d, actual offset: %d", nextByteOffset, logChunkReq.ByteOffset)
	}

	// Expand the artifact size. (The guard is effectively redundant now that the byte offset is
	// validated against artifact.Size above, but it is kept for safety.)
	if artifact.Size < logChunkReq.ByteOffset+logChunkReq.Size {
		artifact.Size = logChunkReq.ByteOffset + logChunkReq.Size
		if err := db.UpdateArtifact(artifact); err != nil {
			return NewHttpError(http.StatusInternalServerError, err.Error())
		}
	}

	logChunk := &model.LogChunk{
		ArtifactId:   artifact.Id,
		ByteOffset:   logChunkReq.ByteOffset,
		ContentBytes: contentBytes,
		Size:         logChunkReq.Size,
	}

	if err := db.InsertLogChunk(logChunk); err != nil {
		return NewHttpError(http.StatusBadRequest, "Error inserting log chunk: %s", err)
	}
	return nil
}
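// createLogChunkReq is used above but not defined in this listing. The sketch below lists only
// the fields accessed by AppendLogChunk; field order, serialization tags, and any other fields
// on the real struct are assumptions.
type createLogChunkReqSketch struct {
	ByteOffset int64  // offset at which this chunk starts; must equal the current artifact size
	Size       int64  // chunk length in bytes
	Bytes      []byte // preferred payload field
	Content    string // legacy string payload, still accepted "for now"
}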
// PutArtifact streams the contents of a WAITING_FOR_UPLOAD artifact from the request body
// directly to S3 and marks it UPLOADED.
func PutArtifact(artifact *model.Artifact, db database.Database, bucket *s3.Bucket, req PutArtifactReq) error {
	if artifact.State != model.WAITING_FOR_UPLOAD {
		return fmt.Errorf("Expected artifact to be in state WAITING_FOR_UPLOAD: %s", artifact.State)
	}

	// New file being inserted into DB.
	// Mark status change to UPLOADING and start uploading to S3.
	//
	// First, verify that the size of the content being uploaded matches our expected size.
	var fileSize int64
	var err error

	if req.ContentLength != "" {
		fileSize, err = strconv.ParseInt(req.ContentLength, 10, 64) // string, base, bit size
		// This should never happen if a sane HTTP client is used. Nonetheless ...
		if err != nil {
			return fmt.Errorf("Invalid Content-Length specified")
		}
	} else {
		// This too should never happen if a sane HTTP client is used. Nonetheless ...
		return fmt.Errorf("Content-Length not specified")
	}

	if fileSize != artifact.Size {
		return fmt.Errorf("Content length %d does not match expected file size %d", fileSize, artifact.Size)
	}

	// XXX Do we need to commit here or is this handled transparently?
	artifact.State = model.UPLOADING
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	cleanupAndReturn := func(err error) error {
		// TODO: Is there a better way to detect and handle errors?
		// Use a channel to signal upload completion. In a defer, check if the channel is empty.
		// If yes, mark an error. Else ignore.
		if err != nil {
			// TODO: s/ERROR/WAITING_FOR_UPLOAD/ ?
			log.Printf("Error uploading to S3: %s\n", err)
			artifact.State = model.ERROR
			err2 := db.UpdateArtifact(artifact)
			if err2 != nil {
				log.Printf("Error while handling error: %s", err2.Error())
			}
			return err
		}
		return nil
	}

	fileName := artifact.DefaultS3URL()

	if err := bucket.PutReader(fileName, req.Body, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil {
		return cleanupAndReturn(fmt.Errorf("Error uploading to S3: %s", err))
	}

	artifact.State = model.UPLOADED
	artifact.S3URL = fileName
	if err := db.UpdateArtifact(artifact); err != nil {
		return err
	}

	return nil
}
// MergeLogChunks merges all of the individual chunks into a single object and stores it on S3.
// The log chunks are stored in the database, while the merged object is uploaded to S3.
func MergeLogChunks(artifact *model.Artifact, db database.Database, s3bucket *s3.Bucket) error {
	switch artifact.State {
	case model.APPEND_COMPLETE:
		// TODO: Reimplement using GorpDatabase
		// If the file is empty, don't bother creating an object on S3.
		if artifact.Size == 0 {
			artifact.State = model.CLOSED_WITHOUT_DATA
			artifact.S3URL = ""

			// Conversion between *DatabaseError and error is tricky. If we don't do this, a nil
			// *DatabaseError can become a non-nil error.
			return db.UpdateArtifact(artifact).GetError()
		}

		// XXX Do we need to commit here or is this handled transparently?
		artifact.State = model.UPLOADING
		if err := db.UpdateArtifact(artifact); err != nil {
			return err
		}

		logChunks, err := db.ListLogChunksInArtifact(artifact.Id)
		if err != nil {
			return err
		}

		r, w := io.Pipe()
		errChan := make(chan error)
		uploadCompleteChan := make(chan bool)
		fileName := artifact.DefaultS3URL()

		// Asynchronously upload the object to S3 while reading from the r, w pipe. Thus anything
		// written to "w" will be sent to S3.
		go func() {
			defer close(errChan)
			defer close(uploadCompleteChan)
			defer r.Close()
			if err := s3bucket.PutReader(fileName, r, artifact.Size, "binary/octet-stream", s3.PublicRead); err != nil {
				errChan <- fmt.Errorf("Error uploading to S3: %s", err)
				return
			}

			uploadCompleteChan <- true
		}()

		for _, logChunk := range logChunks {
			w.Write([]byte(logChunk.Content))
		}

		w.Close()

		// Wait either for the S3 upload to complete or for it to fail with an error.
		// XXX This is a long operation and should probably be asynchronous from the actual HTTP
		// request, and the client should poll to check when it has been uploaded.
		select {
		case <-uploadCompleteChan:
			artifact.State = model.UPLOADED
			artifact.S3URL = fileName
			if err := db.UpdateArtifact(artifact); err != nil {
				return err
			}

			// From this point onwards, we will not send any errors back to the user. If we are
			// unable to delete logchunks, we log the error instead.
			if n, err := db.DeleteLogChunksForArtifact(artifact.Id); err != nil {
				// TODO: Send this error to Sentry
				log.Printf("Error deleting logchunks for artifact %d: %v\n", artifact.Id, err)
				return nil
			} else if n != int64(len(logChunks)) {
				// TODO: Send this error to Sentry
				log.Printf("Mismatch in number of logchunks while deleting logchunks for artifact %d: "+
					"Expected: %d Actual: %d\n", artifact.Id, len(logChunks), n)
			}

			return nil
		case err := <-errChan:
			return err
		}

	case model.WAITING_FOR_UPLOAD:
		fallthrough
	case model.ERROR:
		fallthrough
	case model.APPENDING:
		fallthrough
	case model.UPLOADED:
		fallthrough
	case model.UPLOADING:
		return fmt.Errorf("Artifact can only be merged when in APPEND_COMPLETE state, but state is %s", artifact.State)

	default:
		return fmt.Errorf("Illegal artifact state! State code is %d", artifact.State)
	}
}