func (d *driver) StartCommit(repo *pfs.Repo, commitID string, parentID string, branch string, started *google_protobuf.Timestamp, provenance []*pfs.Commit, shards map[uint64]bool) error { d.lock.Lock() defer d.lock.Unlock() // make sure that the parent commit exists if parentID != "" { _, err := d.inspectCommit(client.NewCommit(repo.Name, parentID), shards) if err != nil { return err } } for shard := range shards { if len(provenance) != 0 { diffInfo, ok := d.diffs.get(client.NewDiff(repo.Name, "", shard)) if !ok { return pfsserver.NewErrRepoNotFound(repo.Name) } provRepos := repoSetFromCommits(diffInfo.Provenance) for _, provCommit := range provenance { if !provRepos[provCommit.Repo.Name] { return fmt.Errorf("cannot use %s/%s as provenance, %s is not provenance of %s", provCommit.Repo.Name, provCommit.ID, provCommit.Repo.Name, repo.Name) } } } diffInfo := &pfs.DiffInfo{ Diff: client.NewDiff(repo.Name, commitID, shard), Started: started, Appends: make(map[string]*pfs.Append), Branch: branch, Provenance: provenance, } if branch != "" { parentCommit, err := d.branchParent(client.NewCommit(repo.Name, commitID), branch) if err != nil { return err } if parentCommit != nil && parentID != "" { return fmt.Errorf("branch %s already exists as %s, can't create with %s as parent", branch, parentCommit.ID, parentID) } diffInfo.ParentCommit = parentCommit } if diffInfo.ParentCommit == nil && parentID != "" { diffInfo.ParentCommit = client.NewCommit(repo.Name, parentID) } if err := d.insertDiffInfo(diffInfo); err != nil { return err } } d.commitConds[commitID] = sync.NewCond(&d.lock) return nil }
func (d *driver) AddShard(shard uint64) error { blockClient, err := d.getBlockClient() if err != nil { return err } listDiffClient, err := blockClient.ListDiff(context.Background(), &pfs.ListDiffRequest{Shard: shard}) if err != nil { return err } diffInfos := make(diffMap) dags := make(map[string]*dag.DAG) for { diffInfo, err := listDiffClient.Recv() if err != nil && err != io.EOF { return err } if err == io.EOF { break } if diffInfo.Diff == nil || diffInfo.Diff.Commit == nil || diffInfo.Diff.Commit.Repo == nil { return fmt.Errorf("broken diff info: %v; this is likely a bug", diffInfo) } repoName := diffInfo.Diff.Commit.Repo.Name if _, ok := diffInfos[repoName]; !ok { diffInfos[repoName] = make(map[uint64]map[string]*pfs.DiffInfo) dags[repoName] = dag.NewDAG(nil) } updateDAG(diffInfo, dags[repoName]) if err := diffInfos.insert(diffInfo); err != nil { return err } } for repoName, dag := range dags { if ghosts := dag.Ghosts(); len(ghosts) != 0 { return fmt.Errorf("error adding shard %d, repo %s has ghost commits: %+v", shard, repoName, ghosts) } } d.lock.Lock() defer d.lock.Unlock() for repoName, dag := range dags { for _, commitID := range dag.Sorted() { d.createRepoState(client.NewRepo(repoName)) if diffInfo, ok := diffInfos.get(client.NewDiff(repoName, commitID, shard)); ok { if err := d.insertDiffInfo(diffInfo); err != nil { return err } if diffInfo.Finished == nil { return fmt.Errorf("diff %s/%s/%d is not finished; this is likely a bug", repoName, commitID, shard) } } else { return fmt.Errorf("diff %s/%s/%d not found; this is likely a bug", repoName, commitID, shard) } } } return nil }
func (d *driver) CreateRepo(repo *pfs.Repo, created *google_protobuf.Timestamp, provenance []*pfs.Repo, shards map[uint64]bool) error { d.lock.Lock() defer d.lock.Unlock() if _, ok := d.diffs[repo.Name]; ok { return fmt.Errorf("repo %s exists", repo.Name) } if err := validateRepoName(repo.Name); err != nil { return err } for _, provRepo := range provenance { if _, err := d.inspectRepo(provRepo, shards); err != nil { return nil } } d.createRepoState(repo) blockClient, err := d.getBlockClient() if err != nil { return err } var wg sync.WaitGroup errCh := make(chan error, 1) for shard := range shards { wg.Add(1) diffInfo := &pfs.DiffInfo{ Diff: client.NewDiff(repo.Name, "", shard), Finished: created, } for _, provRepo := range provenance { diffInfo.Provenance = append(diffInfo.Provenance, client.NewCommit(provRepo.Name, "")) } if err := d.diffs.insert(diffInfo); err != nil { return err } go func() { defer wg.Done() if _, err := blockClient.CreateDiff(context.Background(), diffInfo); err != nil { select { case errCh <- err: default: } return } }() } wg.Wait() select { case err := <-errCh: return err default: } return nil }
func (d *driver) MakeDirectory(file *pfs.File, shard uint64) (retErr error) { defer func() { if retErr == nil { metrics.AddFiles(1) } }() d.lock.Lock() defer d.lock.Unlock() fileType, err := d.getFileType(file, shard) if err != nil { return err } if fileType == pfs.FileType_FILE_TYPE_REGULAR { return fmt.Errorf("%s already exists and is a file", file.Path) } else if fileType == pfs.FileType_FILE_TYPE_DIR { return nil } canonicalCommit, err := d.canonicalCommit(file.Commit) if err != nil { return err } diffInfo, ok := d.diffs.get(client.NewDiff(canonicalCommit.Repo.Name, canonicalCommit.ID, shard)) if !ok { return pfsserver.NewErrCommitNotFound(canonicalCommit.Repo.Name, canonicalCommit.ID) } if diffInfo.Finished != nil { return fmt.Errorf("commit %s/%s has already been finished", canonicalCommit.Repo.Name, canonicalCommit.ID) } d.addDirs(diffInfo, file, shard) _append, ok := diffInfo.Appends[path.Clean(file.Path)] if !ok { _append = newAppend(pfs.FileType_FILE_TYPE_DIR) } else { _append.FileType = pfs.FileType_FILE_TYPE_DIR } if diffInfo.ParentCommit != nil { _append.LastRef = d.lastRef( client.NewFile( diffInfo.ParentCommit.Repo.Name, diffInfo.ParentCommit.ID, file.Path, ), shard, ) } diffInfo.Appends[path.Clean(file.Path)] = _append // The fact that this is a directory is signified by setting Children // to non-nil _append.Children = make(map[string]bool) return nil }
// lastRef assumes the diffInfo file exists in finished func (d *driver) lastRef(file *pfs.File, shard uint64) *pfs.Commit { commit := file.Commit for commit != nil { diffInfo, _ := d.diffs.get(client.NewDiff(commit.Repo.Name, commit.ID, shard)) if _, ok := diffInfo.Appends[path.Clean(file.Path)]; ok { return commit } commit = diffInfo.ParentCommit } return nil }
func (d *driver) inspectCommit(commit *pfs.Commit, shards map[uint64]bool) (*pfs.CommitInfo, error) { var commitInfos []*pfs.CommitInfo canonicalCommit, err := d.canonicalCommit(commit) if err != nil { return nil, err } for shard := range shards { var diffInfo *pfs.DiffInfo var ok bool commitInfo := &pfs.CommitInfo{Commit: canonicalCommit} diff := client.NewDiff(canonicalCommit.Repo.Name, canonicalCommit.ID, shard) if diffInfo, ok = d.diffs.get(diff); !ok { return nil, pfsserver.NewErrCommitNotFound(canonicalCommit.Repo.Name, canonicalCommit.ID) } if diffInfo.Finished == nil { commitInfo.CommitType = pfs.CommitType_COMMIT_TYPE_WRITE } else { commitInfo.CommitType = pfs.CommitType_COMMIT_TYPE_READ } commitInfo.Branch = diffInfo.Branch commitInfo.ParentCommit = diffInfo.ParentCommit commitInfo.Started = diffInfo.Started commitInfo.Finished = diffInfo.Finished commitInfo.SizeBytes = diffInfo.SizeBytes commitInfo.Cancelled = diffInfo.Cancelled commitInfos = append(commitInfos, commitInfo) } commitInfo := pfsserver.ReduceCommitInfos(commitInfos) if len(commitInfo) < 1 { // we should have caught this above return nil, pfsserver.NewErrCommitNotFound(canonicalCommit.Repo.Name, canonicalCommit.ID) } if len(commitInfo) > 1 { return nil, fmt.Errorf("multiple commitInfos, (this is likely a bug)") } result := commitInfo[0] provenance, err := d.commitProvenance(canonicalCommit, shards) if err != nil { return nil, err } result.Provenance = provenance return commitInfo[0], nil }
func (d *driver) getFileType(file *pfs.File, shard uint64) (pfs.FileType, error) { commit, err := d.canonicalCommit(file.Commit) if err != nil { return pfs.FileType_FILE_TYPE_NONE, err } for commit != nil { diffInfo, ok := d.diffs.get(client.NewDiff(commit.Repo.Name, commit.ID, shard)) if !ok { return pfs.FileType_FILE_TYPE_NONE, pfsserver.NewErrCommitNotFound(commit.Repo.Name, commit.ID) } if _append, ok := diffInfo.Appends[path.Clean(file.Path)]; ok { if _append.FileType == pfs.FileType_FILE_TYPE_NONE { break } return _append.FileType, nil } commit = diffInfo.ParentCommit } return pfs.FileType_FILE_TYPE_NONE, nil }
func (d *driver) deleteFile(file *pfs.File, shard uint64, unsafe bool, handle string) error { d.lock.Lock() defer d.lock.Unlock() canonicalCommit, err := d.canonicalCommit(file.Commit) if err != nil { return err } diffInfo, ok := d.diffs.get(client.NewDiff(canonicalCommit.Repo.Name, canonicalCommit.ID, shard)) if !ok { // This is a weird case since the commit existed above, it means someone // deleted the commit while the above code was running return pfsserver.NewErrCommitNotFound(canonicalCommit.Repo.Name, canonicalCommit.ID) } if diffInfo.Finished != nil { return fmt.Errorf("commit %s/%s has already been finished", canonicalCommit.Repo.Name, canonicalCommit.ID) } cleanPath := path.Clean(file.Path) if _append, ok := diffInfo.Appends[cleanPath]; !ok { // we have no append for this file, we create on so that we can set the // Delete flag in it diffInfo.Appends[cleanPath] = newAppend(pfs.FileType_FILE_TYPE_NONE) } else if unsafe { // we have an append for this file and unsafe is true so we need to modify the append if handle == "" { diffInfo.Appends[cleanPath] = newAppend(pfs.FileType_FILE_TYPE_NONE) } else { delete(_append.Handles, handle) } } if !unsafe || handle == "" { diffInfo.Appends[cleanPath].Delete = true } else { diffInfo.Appends[cleanPath].HandleDeletes[handle] = true } d.deleteFromDir(diffInfo, file, shard) return nil }
// inspectFile assembles a FileInfo and the list of block refs for file on
// shard by walking commit history, starting at file.Commit and stopping at
// the end of the chain or at the commit whose ID equals from.ID (exclusive).
//
// If recurse is set to true, and if the file being inspected is a directory,
// its children will have the correct sizes. If recurse is false and the file
// is a directory, its children will have size of 0.
// If unsafe is set to true, you can inspect files in an open commit
//
// filterShard restricts which block refs and which children are reported.
// handle, when non-empty, restricts handle-scoped block refs to that handle;
// when empty, the refs of all handles are included.
//
// A not-found error is returned when no visited commit established a file
// type for the path, or when a regular file falls outside filterShard.
func (d *driver) inspectFile(file *pfs.File, filterShard *pfs.Shard, shard uint64, from *pfs.Commit, recurse bool, unsafe bool, handle string) (*pfs.FileInfo, []*pfs.BlockRef, error) {
	fileInfo := &pfs.FileInfo{File: file}
	var blockRefs []*pfs.BlockRef
	// children tracks names already seen as added; deletedChildren tracks
	// names seen as removed by a commit visited earlier in the walk.
	children := make(map[string]bool)
	deletedChildren := make(map[string]bool)
	commit, err := d.canonicalCommit(file.Commit)
	if err != nil {
		return nil, nil, err
	}
	for commit != nil && (from == nil || commit.ID != from.ID) {
		diffInfo, ok := d.diffs.get(client.NewDiff(commit.Repo.Name, commit.ID, shard))
		if !ok {
			return nil, nil, pfsserver.NewErrCommitNotFound(commit.Repo.Name, commit.ID)
		}
		// Unfinished commits are skipped unless the caller asked for unsafe
		// reads.
		if !unsafe && diffInfo.Finished == nil {
			commit = diffInfo.ParentCommit
			continue
		}
		if _append, ok := diffInfo.Appends[path.Clean(file.Path)]; ok {
			// An entry of type NONE is only legitimate as a delete marker.
			if _append.FileType == pfs.FileType_FILE_TYPE_NONE && !_append.Delete && len(_append.HandleDeletes) == 0 {
				return nil, nil, fmt.Errorf("the append for %s has file type NONE, this is likely a bug", path.Clean(file.Path))
			}
			if _append.FileType == pfs.FileType_FILE_TYPE_REGULAR {
				if fileInfo.FileType == pfs.FileType_FILE_TYPE_DIR {
					return nil, nil, fmt.Errorf("mixed dir and regular file %s/%s/%s, (this is likely a bug)", file.Commit.Repo.Name, file.Commit.ID, file.Path)
				}
				if fileInfo.FileType == pfs.FileType_FILE_TYPE_NONE {
					// the first time we find out it's a regular file we check
					// the file shard, dirs get returned regardless of sharding,
					// since they might have children from any shard
					if !pfsserver.FileInShard(filterShard, file) {
						return nil, nil, pfsserver.NewErrFileNotFound(file.Path, file.Commit.Repo.Name, file.Commit.ID)
					}
				}
				fileInfo.FileType = pfs.FileType_FILE_TYPE_REGULAR
				filtered := filterBlockRefs(filterShard, _append.BlockRefs)
				if handle == "" {
					for _, handleBlockRefs := range _append.Handles {
						filtered = append(filtered, filterBlockRefs(filterShard, handleBlockRefs.BlockRef)...)
					}
				} else {
					if handleBlockRefs, ok := _append.Handles[handle]; ok {
						filtered = append(filtered, filterBlockRefs(filterShard, handleBlockRefs.BlockRef)...)
					}
				}
				// Refs from this commit go in front of the refs gathered from
				// commits visited earlier in the walk.
				blockRefs = append(filtered, blockRefs...)
				for _, blockRef := range filtered {
					fileInfo.SizeBytes += (blockRef.Range.Upper - blockRef.Range.Lower)
				}
			} else if _append.FileType == pfs.FileType_FILE_TYPE_DIR {
				if fileInfo.FileType == pfs.FileType_FILE_TYPE_REGULAR {
					return nil, nil, fmt.Errorf("mixed dir and regular file %s/%s/%s, (this is likely a bug)", file.Commit.Repo.Name, file.Commit.ID, file.Path)
				}
				fileInfo.FileType = pfs.FileType_FILE_TYPE_DIR
				for child, add := range _append.Children {
					// A false value marks the child as removed in this commit.
					if !add {
						deletedChildren[child] = true
						continue
					}
					// Report a child only the first time it is seen, and only
					// if a commit visited earlier did not remove it.
					if !children[child] && !deletedChildren[child] {
						childFile := client.NewFile(commit.Repo.Name, commit.ID, child)
						if pfsserver.FileInShard(filterShard, childFile) {
							fileInfo.Children = append(
								fileInfo.Children,
								client.NewFile(commit.Repo.Name, commit.ID, child),
							)
							if recurse {
								// The child is inspected from the originally
								// requested commit, not the commit currently
								// being visited.
								childFileInfo, _, err := d.inspectFile(&pfs.File{
									Commit: file.Commit,
									Path:   child,
								}, filterShard, shard, from, recurse, unsafe, handle)
								if err != nil {
									return nil, nil, err
								}
								fileInfo.SizeBytes += childFileInfo.SizeBytes
							}
						}
					}
					children[child] = true
				}
			}
			// If Delete is true, then everything before this commit is irrelevant
			if _append.Delete || (unsafe && handle != "" && _append.HandleDeletes[handle]) {
				break
			}
			// Only the first visited commit with an entry for the path sets
			// the modification info.
			if fileInfo.CommitModified == nil {
				fileInfo.CommitModified = commit
				fileInfo.Modified = diffInfo.Finished
			}
			// Jump directly to the commit recorded in LastRef instead of
			// stepping through each parent.
			commit = _append.LastRef
			continue
		}
		commit = diffInfo.ParentCommit
	}
	if fileInfo.FileType == pfs.FileType_FILE_TYPE_NONE {
		return nil, nil, pfsserver.NewErrFileNotFound(file.Path, file.Commit.Repo.Name, file.Commit.ID)
	}
	return fileInfo, blockRefs, nil
}
func (d *driver) PutFile(file *pfs.File, handle string, delimiter pfs.Delimiter, shard uint64, reader io.Reader) (retErr error) { blockClient, err := d.getBlockClient() if err != nil { return err } _client := client.APIClient{BlockAPIClient: blockClient} blockRefs, err := _client.PutBlock(delimiter, reader) if err != nil { return err } defer func() { if retErr == nil { metrics.AddFiles(1) for _, blockRef := range blockRefs.BlockRef { metrics.AddBytes(int64(blockRef.Range.Upper - blockRef.Range.Lower)) } } }() d.lock.Lock() defer d.lock.Unlock() fileType, err := d.getFileType(file, shard) if err != nil { return err } if fileType == pfs.FileType_FILE_TYPE_DIR { return fmt.Errorf("%s is a directory", file.Path) } canonicalCommit, err := d.canonicalCommit(file.Commit) if err != nil { return err } diffInfo, ok := d.diffs.get(client.NewDiff(canonicalCommit.Repo.Name, canonicalCommit.ID, shard)) if !ok { // This is a weird case since the commit existed above, it means someone // deleted the commit while the above code was running return pfsserver.NewErrCommitNotFound(canonicalCommit.Repo.Name, canonicalCommit.ID) } if diffInfo.Finished != nil { return fmt.Errorf("commit %s/%s has already been finished", canonicalCommit.Repo.Name, canonicalCommit.ID) } d.addDirs(diffInfo, file, shard) _append, ok := diffInfo.Appends[path.Clean(file.Path)] if !ok { _append = newAppend(pfs.FileType_FILE_TYPE_REGULAR) } else { _append.FileType = pfs.FileType_FILE_TYPE_REGULAR } if diffInfo.ParentCommit != nil { _append.LastRef = d.lastRef( client.NewFile(diffInfo.ParentCommit.Repo.Name, diffInfo.ParentCommit.ID, file.Path), shard, ) } diffInfo.Appends[path.Clean(file.Path)] = _append if handle == "" { _append.BlockRefs = append(_append.BlockRefs, blockRefs.BlockRef...) } else { handleBlockRefs, ok := _append.Handles[handle] if !ok { handleBlockRefs = &pfs.BlockRefs{} _append.Handles[handle] = handleBlockRefs } handleBlockRefs.BlockRef = append(handleBlockRefs.BlockRef, blockRefs.BlockRef...) 
} for _, blockRef := range blockRefs.BlockRef { diffInfo.SizeBytes += blockRef.Range.Upper - blockRef.Range.Lower } return nil }
// FinishCommit blocks until its parent has been finished/cancelled func (d *driver) FinishCommit(commit *pfs.Commit, finished *google_protobuf.Timestamp, cancel bool, shards map[uint64]bool) error { canonicalCommit, err := d.canonicalCommit(commit) if err != nil { return err } // closure so we can defer Unlock var diffInfos []*pfs.DiffInfo if err := func() error { d.lock.Lock() defer d.lock.Unlock() for shard := range shards { diffInfo, ok := d.diffs.get(client.NewDiff(canonicalCommit.Repo.Name, canonicalCommit.ID, shard)) if !ok { return pfsserver.NewErrCommitNotFound(canonicalCommit.Repo.Name, canonicalCommit.ID) } if diffInfo.ParentCommit != nil { parentDiffInfo, ok := d.diffs.get(client.NewDiff(canonicalCommit.Repo.Name, diffInfo.ParentCommit.ID, shard)) if !ok { return pfsserver.NewErrParentCommitNotFound(canonicalCommit.Repo.Name, diffInfo.ParentCommit.ID) } // Wait for parent to finish for parentDiffInfo.Finished == nil { cond, ok := d.commitConds[diffInfo.ParentCommit.ID] if !ok { return fmt.Errorf("parent commit %s/%s was not finished but a corresponding conditional variable could not be found; this is likely a bug", canonicalCommit.Repo.Name, diffInfo.ParentCommit.ID) } cond.Wait() } diffInfo.Cancelled = parentDiffInfo.Cancelled } diffInfo.Finished = finished for _, _append := range diffInfo.Appends { coalesceHandles(_append) } diffInfo.Cancelled = diffInfo.Cancelled || cancel diffInfos = append(diffInfos, diffInfo) } return nil }(); err != nil { return err } blockClient, err := d.getBlockClient() if err != nil { return err } var wg sync.WaitGroup errCh := make(chan error, 1) for _, diffInfo := range diffInfos { diffInfo := diffInfo wg.Add(1) go func() { defer wg.Done() if _, err := blockClient.CreateDiff(context.Background(), diffInfo); err != nil { select { case errCh <- err: default: } return } }() } wg.Wait() select { case err := <-errCh: return err default: } d.lock.Lock() defer d.lock.Unlock() cond, ok := d.commitConds[canonicalCommit.ID] if !ok { 
return fmt.Errorf("could not found a conditional variable to signal commit completion; this is likely a bug") } cond.Broadcast() delete(d.commitConds, canonicalCommit.ID) return nil }