// FatalWithMessage records message as a fatal event in the run log (if any)
// and then aborts the process.
func FatalWithMessage(arvLogger *logger.Logger, message string) {
	if arvLogger != nil {
		arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
			p["FATAL"] = message
			runInfo := logger.GetOrCreateMap(p, "run_info")
			runInfo["finished_at"] = time.Now()
		})
	}

	// message may contain '%', so avoid treating it as a format string.
	log.Fatal(message)
}
// GetServerStatus fetches /status.json from the given keep server and records
// the request and decoded response in the run log.
func GetServerStatus(arvLogger *logger.Logger,
	keepServer ServerAddress,
	client http.Client) {
	url := fmt.Sprintf("http://%s:%d/status.json",
		keepServer.Host,
		keepServer.Port)

	if arvLogger != nil {
		now := time.Now()
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			keepInfo := logger.GetOrCreateMap(p, "keep_info")
			serverInfo := make(map[string]interface{})
			serverInfo["status_request_sent_at"] = now
			serverInfo["host"] = keepServer.Host
			serverInfo["port"] = keepServer.Port

			keepInfo[keepServer.Uuid] = serverInfo
		})
	}

	resp, err := client.Get(url)
	if err != nil {
		loggerutil.FatalWithMessage(arvLogger,
			fmt.Sprintf("Error getting keep status from %s: %v", url, err))
	} else if resp.StatusCode != 200 {
		loggerutil.FatalWithMessage(arvLogger,
			fmt.Sprintf("Received error code %d in response to request "+
				"for %s status: %s",
				resp.StatusCode, url, resp.Status))
	}
	// FatalWithMessage exits on error, so resp is non-nil here.
	defer resp.Body.Close()

	var keepStatus map[string]interface{}
	decoder := json.NewDecoder(resp.Body)
	decoder.UseNumber()
	err = decoder.Decode(&keepStatus)
	if err != nil {
		loggerutil.FatalWithMessage(arvLogger,
			fmt.Sprintf("Error decoding keep status from %s: %v", url, err))
	}

	if arvLogger != nil {
		now := time.Now()
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			keepInfo := logger.GetOrCreateMap(p, "keep_info")
			serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
			serverInfo["status_response_processed_at"] = now
			serverInfo["status"] = keepStatus
		})
	}
}
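// Hypothetical usage sketch (not part of the original source): poll a single
// keep server's /status.json without structured logging. The host, port, and
// UUID values below are placeholders; passing a nil *logger.Logger simply
// skips the run-log updates inside GetServerStatus.
func exampleGetServerStatus() {
	server := ServerAddress{
		Host: "keep0.example.com",
		Port: 25107,
		Uuid: "zzzzz-bi6l4-000000000000000",
	}
	// A client timeout keeps a hung keep server from stalling the whole run.
	GetServerStatus(nil, server, http.Client{Timeout: 20 * time.Second})
}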
// Summarize counts how many blocks are stored at each replication level
// across the keep servers.
func (readServers *ReadServers) Summarize(arvLogger *logger.Logger) {
	readServers.BlockReplicationCounts = make(map[int]int)
	for _, infos := range readServers.BlockToServers {
		replication := len(infos)
		readServers.BlockReplicationCounts[replication]++
	}

	if arvLogger != nil {
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			keepInfo := logger.GetOrCreateMap(p, "keep_info")
			keepInfo["distinct_blocks_stored"] = len(readServers.BlockToServers)
		})
	}
}
// LogRunInfo is useful to call at the beginning of execution to log info
// about the current run.
func LogRunInfo(arvLogger *logger.Logger) {
	if arvLogger != nil {
		now := time.Now()
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			runInfo := logger.GetOrCreateMap(p, "run_info")
			runInfo["started_at"] = now
			runInfo["args"] = os.Args
			hostname, err := os.Hostname()
			if err != nil {
				runInfo["hostname_error"] = err.Error()
			} else {
				runInfo["hostname"] = hostname
			}
			runInfo["pid"] = os.Getpid()
		})
	}
}
// Summarize the collections read
func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
	readCollections.OwnerToCollectionSize = make(map[string]int)
	readCollections.BlockToDesiredReplication = make(map[blockdigest.DigestWithSize]int)
	numCollections := len(readCollections.UUIDToCollection)
	readCollections.CollectionUUIDToIndex = make(map[string]int, numCollections)
	readCollections.CollectionIndexToUUID = make([]string, 0, numCollections)
	readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)

	for _, coll := range readCollections.UUIDToCollection {
		collectionIndex := len(readCollections.CollectionIndexToUUID)
		readCollections.CollectionIndexToUUID =
			append(readCollections.CollectionIndexToUUID, coll.UUID)
		readCollections.CollectionUUIDToIndex[coll.UUID] = collectionIndex

		readCollections.OwnerToCollectionSize[coll.OwnerUUID] += coll.TotalSize

		for block, size := range coll.BlockDigestToSize {
			locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
			readCollections.BlockToCollectionIndices[locator] =
				append(readCollections.BlockToCollectionIndices[locator], collectionIndex)
			storedReplication := readCollections.BlockToDesiredReplication[locator]
			if coll.ReplicationLevel > storedReplication {
				readCollections.BlockToDesiredReplication[locator] = coll.ReplicationLevel
			}
		}
	}

	if arvLogger != nil {
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			collectionInfo := logger.GetOrCreateMap(p, "collection_info")
			// Since maps are shallow copied, we run a risk of concurrent
			// updates here. By copying readCollections.OwnerToCollectionSize
			// into the log, we're assuming that it won't be updated.
			collectionInfo["owner_to_collection_size"] =
				readCollections.OwnerToCollectionSize
			collectionInfo["distinct_blocks_named"] =
				len(readCollections.BlockToDesiredReplication)
		})
	}
}
// WritePullLists writes each pull list to a file.
// The filename is based on the hostname.
//
// This is just a hack for prototyping, it is not expected to be used
// in production.
func WritePullLists(arvLogger *logger.Logger,
	pullLists map[string]PullList,
	dryRun bool) error {
	r := strings.NewReplacer(":", ".")

	for host, list := range pullLists {
		if arvLogger != nil {
			// We need a local variable because Update doesn't call our mutator
			// func until later, when our list variable might have been reused
			// by the next loop iteration.
			host := host
			listLen := len(list)
			arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
				pullListInfo := logger.GetOrCreateMap(p, "pull_list_len")
				pullListInfo[host] = listLen
			})
		}

		if dryRun {
			log.Printf("dry run, not sending pull list to service %s with %d blocks",
				host, len(list))
			continue
		}

		filename := fmt.Sprintf("pull_list.%s",
			r.Replace(RemoveProtocolPrefix(host)))
		pullListFile, err := os.Create(filename)
		if err != nil {
			return err
		}
		defer pullListFile.Close()

		enc := json.NewEncoder(pullListFile)
		err = enc.Encode(list)
		if err != nil {
			return err
		}
		log.Printf("Wrote pull list to %s.", filename)
	}

	return nil
}
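// Hypothetical usage sketch (not part of the original source), assuming
// PullList is a slice type as suggested by the len() and JSON encoding above.
// In dry-run mode no files are created; the per-host list lengths would still
// be logged if a non-nil logger were passed. The service names are placeholders.
func exampleWritePullLists() {
	pullLists := map[string]PullList{
		"http://keep0.example.com:25107": {},
		"http://keep1.example.com:25107": {},
	}
	if err := WritePullLists(nil, pullLists, true); err != nil {
		log.Fatalf("Error writing pull lists: %v", err)
	}
}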
// CreateIndexRequest builds an authenticated GET request for the given keep
// server's /index endpoint.
func CreateIndexRequest(arvLogger *logger.Logger,
	keepServer ServerAddress,
	arv arvadosclient.ArvadosClient) (req *http.Request, err error) {
	url := fmt.Sprintf("http://%s:%d/index", keepServer.Host, keepServer.Port)
	log.Println("About to fetch keep server contents from " + url)

	if arvLogger != nil {
		now := time.Now()
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			keepInfo := logger.GetOrCreateMap(p, "keep_info")
			serverInfo := keepInfo[keepServer.UUID].(map[string]interface{})
			serverInfo["index_request_sent_at"] = now
		})
	}

	req, err = http.NewRequest("GET", url, nil)
	if err != nil {
		return req, fmt.Errorf("Error building http request for %s: %v", url, err)
	}

	req.Header.Add("Authorization", "OAuth2 "+arv.ApiToken)
	return req, err
}
func CreateIndexRequest(arvLogger *logger.Logger,
	keepServer ServerAddress) (req *http.Request) {
	url := fmt.Sprintf("http://%s:%d/index", keepServer.Host, keepServer.Port)
	log.Println("About to fetch keep server contents from " + url)

	if arvLogger != nil {
		now := time.Now()
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			keepInfo := logger.GetOrCreateMap(p, "keep_info")
			serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
			serverInfo["index_request_sent_at"] = now
		})
	}

	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		loggerutil.FatalWithMessage(arvLogger,
			fmt.Sprintf("Error building http request for %s: %v", url, err))
	}

	req.Header.Add("Authorization",
		fmt.Sprintf("OAuth2 %s", GetDataManagerToken(arvLogger)))
	return
}
// ReadServerResponse reads and parses a keep server's /index response into a
// ServerResponse, recording progress and counters in the run log.
func ReadServerResponse(arvLogger *logger.Logger,
	keepServer ServerAddress,
	resp *http.Response) (response ServerResponse) {

	if resp.StatusCode != 200 {
		loggerutil.FatalWithMessage(arvLogger,
			fmt.Sprintf("Received error code %d in response to request "+
				"for %s index: %s",
				resp.StatusCode, keepServer.String(), resp.Status))
	}

	if arvLogger != nil {
		now := time.Now()
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			keepInfo := logger.GetOrCreateMap(p, "keep_info")
			serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
			serverInfo["index_response_received_at"] = now
		})
	}

	response.Address = keepServer
	response.Contents.BlockDigestToInfo =
		make(map[blockdigest.DigestWithSize]BlockInfo)
	reader := bufio.NewReader(resp.Body)
	numLines, numDuplicates, numSizeDisagreements := 0, 0, 0
	for {
		numLines++
		line, err := reader.ReadString('\n')
		if err == io.EOF {
			loggerutil.FatalWithMessage(arvLogger,
				fmt.Sprintf("Index from %s truncated at line %d",
					keepServer.String(), numLines))
		} else if err != nil {
			loggerutil.FatalWithMessage(arvLogger,
				fmt.Sprintf("Error reading index response from %s at line %d: %v",
					keepServer.String(), numLines, err))
		}
		if line == "\n" {
			if _, err := reader.Peek(1); err == nil {
				extra, _ := reader.ReadString('\n')
				loggerutil.FatalWithMessage(arvLogger,
					fmt.Sprintf("Index from %s had trailing data at line %d "+
						"after EOF marker: %s",
						keepServer.String(), numLines+1, extra))
			} else if err != io.EOF {
				loggerutil.FatalWithMessage(arvLogger,
					fmt.Sprintf("Index from %s had read error after EOF marker "+
						"at line %d: %v",
						keepServer.String(), numLines, err))
			}
			numLines--
			break
		}
		blockInfo, err := parseBlockInfoFromIndexLine(line)
		if err != nil {
			loggerutil.FatalWithMessage(arvLogger,
				fmt.Sprintf("Error parsing BlockInfo from index line "+
					"received from %s: %v",
					keepServer.String(), err))
		}

		if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
			// This server returned multiple lines containing the same block digest.
			numDuplicates++
			// Keep the block that's newer.
			if storedBlock.Mtime < blockInfo.Mtime {
				response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
			}
		} else {
			response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
		}
	}

	log.Printf("%s index contained %d lines with %d duplicates with "+
		"%d size disagreements",
		keepServer.String(), numLines, numDuplicates, numSizeDisagreements)

	if arvLogger != nil {
		now := time.Now()
		arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
			keepInfo := logger.GetOrCreateMap(p, "keep_info")
			serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
			serverInfo["processing_finished_at"] = now
			serverInfo["lines_received"] = numLines
			serverInfo["duplicates_seen"] = numDuplicates
			serverInfo["size_disagreements_seen"] = numSizeDisagreements
		})
	}
	resp.Body.Close()
	return
}
// singlerun performs a single data manager pass: it reads collection and keep
// server data, summarizes block replication, writes pull lists, and sends
// trash lists.
func singlerun() error {
	arv, err := arvadosclient.MakeArvadosClient()
	if err != nil {
		log.Fatalf("Error setting up arvados client %s", err.Error())
	}

	if isAdmin, err := util.UserIsAdmin(arv); err != nil {
		log.Fatalf("Error querying current arvados user %s", err.Error())
	} else if !isAdmin {
		log.Fatalf("Current user is not an admin. Datamanager can only be run by admins.")
	}

	var arvLogger *logger.Logger
	if logEventTypePrefix != "" {
		arvLogger = logger.NewLogger(logger.LoggerParams{
			Client:          arv,
			EventTypePrefix: logEventTypePrefix,
			WriteInterval:   time.Second * time.Duration(logFrequencySeconds)})
	}

	loggerutil.LogRunInfo(arvLogger)
	if arvLogger != nil {
		arvLogger.AddWriteHook(loggerutil.LogMemoryAlloc)
	}

	var (
		dataFetcher     summary.DataFetcher
		readCollections collection.ReadCollections
		keepServerInfo  keep.ReadServers
	)

	if summary.ShouldReadData() {
		dataFetcher = summary.ReadData
	} else {
		dataFetcher = BuildDataFetcher(arv)
	}

	dataFetcher(arvLogger, &readCollections, &keepServerInfo)

	summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)

	buckets := summary.BucketReplication(readCollections, keepServerInfo)
	bucketCounts := buckets.Counts()

	replicationSummary := buckets.SummarizeBuckets(readCollections)
	replicationCounts := replicationSummary.ComputeCounts()

	log.Printf("Blocks In Collections: %d, "+
		"\nBlocks In Keep: %d.",
		len(readCollections.BlockToDesiredReplication),
		len(keepServerInfo.BlockToServers))
	log.Println(replicationCounts.PrettyPrint())

	log.Printf("Blocks Histogram:")
	for _, rlbss := range bucketCounts {
		log.Printf("%+v: %10d",
			rlbss.Levels,
			rlbss.Count)
	}

	kc, err := keepclient.MakeKeepClient(&arv)
	if err != nil {
		loggerutil.FatalWithMessage(arvLogger,
			fmt.Sprintf("Error setting up keep client %s", err.Error()))
	}

	// Log that we're finished. We force the recording, since go will
	// not wait for the write timer before exiting.
	if arvLogger != nil {
		defer arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
			summaryInfo := logger.GetOrCreateMap(p, "summary_info")
			summaryInfo["block_replication_counts"] = bucketCounts
			summaryInfo["replication_summary"] = replicationCounts
			p["summary_info"] = summaryInfo

			p["run_info"].(map[string]interface{})["finished_at"] = time.Now()
		})
	}

	pullServers := summary.ComputePullServers(kc,
		&keepServerInfo,
		readCollections.BlockToDesiredReplication,
		replicationSummary.UnderReplicatedBlocks)
	pullLists := summary.BuildPullLists(pullServers)

	trashLists, trashErr := summary.BuildTrashLists(kc,
		&keepServerInfo,
		replicationSummary.KeepBlocksNotInCollections)

	summary.WritePullLists(arvLogger, pullLists)

	if trashErr != nil {
		return trashErr
	}
	keep.SendTrashLists(keep.GetDataManagerToken(arvLogger), kc, trashLists)

	return nil
}
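// Hypothetical driver sketch (not part of the original source): one way an
// entry point could invoke singlerun. The real datamanager's main (flag
// definitions, retry behavior) may differ; this only illustrates that
// singlerun reports recoverable problems through its error return.
func main() {
	flag.Parse()
	if err := singlerun(); err != nil {
		log.Fatalf("Datamanager run failed: %v", err)
	}
}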
// SendTrashLists to trash queue
func SendTrashLists(arvLogger *logger.Logger,
	kc *keepclient.KeepClient,
	spl map[string]TrashList,
	dryRun bool) (errs []error) {
	count := 0
	barrier := make(chan error)

	client := kc.Client

	for url, v := range spl {
		if arvLogger != nil {
			// We need a local variable because Update doesn't call our mutator
			// func until later, when our list variable might have been reused
			// by the next loop iteration.
			url := url
			trashLen := len(v)
			arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
				trashListInfo := logger.GetOrCreateMap(p, "trash_list_len")
				trashListInfo[url] = trashLen
			})
		}

		if dryRun {
			log.Printf("dry run, not sending trash list to service %s with %d blocks",
				url, len(v))
			continue
		}

		count++
		log.Printf("Sending trash list to %v", url)

		go (func(url string, v TrashList) {
			pipeReader, pipeWriter := io.Pipe()
			go (func() {
				enc := json.NewEncoder(pipeWriter)
				enc.Encode(v)
				pipeWriter.Close()
			})()

			req, err := http.NewRequest("PUT", fmt.Sprintf("%s/trash", url), pipeReader)
			if err != nil {
				log.Printf("Error creating trash list request for %v error: %v", url, err.Error())
				barrier <- err
				return
			}

			req.Header.Add("Authorization", "OAuth2 "+kc.Arvados.ApiToken)

			// Make the request
			var resp *http.Response
			if resp, err = client.Do(req); err != nil {
				log.Printf("Error sending trash list to %v error: %v", url, err.Error())
				barrier <- err
				return
			}

			log.Printf("Sent trash list to %v: response was HTTP %v", url, resp.Status)

			io.Copy(ioutil.Discard, resp.Body)
			resp.Body.Close()

			if resp.StatusCode != 200 {
				barrier <- fmt.Errorf("Got HTTP code %v", resp.StatusCode)
			} else {
				barrier <- nil
			}
		})(url, v)
	}

	for i := 0; i < count; i++ {
		if b := <-barrier; b != nil {
			errs = append(errs, b)
		}
	}

	return errs
}
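// Hypothetical usage sketch (not part of the original source), assuming
// TrashList is a slice type as suggested by the len() and JSON encoding above.
// A dry run logs what would be sent and returns without contacting any
// service; the service URL below is a placeholder.
func exampleSendTrashLists(kc *keepclient.KeepClient) {
	trashLists := map[string]TrashList{
		"http://keep0.example.com:25107": {},
	}
	for _, err := range SendTrashLists(nil, kc, trashLists, true) {
		log.Printf("Trash list error: %v", err)
	}
}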