func (bhcs *httpBatchStore) sendWriteRequests(hashes hash.HashSet, hints types.Hints) {
	if len(hashes) == 0 {
		return
	}
	bhcs.rateLimit <- struct{}{}
	go func() {
		defer func() {
			<-bhcs.rateLimit
			bhcs.unwrittenPuts.Clear(hashes)
			bhcs.requestWg.Add(-len(hashes))
		}()

		var res *http.Response
		var err error
		for tryAgain := true; tryAgain; {
			chunkChan := make(chan *chunks.Chunk, 1024)
			go func() {
				bhcs.unwrittenPuts.ExtractChunks(hashes, chunkChan)
				close(chunkChan)
			}()

			body := buildWriteValueRequest(chunkChan, hints)
			url := *bhcs.host
			url.Path = httprouter.CleanPath(bhcs.host.Path + constants.WriteValuePath)
			// TODO: Make this accept snappy encoding
			req := newRequest("POST", bhcs.auth, url.String(), body, http.Header{
				"Accept-Encoding":  {"gzip"},
				"Content-Encoding": {"x-snappy-framed"},
				"Content-Type":     {"application/octet-stream"},
			})

			res, err = bhcs.httpClient.Do(req)
			d.PanicIfError(err)
			expectVersion(res)
			defer closeResponse(res.Body)

			if tryAgain = res.StatusCode == httpStatusTooManyRequests; tryAgain {
				reader := res.Body
				if strings.Contains(res.Header.Get("Content-Encoding"), "gzip") {
					gr, err := gzip.NewReader(reader)
					d.PanicIfError(err)
					defer gr.Close()
					reader = gr
				}
				/*hashes :=*/ deserializeHashes(reader)
				// TODO: BUG 1259 Since the client must currently send all chunks in one batch,
				// the only thing to do in response to backpressure is send EVERYTHING again.
				// Once batching is again possible, this code should figure out how to resend
				// the chunks indicated by hashes.
			}
		}

		d.PanicIfTrue(http.StatusCreated != res.StatusCode, "Unexpected response: %s", formatErrorResponse(res))
	}()
}
func Parse(s string) Hash {
	r, ok := MaybeParse(s)
	if !ok {
		d.PanicIfError(fmt.Errorf("Could not parse Hash: %s", s))
	}
	return r
}
// DumbCopy copies the contents of a regular file at srcPath (following symlinks)
// to a new regular file at dstPath. The new file is created with the same mode.
func DumbCopy(srcPath, dstPath string) {
	chkClose := func(c io.Closer) {
		d.PanicIfError(c.Close())
	}
	info, err := os.Stat(srcPath)
	d.PanicIfError(err)

	src, err := os.Open(srcPath)
	d.PanicIfError(err)
	defer chkClose(src)

	dst, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, info.Mode())
	d.PanicIfError(err)
	defer chkClose(dst)

	_, err = io.Copy(dst, src)
	d.PanicIfError(err)
}
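// exampleDumbCopy is an illustrative usage sketch, not part of the original
// source; the paths below are hypothetical. It simply copies one file to
// another location, relying on DumbCopy to panic (via d) on any I/O error.
func exampleDumbCopy() {
	DumbCopy("/tmp/example-src.dat", "/tmp/example-dst.dat")
}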
func (lbs *localBatchStore) Flush() {
	lbs.once.Do(lbs.expectVersion)

	chunkChan := make(chan *chunks.Chunk, 128)
	go func() {
		err := lbs.unwrittenPuts.ExtractChunks(lbs.hashes, chunkChan)
		d.Chk.NoError(err)
		close(chunkChan)
	}()

	lbs.vbs.Prepare(lbs.hints)
	var bpe chunks.BackpressureError
	for c := range chunkChan {
		if bpe == nil {
			bpe = lbs.vbs.Enqueue(*c)
		} else {
			bpe = append(bpe, c.Hash())
		}
	}
	if bpe == nil {
		bpe = lbs.vbs.Flush()
	}
	// TODO: Should do something useful with bpe. That will require keeping track of
	// which chunk hashes have been SchedulePut'd.
	if bpe != nil {
		// The nil check above is required: bpe has the concrete type
		// chunks.BackpressureError, so passing it to PanicIfError's error
		// parameter produces a non-nil interface value even when bpe is nil.
		d.PanicIfError(bpe)
	}
	lbs.unwrittenPuts.Clear(lbs.hashes)
	lbs.hashes = hash.HashSet{}
	lbs.hints = types.Hints{}
}
func writeEncodedValue(w io.Writer, v types.Value) {
	if v.Type().Kind() == types.BlobKind {
		w.Write([]byte("Blob ("))
		w.Write([]byte(humanize.Bytes(v.(types.Blob).Len())))
		w.Write([]byte(")"))
	} else {
		d.PanicIfError(types.WriteEncodedValue(w, v))
	}
}
func bodyReader(req *http.Request) (reader io.ReadCloser) {
	reader = req.Body
	if strings.Contains(req.Header.Get("Content-Encoding"), "gzip") {
		gr, err := gzip.NewReader(reader)
		d.PanicIfError(err)
		reader = gr
	} else if strings.Contains(req.Header.Get("Content-Encoding"), "x-snappy-framed") {
		sr := snappy.NewReader(reader)
		reader = ioutil.NopCloser(sr)
	}
	return
}
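// handleSketch is an illustrative sketch, not part of the original source: a
// hypothetical HTTP handler showing how bodyReader is meant to be used to
// transparently decode a gzip- or snappy-framed request body before reading it.
func handleSketch(w http.ResponseWriter, req *http.Request) {
	reader := bodyReader(req)
	defer reader.Close()
	body, err := ioutil.ReadAll(reader) // decompressed request body
	d.PanicIfError(err)
	_ = body // a real handler would deserialize chunks or hashes from body
}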
func extractHashes(req *http.Request) hash.HashSlice {
	err := req.ParseForm()
	d.PanicIfError(err)
	hashStrs := req.PostForm["ref"]
	d.PanicIfTrue(len(hashStrs) <= 0, "PostForm is empty")

	hashes := make(hash.HashSlice, len(hashStrs))
	for idx, refStr := range hashStrs {
		hashes[idx] = hash.Parse(refStr)
	}
	return hashes
}
func expectVersion(res *http.Response) {
	dataVersion := res.Header.Get(NomsVersionHeader)
	if constants.NomsVersion != dataVersion {
		b, _ := ioutil.ReadAll(res.Body)
		res.Body.Close()
		d.PanicIfError(fmt.Errorf(
			"Version mismatch\n\r"+
				"\tSDK version '%s' is incompatible with data of version: '%s'\n\r"+
				"\tHTTP Response: %d (%s): %s\n",
			constants.NomsVersion, dataVersion,
			res.StatusCode, res.Status, string(b)))
	}
}
// MaybeStartProfile checks the -blockProfile, -cpuProfile, and -memProfile flags
// and, for each that is set, attempts to start gathering profiling data into the
// appropriate file. It returns an object with one method, Stop(), that must be
// called in order to flush profile data to disk before the process terminates.
func MaybeStartProfile() interface {
	Stop()
} {
	p := &prof{}
	if blockProfile != "" {
		f, err := os.Create(blockProfile)
		d.PanicIfError(err)
		runtime.SetBlockProfileRate(1)
		p.bp = f
	}
	if cpuProfile != "" {
		f, err := os.Create(cpuProfile)
		d.PanicIfError(err)
		pprof.StartCPUProfile(f)
		p.cpu = f
	}
	if memProfile != "" {
		f, err := os.Create(memProfile)
		d.PanicIfError(err)
		p.mem = f
	}
	return p
}
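// exampleProfiledRun is an illustrative sketch, not part of the original source.
// It shows the intended call pattern (the same one the importers later in this
// section use via profile.MaybeStartProfile): once the profiling flags have been
// parsed, defer Stop() so any profile data is flushed before the process exits.
func exampleProfiledRun(doWork func()) {
	defer MaybeStartProfile().Stop()
	doWork()
}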
func newBackingStore(dir string, maxFileHandles int, dumpStats bool) *internalLevelDBStore {
	d.PanicIfTrue(dir == "", "dir cannot be empty")
	d.PanicIfError(os.MkdirAll(dir, 0700))
	db, err := leveldb.OpenFile(dir, &opt.Options{
		Compression:            opt.NoCompression,
		Filter:                 filter.NewBloomFilter(10), // 10 bits/key
		OpenFilesCacheCapacity: maxFileHandles,
		WriteBuffer:            1 << 24, // 16MiB
	})
	d.Chk.NoError(err, "opening internalLevelDBStore in %s", dir)
	return &internalLevelDBStore{
		db:                   db,
		mu:                   &sync.Mutex{},
		concurrentWriteLimit: make(chan struct{}, maxFileHandles),
		dumpStats:            dumpStats,
	}
}
func (bhcs *httpBatchStore) requestRoot(method string, current, last hash.Hash) *http.Response {
	u := *bhcs.host
	u.Path = httprouter.CleanPath(bhcs.host.Path + constants.RootPath)
	if method == "POST" {
		d.PanicIfTrue(current.IsEmpty(), "Unexpected empty value")
		params := u.Query()
		params.Add("last", last.String())
		params.Add("current", current.String())
		u.RawQuery = params.Encode()
	}

	req := newRequest(method, bhcs.auth, u.String(), nil, nil)

	res, err := bhcs.httpClient.Do(req)
	d.PanicIfError(err)

	return res
}
func newOrderedChunkCache() *orderedChunkCache {
	dir, err := ioutil.TempDir("", "")
	d.PanicIfError(err)
	db, err := leveldb.OpenFile(dir, &opt.Options{
		Compression:            opt.NoCompression,
		Filter:                 filter.NewBloomFilter(10), // 10 bits/key
		OpenFilesCacheCapacity: 24,
		// We don't need this data to be durable. LDB is acting as temporary sorting
		// storage that can be larger than main memory.
		NoSync:      true,
		WriteBuffer: 1 << 27, // 128MiB
	})
	d.Chk.NoError(err, "opening put cache in %s", dir)
	return &orderedChunkCache{
		orderedChunks: db,
		chunkIndex:    map[hash.Hash][]byte{},
		dbDir:         dir,
		mu:            &sync.RWMutex{},
	}
}
// Serial serially runs all instances of filename found under dir, mapping stdout
// and stderr to each subprocess in the obvious way. env is overlaid on the
// environment of the current process. If args are provided, they're passed en
// masse to each subprocess.
func Serial(stdout, stderr io.Writer, env Env, dir, filename string, args ...string) bool {
	success := true
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if os.IsNotExist(err) {
			// Some programs like npm create temporary log files which confuse filepath.Walk.
			return nil
		}
		d.PanicIfTrue(err != nil, "Failed directory traversal at %s", path)
		if !info.IsDir() && filepath.Base(path) == filename {
			scriptAndArgs := append([]string{filepath.Base(path)}, args...)
			runErr := runEnvDir(stdout, stderr, env, filepath.Dir(path), "python", scriptAndArgs...)
			if runErr != nil {
				success = false
				fmt.Fprintf(stderr, "Running %s failed with %v\n", path, runErr)
			}
		}
		return nil
	})
	d.PanicIfError(err)
	return success
}
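// exampleSerialRun is an illustrative sketch, not part of the original source;
// the directory, script name, and PYTHONPATH value are hypothetical. It mirrors
// the call pattern used by the staging tool below: run every "build.py" found
// under the current directory with an overlaid environment, and exit non-zero
// if any of them fail.
func exampleSerialRun() {
	env := Env{"PYTHONPATH": "/path/to/noms/tools"}
	if !Serial(os.Stdout, os.Stderr, env, ".", "build.py") {
		os.Exit(1)
	}
}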
func (s *DynamoStore) Root() hash.Hash {
	result, err := s.ddbsvc.GetItem(&dynamodb.GetItemInput{
		TableName: aws.String(s.table),
		Key: map[string]*dynamodb.AttributeValue{
			refAttr: {B: s.rootKey},
		},
	})
	d.PanicIfError(err)

	itemLen := len(result.Item)
	if itemLen == 0 {
		return hash.Hash{}
	}
	d.Chk.True(itemLen == 2 || itemLen == 3, "Root should have 2 or 3 attributes on it: %+v", result.Item)
	if itemLen == 3 {
		d.Chk.True(result.Item[compAttr] != nil)
		d.Chk.True(result.Item[compAttr].S != nil)
		d.Chk.True(noneValue == *result.Item[compAttr].S)
	}
	return hash.FromSlice(result.Item[chunkAttr].B)
}
func main() { flag.Usage = func() { fmt.Fprintf(os.Stderr, "usage: %s <url> <dataset>\n", os.Args[0]) flag.PrintDefaults() } spec.RegisterDatabaseFlags(flag.CommandLine) flag.Parse(true) if len(flag.Args()) != 2 { d.CheckError(errors.New("expected url and dataset flags")) } ds, err := spec.GetDataset(flag.Arg(1)) d.CheckError(err) url := flag.Arg(0) if url == "" { flag.Usage() } res, err := http.Get(url) if err != nil { log.Fatalf("Error fetching %s: %+v\n", url, err) } else if res.StatusCode != 200 { log.Fatalf("Error fetching %s: %s\n", url, res.Status) } defer res.Body.Close() var jsonObject interface{} err = json.NewDecoder(res.Body).Decode(&jsonObject) if err != nil { log.Fatalln("Error decoding JSON: ", err) } _, err = ds.CommitValue(jsontonoms.NomsValueFromDecodedJSON(jsonObject, true)) d.PanicIfError(err) ds.Database().Close() }
func main() { flag.Usage = func() { fmt.Fprintf(os.Stderr, "Usage of %s:\n %s path/to/staging/dir\n", os.Args[0], os.Args[0]) } flag.Parse(true) if flag.Arg(0) == "" { flag.Usage() os.Exit(1) } err := d.Try(func() { stagingDir, err := filepath.Abs(flag.Arg(0)) d.PanicIfTrue(err != nil, "Path to staging directory (first arg) must be valid, not %s", flag.Arg(0)) d.PanicIfError(os.MkdirAll(stagingDir, 0755)) goPath := os.Getenv("GOPATH") d.PanicIfTrue(goPath == "", "GOPATH must be set!") workspace := os.Getenv("WORKSPACE") if workspace == "" { fmt.Printf("WORKSPACE not set in environment; using GOPATH (%s).\n", goPath) workspace = goPath } pythonPath := filepath.Join(goPath, nomsCheckoutPath, "tools") env := runner.Env{ "PYTHONPATH": pythonPath, } if !runner.Serial(os.Stdout, os.Stderr, env, ".", buildScript) { os.Exit(1) } if !runner.Serial(os.Stdout, os.Stderr, env, ".", stageScript, stagingDir) { os.Exit(1) } }) if err != nil { log.Fatal(err) } }
func newHTTPBatchStore(baseURL, auth string) *httpBatchStore {
	u, err := url.Parse(baseURL)
	d.PanicIfError(err)
	d.PanicIfTrue(u.Scheme != "http" && u.Scheme != "https", "Unrecognized scheme: %s", u.Scheme)
	buffSink := &httpBatchStore{
		host:          u,
		httpClient:    makeHTTPClient(httpChunkSinkConcurrency),
		auth:          auth,
		getQueue:      make(chan chunks.ReadRequest, readBufferSize),
		hasQueue:      make(chan chunks.ReadRequest, readBufferSize),
		writeQueue:    make(chan writeRequest, writeBufferSize),
		flushChan:     make(chan struct{}),
		finishedChan:  make(chan struct{}),
		rateLimit:     make(chan struct{}, httpChunkSinkConcurrency),
		requestWg:     &sync.WaitGroup{},
		workerWg:      &sync.WaitGroup{},
		unwrittenPuts: newOrderedChunkCache(),
	}
	buffSink.batchGetRequests()
	buffSink.batchHasRequests()
	buffSink.batchPutRequests()
	return buffSink
}
func main() {
	err := d.Try(func() {
		spec.RegisterDatabaseFlags(flag.CommandLine)
		profile.RegisterProfileFlags(flag.CommandLine)
		flag.Usage = customUsage
		flag.Parse(true)

		if flag.NArg() != 2 {
			d.CheckError(errors.New("Expected directory path followed by dataset"))
		}
		dir := flag.Arg(0)
		ds, err := spec.GetDataset(flag.Arg(1))
		d.CheckError(err)

		defer profile.MaybeStartProfile().Stop()

		cpuCount := runtime.NumCPU()

		filesChan := make(chan fileIndex, 1024)
		refsChan := make(chan refIndex, 1024)

		getFilePaths := func() {
			index := 0
			err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
				d.PanicIfTrue(err != nil, "Cannot traverse directories")
				if !info.IsDir() && filepath.Ext(path) == ".xml" {
					filesChan <- fileIndex{path, index}
					index++
				}
				return nil
			})
			d.PanicIfError(err)
			close(filesChan)
		}

		wg := sync.WaitGroup{}
		importXML := func() {
			expectedType := types.NewMap()
			for f := range filesChan {
				file, err := os.Open(f.path)
				d.PanicIfTrue(err != nil, "Error getting XML")

				xmlObject, err := mxj.NewMapXmlReader(file)
				d.PanicIfTrue(err != nil, "Error decoding XML")
				object := xmlObject.Old()
				file.Close()

				nomsObj := jsontonoms.NomsValueFromDecodedJSON(object, false)
				d.Chk.IsType(expectedType, nomsObj)

				var r types.Ref
				if !*noIO {
					r = ds.Database().WriteValue(nomsObj)
				}

				refsChan <- refIndex{r, f.index}
			}

			wg.Done()
		}

		go getFilePaths()
		for i := 0; i < cpuCount*8; i++ {
			wg.Add(1)
			go importXML()
		}
		go func() {
			wg.Wait()
			close(refsChan) // done converting xml to noms
		}()

		refList := refIndexList{}
		for r := range refsChan {
			refList = append(refList, r)
		}
		sort.Sort(refList)

		refs := make([]types.Value, len(refList))
		for idx, r := range refList {
			refs[idx] = r.ref
		}

		rl := types.NewList(refs...)

		if !*noIO {
			_, err := ds.CommitValue(rl)
			d.PanicIfError(err)
		}
	})

	if err != nil {
		log.Fatal(err)
	}
}
func main() {
	// Actually the delimiter uses runes, which can be multiple characters long.
	// https://blog.golang.org/strings
	delimiter := flag.String("delimiter", ",", "field delimiter for csv file, must be exactly one character long.")
	comment := flag.String("comment", "", "comment to add to commit's meta data")
	header := flag.String("header", "", "header row. If empty, we'll use the first row of the file")
	name := flag.String("name", "Row", "struct name. The user-visible name to give to the struct type that will hold each row of data.")
	columnTypes := flag.String("column-types", "", "a comma-separated list of types representing the desired type of each column. If absent, all types default to String")
	pathDescription := "noms path to blob to import"
	path := flag.String("path", "", pathDescription)
	flag.StringVar(path, "p", "", pathDescription)
	dateFlag := flag.String("date", "", fmt.Sprintf(`date of commit in ISO 8601 format ("%s"). By default, the current date is used.`, dateFormat))
	noProgress := flag.Bool("no-progress", false, "prevents progress from being output if true")
	destType := flag.String("dest-type", "list", "the destination type to import to. Can be 'list' or 'map:<pk>', where <pk> is the index position (0-based) of the column that uniquely identifies each row")
	skipRecords := flag.Uint("skip-records", 0, "number of records to skip at beginning of file")

	destTypePattern := regexp.MustCompile("^(list|map):(\\d+)$")

	spec.RegisterDatabaseFlags(flag.CommandLine)
	profile.RegisterProfileFlags(flag.CommandLine)

	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: csv-import [options] <csvfile> <dataset>\n\n")
		flag.PrintDefaults()
	}

	flag.Parse(true)

	var err error
	switch {
	case flag.NArg() == 0:
		err = errors.New("Maybe you put options after the dataset?")
	case flag.NArg() == 1 && *path == "":
		err = errors.New("If <csvfile> isn't specified, you must specify a noms path with -p")
	case flag.NArg() == 2 && *path != "":
		err = errors.New("Cannot specify both <csvfile> and a noms path with -p")
	case flag.NArg() > 2:
		err = errors.New("Too many arguments")
	}
	d.CheckError(err)

	var date = *dateFlag
	if date == "" {
		date = time.Now().UTC().Format(dateFormat)
	} else {
		_, err := time.Parse(dateFormat, date)
		d.CheckErrorNoUsage(err)
	}

	defer profile.MaybeStartProfile().Stop()

	var r io.Reader
	var size uint64
	var filePath string
	var dataSetArgN int

	if *path != "" {
		db, val, err := spec.GetPath(*path)
		d.CheckError(err)
		if val == nil {
			d.CheckError(fmt.Errorf("Path %s not found\n", *path))
		}
		blob, ok := val.(types.Blob)
		if !ok {
			d.CheckError(fmt.Errorf("Path %s not a Blob: %s\n", *path, types.EncodedValue(val.Type())))
		}
		defer db.Close()
		r = blob.Reader()
		size = blob.Len()
		dataSetArgN = 0
	} else {
		filePath = flag.Arg(0)
		res, err := os.Open(filePath)
		d.CheckError(err)
		defer res.Close()
		fi, err := res.Stat()
		d.CheckError(err)
		r = res
		size = uint64(fi.Size())
		dataSetArgN = 1
	}

	if !*noProgress {
		r = progressreader.New(r, getStatusPrinter(size))
	}

	comma, err := csv.StringToRune(*delimiter)
	d.CheckErrorNoUsage(err)

	var dest int
	var pk int
	if *destType == "list" {
		dest = destList
	} else if match := destTypePattern.FindStringSubmatch(*destType); match != nil {
		dest = destMap
		pk, err = strconv.Atoi(match[2])
		d.CheckErrorNoUsage(err)
	} else {
		fmt.Println("Invalid dest-type: ", *destType)
		return
	}

	cr := csv.NewCSVReader(r, comma)
	for i := uint(0); i < *skipRecords; i++ {
		cr.Read()
	}

	var headers []string
	if *header == "" {
		headers, err = cr.Read()
		d.PanicIfError(err)
	} else {
		headers = strings.Split(*header, string(comma))
	}

	ds, err := spec.GetDataset(flag.Arg(dataSetArgN))
	d.CheckError(err)
	defer ds.Database().Close()

	kinds := []types.NomsKind{}
	if *columnTypes != "" {
		kinds = csv.StringsToKinds(strings.Split(*columnTypes, ","))
	}

	var value types.Value
	if dest == destList {
		value, _ = csv.ReadToList(cr, *name, headers, kinds, ds.Database())
	} else {
		value = csv.ReadToMap(cr, headers, pk, kinds, ds.Database())
	}

	mi := metaInfoForCommit(date, filePath, *path, *comment)
	_, err = ds.Commit(value, dataset.CommitOptions{Meta: mi})
	if !*noProgress {
		status.Clear()
	}
	d.PanicIfError(err)
}
func write(w io.Writer, b []byte) {
	_, err := w.Write(b)
	d.PanicIfError(err)
}
func writeEncodedValueWithTags(w io.Writer, v types.Value) {
	d.PanicIfError(types.WriteEncodedValueWithTags(w, v))
}