Example #1
func (bhcs *httpBatchStore) sendWriteRequests(hashes hash.HashSet, hints types.Hints) {
	if len(hashes) == 0 {
		return
	}
	bhcs.rateLimit <- struct{}{}
	go func() {
		defer func() {
			<-bhcs.rateLimit
			bhcs.unwrittenPuts.Clear(hashes)
			bhcs.requestWg.Add(-len(hashes))
		}()

		var res *http.Response
		var err error
		for tryAgain := true; tryAgain; {
			chunkChan := make(chan *chunks.Chunk, 1024)
			go func() {
				bhcs.unwrittenPuts.ExtractChunks(hashes, chunkChan)
				close(chunkChan)
			}()

			body := buildWriteValueRequest(chunkChan, hints)
			url := *bhcs.host
			url.Path = httprouter.CleanPath(bhcs.host.Path + constants.WriteValuePath)
			// TODO: Make this accept snappy encoding
			req := newRequest("POST", bhcs.auth, url.String(), body, http.Header{
				"Accept-Encoding":  {"gzip"},
				"Content-Encoding": {"x-snappy-framed"},
				"Content-Type":     {"application/octet-stream"},
			})

			res, err = bhcs.httpClient.Do(req)
			d.PanicIfError(err)
			expectVersion(res)
			defer closeResponse(res.Body) // runs when the goroutine exits, not per iteration, so bodies from retried attempts stay open until all tries finish

			if tryAgain = res.StatusCode == httpStatusTooManyRequests; tryAgain {
				reader := res.Body
				if strings.Contains(res.Header.Get("Content-Encoding"), "gzip") {
					gr, err := gzip.NewReader(reader)
					d.PanicIfError(err)
					defer gr.Close()
					reader = gr
				}
				/*hashes :=*/ deserializeHashes(reader)
				// TODO: BUG 1259 Since the client must currently send all chunks in one batch, the only thing to do in response to backpressure is send EVERYTHING again. Once batching is again possible, this code should figure out how to resend the chunks indicated by hashes.
			}
		}

		d.PanicIfTrue(res.StatusCode != http.StatusCreated, "Unexpected response: %s", formatErrorResponse(res))
	}()
}
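The rateLimit channel above is a counting semaphore: the send blocks once httpChunkSinkConcurrency goroutines are in flight, and each worker frees its slot in the deferred receive. A minimal, self-contained sketch of the same idiom (names and counts here are illustrative, not from the codebase):

package main

import (
	"fmt"
	"sync"
)

func main() {
	const maxInFlight = 3 // plays the role of httpChunkSinkConcurrency
	sem := make(chan struct{}, maxInFlight)
	var wg sync.WaitGroup

	for i := 0; i < 10; i++ {
		sem <- struct{}{} // acquire a slot; blocks while maxInFlight workers are running
		wg.Add(1)
		go func(n int) {
			defer func() {
				<-sem // release the slot
				wg.Done()
			}()
			fmt.Println("working on", n)
		}(i)
	}
	wg.Wait()
}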
Example #2
func Parse(s string) Hash {
	r, ok := MaybeParse(s)
	if !ok {
		d.PanicIfError(fmt.Errorf("Could not parse Hash: %s", s))
	}
	return r
}
Example #3
// DumbCopy copies the contents of a regular file at srcPath (following symlinks) to a new regular file at dstPath. The new file is created with the same mode.
func DumbCopy(srcPath, dstPath string) {
	chkClose := func(c io.Closer) { d.PanicIfError(c.Close()) }

	info, err := os.Stat(srcPath)
	d.PanicIfError(err)

	src, err := os.Open(srcPath)
	d.PanicIfError(err)
	defer chkClose(src)

	dst, err := os.OpenFile(dstPath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, info.Mode())
	d.PanicIfError(err)
	defer chkClose(dst)
	_, err = io.Copy(dst, src)
	d.PanicIfError(err)
}
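A hypothetical call (the paths are invented for illustration). Note that any failure panics via d.PanicIfError instead of returning an error, so callers wanting recovery should wrap it in d.Try:

// Back up a config file, preserving its mode bits; panics on any I/O failure.
DumbCopy("config.json", "config.json.bak")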
Example #4
func (lbs *localBatchStore) Flush() {
	lbs.once.Do(lbs.expectVersion)

	chunkChan := make(chan *chunks.Chunk, 128)
	go func() {
		err := lbs.unwrittenPuts.ExtractChunks(lbs.hashes, chunkChan)
		d.Chk.NoError(err)
		close(chunkChan)
	}()

	lbs.vbs.Prepare(lbs.hints)
	var bpe chunks.BackpressureError
	for c := range chunkChan {
		if bpe == nil {
			bpe = lbs.vbs.Enqueue(*c)
		} else {
			bpe = append(bpe, c.Hash())
		}
	}
	if bpe == nil {
		bpe = lbs.vbs.Flush()
	}
	// TODO: Do something real with bpe; that requires tracking which chunk hashes have been SchedulePut'd. :-/
	if bpe != nil {
		d.PanicIfError(bpe) // guarded because BackpressureError is a concrete slice type: a nil bpe stored in the error interface is non-nil (typed nil), so an unguarded call would always panic
	}
	lbs.unwrittenPuts.Clear(lbs.hashes)
	lbs.hashes = hash.HashSet{}
	lbs.hints = types.Hints{}
}
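The guard above exists because of Go's typed-nil rule: chunks.BackpressureError is a concrete slice type, and a nil value of a concrete type stored in the error interface compares non-nil. A standalone sketch of the gotcha (this BackpressureError is a stand-in for the real one):

package main

import "fmt"

// BackpressureError stands in for chunks.BackpressureError: a slice type that implements error.
type BackpressureError []string

func (bpe BackpressureError) Error() string {
	return fmt.Sprintf("backpressure on %d chunks", len(bpe))
}

func main() {
	var bpe BackpressureError // nil slice of a concrete type
	var err error = bpe       // the interface now holds (type BackpressureError, value nil)
	fmt.Println(bpe == nil)   // true
	fmt.Println(err == nil)   // false: an unguarded d.PanicIfError(bpe) would always fire
}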
Example #5
func writeEncodedValue(w io.Writer, v types.Value) {
	if v.Type().Kind() == types.BlobKind {
		w.Write([]byte("Blob ("))
		w.Write([]byte(humanize.Bytes(v.(types.Blob).Len())))
		w.Write([]byte(")"))
	} else {
		d.PanicIfError(types.WriteEncodedValue(w, v))
	}
}
func bodyReader(req *http.Request) (reader io.ReadCloser) {
	reader = req.Body
	if strings.Contains(req.Header.Get("Content-Encoding"), "gzip") {
		gr, err := gzip.NewReader(reader)
		d.PanicIfError(err)
		reader = gr
	} else if strings.Contains(req.Header.Get("Content-Encoding"), "x-snappy-framed") {
		sr := snappy.NewReader(reader)
		reader = ioutil.NopCloser(sr)
	}
	return
}
func extractHashes(req *http.Request) hash.HashSlice {
	err := req.ParseForm()
	d.PanicIfError(err)
	hashStrs := req.PostForm["ref"]
	d.PanicIfTrue(len(hashStrs) == 0, "PostForm is empty")

	hashes := make(hash.HashSlice, len(hashStrs))
	for idx, refStr := range hashStrs {
		hashes[idx] = hash.Parse(refStr)
	}
	return hashes
}
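The gzip branch of bodyReader implies a client that compressed the request body and set Content-Encoding to match. A minimal sketch of that client side using only the standard library (the URL and payload are placeholders, not the real endpoint):

package main

import (
	"bytes"
	"compress/gzip"
	"net/http"
)

func main() {
	var buf bytes.Buffer
	gw := gzip.NewWriter(&buf)
	gw.Write([]byte("payload")) // error handling elided for brevity
	gw.Close()                  // Close flushes the gzip trailer; required before sending

	req, err := http.NewRequest("POST", "http://localhost:8000/writeValue/", &buf)
	if err != nil {
		panic(err)
	}
	req.Header.Set("Content-Encoding", "gzip") // the value bodyReader sniffs for
	req.Header.Set("Content-Type", "application/octet-stream")
	// In real code, send req with an http.Client and decode the response.
}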
Example #8
func expectVersion(res *http.Response) {
	dataVersion := res.Header.Get(NomsVersionHeader)
	if constants.NomsVersion != dataVersion {
		b, _ := ioutil.ReadAll(res.Body)
		res.Body.Close()
		d.PanicIfError(fmt.Errorf(
			"Version mismatch\n"+
				"\tSDK version '%s' is incompatible with data of version: '%s'\n"+
				"\tHTTP Response: %d (%s): %s\n",
			constants.NomsVersion, dataVersion,
			res.StatusCode, res.Status, string(b)))
	}
}
Example #9
// MaybeStartProfile checks the -blockProfile, -cpuProfile, and -memProfile flags and, for each that is set, attempts to start gathering profiling data into the appropriate file. It returns an object with one method, Stop(), that must be called in order to flush profile data to disk before the process terminates.
func MaybeStartProfile() interface {
	Stop()
} {
	p := &prof{}
	if blockProfile != "" {
		f, err := os.Create(blockProfile)
		d.PanicIfError(err)
		runtime.SetBlockProfileRate(1)
		p.bp = f
	}
	if cpuProfile != "" {
		f, err := os.Create(cpuProfile)
		d.PanicIfError(err)
		d.PanicIfError(pprof.StartCPUProfile(f))
		p.cpu = f
	}
	if memProfile != "" {
		f, err := os.Create(memProfile)
		d.PanicIfError(err)
		p.mem = f
	}
	return p
}
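Since the returned object's Stop must run before the process exits, callers use the one-liner below (it appears verbatim in Examples #18 and #19); deferring the chained call evaluates MaybeStartProfile immediately and defers only Stop:

defer profile.MaybeStartProfile().Stop()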
Example #10
func newBackingStore(dir string, maxFileHandles int, dumpStats bool) *internalLevelDBStore {
	d.PanicIfTrue(dir == "", "dir cannot be empty")
	d.PanicIfError(os.MkdirAll(dir, 0700))
	db, err := leveldb.OpenFile(dir, &opt.Options{
		Compression:            opt.NoCompression,
		Filter:                 filter.NewBloomFilter(10), // 10 bits/key
		OpenFilesCacheCapacity: maxFileHandles,
		WriteBuffer:            1 << 24, // 16MiB
	})
	d.Chk.NoError(err, "opening internalLevelDBStore in %s", dir)
	return &internalLevelDBStore{
		db:                   db,
		mu:                   &sync.Mutex{},
		concurrentWriteLimit: make(chan struct{}, maxFileHandles),
		dumpStats:            dumpStats,
	}
}
Example #11
func (bhcs *httpBatchStore) requestRoot(method string, current, last hash.Hash) *http.Response {
	u := *bhcs.host
	u.Path = httprouter.CleanPath(bhcs.host.Path + constants.RootPath)
	if method == "POST" {
		d.PanicIfTrue(current.IsEmpty(), "Unexpected empty value")
		params := u.Query()
		params.Add("last", last.String())
		params.Add("current", current.String())
		u.RawQuery = params.Encode()
	}

	req := newRequest(method, bhcs.auth, u.String(), nil, nil)

	res, err := bhcs.httpClient.Do(req)
	d.PanicIfError(err)

	return res
}
Example #12
func newOrderedChunkCache() *orderedChunkCache {
	dir, err := ioutil.TempDir("", "")
	d.PanicIfError(err)
	db, err := leveldb.OpenFile(dir, &opt.Options{
		Compression:            opt.NoCompression,
		Filter:                 filter.NewBloomFilter(10), // 10 bits/key
		OpenFilesCacheCapacity: 24,
		NoSync:                 true,    // We don't need this data to be durable; LevelDB is acting as temporary sort storage that may grow larger than main memory.
		WriteBuffer:            1 << 27, // 128MiB
	})
	d.Chk.NoError(err, "opening put cache in %s", dir)
	return &orderedChunkCache{
		orderedChunks: db,
		chunkIndex:    map[hash.Hash][]byte{},
		dbDir:         dir,
		mu:            &sync.RWMutex{},
	}
}
Example #13
// Serial serially runs all instances of filename found under dir, mapping stdout and stderr to each subprocess in the obvious way. env is overlaid on the environment of the current process. If args are provided, they're passed en masse to each subprocess.
func Serial(stdout, stderr io.Writer, env Env, dir, filename string, args ...string) bool {
	success := true
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if os.IsNotExist(err) {
			// Some programs like npm create temporary log files which confuse filepath.Walk.
			return nil
		}
		d.PanicIfTrue(err != nil, "Failed directory traversal at %s", path)
		if !info.IsDir() && filepath.Base(path) == filename {
			scriptAndArgs := append([]string{filepath.Base(path)}, args...)
			runErr := runEnvDir(stdout, stderr, env, filepath.Dir(path), "python", scriptAndArgs...)
			if runErr != nil {
				success = false
				fmt.Fprintf(stderr, "Running %s failed with %v\n", path, runErr)
			}
		}
		return nil
	})
	d.PanicIfError(err)
	return success
}
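A hedged usage sketch ("tests" and "test.py" are invented here; Example #16 below does the real thing with buildScript and stageScript):

// Run every test.py found under ./tests, failing the build if any script fails.
if !Serial(os.Stdout, os.Stderr, Env{"PYTHONPATH": "tools"}, "tests", "test.py") {
	os.Exit(1)
}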
Example #14
func (s *DynamoStore) Root() hash.Hash {
	result, err := s.ddbsvc.GetItem(&dynamodb.GetItemInput{
		TableName: aws.String(s.table),
		Key: map[string]*dynamodb.AttributeValue{
			refAttr: {B: s.rootKey},
		},
	})
	d.PanicIfError(err)

	itemLen := len(result.Item)
	if itemLen == 0 {
		return hash.Hash{}
	}
	d.Chk.True(itemLen == 2 || itemLen == 3, "Root should have two or three attributes on it: %+v", result.Item)
	if itemLen == 3 {
		d.Chk.True(result.Item[compAttr] != nil)
		d.Chk.True(result.Item[compAttr].S != nil)
		d.Chk.True(noneValue == *result.Item[compAttr].S)
	}
	return hash.FromSlice(result.Item[chunkAttr].B)
}
Example #15
func main() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "usage: %s <url> <dataset>\n", os.Args[0])
		flag.PrintDefaults()
	}

	spec.RegisterDatabaseFlags(flag.CommandLine)
	flag.Parse(true)

	if len(flag.Args()) != 2 {
		d.CheckError(errors.New("expected url and dataset arguments"))
	}

	ds, err := spec.GetDataset(flag.Arg(1))
	d.CheckError(err)

	url := flag.Arg(0)
	if url == "" {
		flag.Usage()
		os.Exit(1)
	}

	res, err := http.Get(url)
	if err != nil {
		log.Fatalf("Error fetching %s: %+v\n", url, err)
	} else if res.StatusCode != 200 {
		log.Fatalf("Error fetching %s: %s\n", url, res.Status)
	}
	defer res.Body.Close()

	var jsonObject interface{}
	err = json.NewDecoder(res.Body).Decode(&jsonObject)
	if err != nil {
		log.Fatalln("Error decoding JSON: ", err)
	}

	_, err = ds.CommitValue(jsontonoms.NomsValueFromDecodedJSON(jsonObject, true))
	d.PanicIfError(err)
	ds.Database().Close()
}
Example #16
func main() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage of %s:\n  %s path/to/staging/dir\n", os.Args[0], os.Args[0])
	}
	flag.Parse(true)
	if flag.Arg(0) == "" {
		flag.Usage()
		os.Exit(1)
	}
	err := d.Try(func() {
		stagingDir, err := filepath.Abs(flag.Arg(0))
		d.PanicIfTrue(err != nil, "Path to staging directory (first arg) must be valid, not %s", flag.Arg(0))
		d.PanicIfError(os.MkdirAll(stagingDir, 0755))

		goPath := os.Getenv("GOPATH")
		d.PanicIfTrue(goPath == "", "GOPATH must be set!")
		workspace := os.Getenv("WORKSPACE")
		if workspace == "" {
			fmt.Printf("WORKSPACE not set in environment; using GOPATH (%s).\n", goPath)
			workspace = goPath
		}
		pythonPath := filepath.Join(goPath, nomsCheckoutPath, "tools")
		env := runner.Env{
			"PYTHONPATH": pythonPath,
		}

		if !runner.Serial(os.Stdout, os.Stderr, env, ".", buildScript) {
			os.Exit(1)
		}

		if !runner.Serial(os.Stdout, os.Stderr, env, ".", stageScript, stagingDir) {
			os.Exit(1)
		}
	})
	if err != nil {
		log.Fatal(err)
	}
}
Example #17
func newHTTPBatchStore(baseURL, auth string) *httpBatchStore {
	u, err := url.Parse(baseURL)
	d.PanicIfError(err)
	d.PanicIfTrue(u.Scheme != "http" && u.Scheme != "https", "Unrecognized scheme: %s", u.Scheme)
	buffSink := &httpBatchStore{
		host:          u,
		httpClient:    makeHTTPClient(httpChunkSinkConcurrency),
		auth:          auth,
		getQueue:      make(chan chunks.ReadRequest, readBufferSize),
		hasQueue:      make(chan chunks.ReadRequest, readBufferSize),
		writeQueue:    make(chan writeRequest, writeBufferSize),
		flushChan:     make(chan struct{}),
		finishedChan:  make(chan struct{}),
		rateLimit:     make(chan struct{}, httpChunkSinkConcurrency),
		requestWg:     &sync.WaitGroup{},
		workerWg:      &sync.WaitGroup{},
		unwrittenPuts: newOrderedChunkCache(),
	}
	buffSink.batchGetRequests()
	buffSink.batchHasRequests()
	buffSink.batchPutRequests()
	return buffSink
}
Example #18
func main() {
	err := d.Try(func() {
		spec.RegisterDatabaseFlags(flag.CommandLine)
		profile.RegisterProfileFlags(flag.CommandLine)
		flag.Usage = customUsage
		flag.Parse(true)

		if flag.NArg() != 2 {
			d.CheckError(errors.New("Expected directory path followed by dataset"))
		}
		dir := flag.Arg(0)
		ds, err := spec.GetDataset(flag.Arg(1))
		d.CheckError(err)

		defer profile.MaybeStartProfile().Stop()

		cpuCount := runtime.NumCPU()

		filesChan := make(chan fileIndex, 1024)
		refsChan := make(chan refIndex, 1024)

		getFilePaths := func() {
			index := 0
			err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
				d.PanicIfTrue(err != nil, "Cannot traverse directories")
				if !info.IsDir() && filepath.Ext(path) == ".xml" {
					filesChan <- fileIndex{path, index}
					index++
				}

				return nil
			})
			d.PanicIfError(err)
			close(filesChan)
		}

		wg := sync.WaitGroup{}
		importXML := func() {
			expectedType := types.NewMap()
			for f := range filesChan {
				file, err := os.Open(f.path)
				d.PanicIfTrue(err != nil, "Error getting XML")

				xmlObject, err := mxj.NewMapXmlReader(file)
				d.PanicIfTrue(err != nil, "Error decoding XML")
				object := xmlObject.Old()
				file.Close()

				nomsObj := jsontonoms.NomsValueFromDecodedJSON(object, false)
				d.Chk.IsType(expectedType, nomsObj)

				var r types.Ref
				if !*noIO {
					r = ds.Database().WriteValue(nomsObj)
				}

				refsChan <- refIndex{r, f.index}
			}

			wg.Done()
		}

		go getFilePaths()
		for i := 0; i < cpuCount*8; i++ {
			wg.Add(1)
			go importXML()
		}
		go func() {
			wg.Wait()
			close(refsChan) // done converting xml to noms
		}()

		refList := refIndexList{}
		for r := range refsChan {
			refList = append(refList, r)
		}
		sort.Sort(refList)

		refs := make([]types.Value, len(refList))
		for idx, r := range refList {
			refs[idx] = r.ref
		}

		rl := types.NewList(refs...)

		if !*noIO {
			_, err := ds.CommitValue(rl)
			d.PanicIfError(err)
		}
	})

	if err != nil {
		log.Fatal(err)
	}
}
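The importer above uses a standard fan-out/fan-in shape: one producer closes filesChan, a pool of workers drains it, and a dedicated goroutine closes refsChan only after wg.Wait(), which lets the final range loop terminate. A stripped-down sketch of just that shape (types and counts are illustrative):

package main

import (
	"fmt"
	"sync"
)

func main() {
	jobs := make(chan int, 8)
	results := make(chan int, 8)

	// Producer: the sender closes the channel when it has nothing more to send.
	go func() {
		for i := 0; i < 20; i++ {
			jobs <- i
		}
		close(jobs)
	}()

	// Fan out: a fixed pool of workers drains jobs.
	var wg sync.WaitGroup
	for w := 0; w < 4; w++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			for j := range jobs {
				results <- j * j
			}
		}()
	}

	// Close results only after every worker has finished, so the range below ends.
	go func() {
		wg.Wait()
		close(results)
	}()

	for r := range results { // fan in
		fmt.Println(r)
	}
}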
Example #19
func main() {
	// The delimiter is actually a rune, which may be more than one byte long.
	// https://blog.golang.org/strings
	delimiter := flag.String("delimiter", ",", "field delimiter for csv file, must be exactly one character long.")
	comment := flag.String("comment", "", "comment to add to commit's meta data")
	header := flag.String("header", "", "header row. If empty, we'll use the first row of the file")
	name := flag.String("name", "Row", "struct name. The user-visible name to give to the struct type that will hold each row of data.")
	columnTypes := flag.String("column-types", "", "a comma-separated list of types representing the desired type of each column. If absent, all column types default to String")
	pathDescription := "noms path to blob to import"
	path := flag.String("path", "", pathDescription)
	flag.StringVar(path, "p", "", pathDescription)
	dateFlag := flag.String("date", "", fmt.Sprintf(`date of commit in ISO 8601 format ("%s"). By default, the current date is used.`, dateFormat))
	noProgress := flag.Bool("no-progress", false, "suppresses progress output if true")
	destType := flag.String("dest-type", "list", "the destination type to import to. Can be 'list' or 'map:<pk>', where <pk> is the index position (0-based) of the column that is the unique identifier for each row")
	skipRecords := flag.Uint("skip-records", 0, "number of records to skip at beginning of file")
	destTypePattern := regexp.MustCompile(`^(list|map):(\d+)$`)

	spec.RegisterDatabaseFlags(flag.CommandLine)
	profile.RegisterProfileFlags(flag.CommandLine)

	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: csv-import [options] <csvfile> <dataset>\n\n")
		flag.PrintDefaults()
	}

	flag.Parse(true)

	var err error
	switch {
	case flag.NArg() == 0:
		err = errors.New("Maybe you put options after the dataset?")
	case flag.NArg() == 1 && *path == "":
		err = errors.New("If <csvfile> isn't specified, you must specify a noms path with -p")
	case flag.NArg() == 2 && *path != "":
		err = errors.New("Cannot specify both <csvfile> and a noms path with -p")
	case flag.NArg() > 2:
		err = errors.New("Too many arguments")
	}
	d.CheckError(err)

	var date = *dateFlag
	if date == "" {
		date = time.Now().UTC().Format(dateFormat)
	} else {
		_, err := time.Parse(dateFormat, date)
		d.CheckErrorNoUsage(err)
	}

	defer profile.MaybeStartProfile().Stop()

	var r io.Reader
	var size uint64
	var filePath string
	var dataSetArgN int

	if *path != "" {
		db, val, err := spec.GetPath(*path)
		d.CheckError(err)
		if val == nil {
			d.CheckError(fmt.Errorf("Path %s not found\n", *path))
		}
		blob, ok := val.(types.Blob)
		if !ok {
			d.CheckError(fmt.Errorf("Path %s not a Blob: %s\n", *path, types.EncodedValue(val.Type())))
		}
		defer db.Close()
		r = blob.Reader()
		size = blob.Len()
		dataSetArgN = 0
	} else {
		filePath = flag.Arg(0)
		res, err := os.Open(filePath)
		d.CheckError(err)
		defer res.Close()
		fi, err := res.Stat()
		d.CheckError(err)
		r = res
		size = uint64(fi.Size())
		dataSetArgN = 1
	}

	if !*noProgress {
		r = progressreader.New(r, getStatusPrinter(size))
	}

	comma, err := csv.StringToRune(*delimiter)
	d.CheckErrorNoUsage(err)

	var dest int
	var pk int
	if *destType == "list" {
		dest = destList
	} else if match := destTypePattern.FindStringSubmatch(*destType); match != nil {
		dest = destMap
		pk, err = strconv.Atoi(match[2])
		d.CheckErrorNoUsage(err)
	} else {
		fmt.Println("Invalid dest-type: ", *destType)
		return
	}

	cr := csv.NewCSVReader(r, comma)
	for i := uint(0); i < *skipRecords; i++ {
		cr.Read()
	}

	var headers []string
	if *header == "" {
		headers, err = cr.Read()
		d.PanicIfError(err)
	} else {
		headers = strings.Split(*header, string(comma))
	}

	ds, err := spec.GetDataset(flag.Arg(dataSetArgN))
	d.CheckError(err)
	defer ds.Database().Close()

	kinds := []types.NomsKind{}
	if *columnTypes != "" {
		kinds = csv.StringsToKinds(strings.Split(*columnTypes, ","))
	}

	var value types.Value
	if dest == destList {
		value, _ = csv.ReadToList(cr, *name, headers, kinds, ds.Database())
	} else {
		value = csv.ReadToMap(cr, headers, pk, kinds, ds.Database())
	}
	mi := metaInfoForCommit(date, filePath, *path, *comment)
	_, err = ds.Commit(value, dataset.CommitOptions{Meta: mi})
	if !*noProgress {
		status.Clear()
	}
	d.PanicIfError(err)
}
Example #20
func write(w io.Writer, b []byte) {
	_, err := w.Write(b)
	d.PanicIfError(err)
}
Example #21
func writeEncodedValueWithTags(w io.Writer, v types.Value) {
	d.PanicIfError(types.WriteEncodedValueWithTags(w, v))
}