func handleRootPost(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) {
	d.PanicIfTrue(req.Method != "POST", "Expected post method.")

	params := req.URL.Query()
	tokens := params["last"]
	d.PanicIfTrue(len(tokens) != 1, `Expected "last" query param value`)
	last := hash.Parse(tokens[0])
	tokens = params["current"]
	d.PanicIfTrue(len(tokens) != 1, `Expected "current" query param value`)
	current := hash.Parse(tokens[0])

	// Ensure that proposed new Root is present in cs
	c := cs.Get(current)
	d.PanicIfTrue(c.IsEmpty(), "Can't set Root to a non-present Chunk")

	// Ensure that proposed new Root is a Map and, if it has anything in it, that it's Map<String, Ref<Commit>>
	v := types.DecodeValue(c, nil)
	d.PanicIfTrue(v.Type().Kind() != types.MapKind, "Root of a Database must be a Map")
	m := v.(types.Map)
	if !m.Empty() && !isMapOfStringToRefOfCommit(m) {
		panic(d.Wrap(fmt.Errorf("Root of a Database must be a Map<String, Ref<Commit>>, not %s", m.Type().Describe())))
	}

	if !cs.UpdateRoot(current, last) {
		w.WriteHeader(http.StatusConflict)
		return
	}
}
func (lvs *ValueStore) checkChunksInCache(v Value, readValues bool) Hints {
	hints := map[hash.Hash]struct{}{}
	for _, reachable := range v.Chunks() {
		// First, check the type cache to see if reachable is already known to be valid.
		targetHash := reachable.TargetHash()
		entry := lvs.check(targetHash)

		// If it's not already in the cache, attempt to read the value directly, which will put it and its chunks into the cache.
		if entry == nil || !entry.Present() {
			var reachableV Value
			if readValues {
				// TODO: log or report that we needed to ReadValue here BUG 1762
				reachableV = lvs.ReadValue(targetHash)
				entry = lvs.check(targetHash)
			}
			if reachableV == nil {
				d.Chk.Fail("Attempted to write Value containing Ref to non-existent object.", "%s\n, contains ref %s, which points to a non-existent Value.", v.Hash(), reachable.TargetHash())
			}
		}
		if hint := entry.Hint(); !hint.IsEmpty() {
			hints[hint] = struct{}{}
		}
		targetType := getTargetType(reachable)
		d.PanicIfTrue(!entry.Type().Equals(targetType), "Value to write contains ref %s, which points to a value of a different type: %+v != %+v", reachable.TargetHash(), entry.Type(), targetType)
	}
	return hints
}
// StringToValue takes a piece of data as a string and attempts to convert it to a types.Value of the appropriate types.NomsKind.
func StringToValue(s string, k types.NomsKind) (types.Value, error) {
	switch k {
	case types.NumberKind:
		if s == "" {
			return types.Number(float64(0)), nil
		}
		fval, err := strconv.ParseFloat(s, 64)
		if err != nil {
			return nil, fmt.Errorf("Could not parse '%s' into number (%s)", s, err)
		}
		return types.Number(fval), nil
	case types.BoolKind:
		// TODO: This should probably be configurable.
		switch s {
		case "true", "1", "y", "Y":
			return types.Bool(true), nil
		case "false", "0", "n", "N", "":
			return types.Bool(false), nil
		default:
			return nil, fmt.Errorf("Could not parse '%s' into bool", s)
		}
	case types.StringKind:
		return types.String(s), nil
	default:
		d.PanicIfTrue(true, "Invalid column type kind: %v", k)
	}
	panic("not reached")
}
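// Illustrative sketch (not part of the original source): exercising StringToValue
// with the kinds handled by the switch above. Assumes the package context of
// StringToValue; the expected results follow directly from its case arms.
func exampleStringToValue() {
	n, err := StringToValue("3.14", types.NumberKind)
	fmt.Println(n, err) // expect types.Number(3.14) and a nil error

	b, err := StringToValue("y", types.BoolKind)
	fmt.Println(b, err) // expect types.Bool(true) and a nil error

	// An empty string yields Number(0) for NumberKind and Bool(false) for BoolKind.
	z, _ := StringToValue("", types.NumberKind)
	fmt.Println(z)
}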
func handleRootGet(w http.ResponseWriter, req *http.Request, ps URLParams, rt chunks.ChunkStore) {
	d.PanicIfTrue(req.Method != "GET", "Expected GET method.")

	rootRef := rt.Root()
	// Headers must be set before the body is written; net/http ignores them afterwards.
	w.Header().Add("content-type", "text/plain")
	fmt.Fprintf(w, "%v", rootRef.String())
}
func handleRootPost(w http.ResponseWriter, req *http.Request, ps URLParams, rt chunks.ChunkStore) {
	d.PanicIfTrue(req.Method != "POST", "Expected post method.")

	params := req.URL.Query()
	tokens := params["last"]
	d.PanicIfTrue(len(tokens) != 1, `Expected "last" query param value`)
	last := hash.Parse(tokens[0])
	tokens = params["current"]
	d.PanicIfTrue(len(tokens) != 1, `Expected "current" query param value`)
	current := hash.Parse(tokens[0])

	if !rt.UpdateRoot(current, last) {
		w.WriteHeader(http.StatusConflict)
		return
	}
}
// StringsToKinds looks up each element of strs in the StringToKind map and returns a slice of answers.
func StringsToKinds(strs []string) KindSlice {
	kinds := make(KindSlice, len(strs))
	for i, str := range strs {
		k, ok := StringToKind[str]
		d.PanicIfTrue(!ok, "StringToKind[%s] failed", str)
		kinds[i] = k
	}
	return kinds
}
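// Illustrative sketch (not part of the original source): a lookup through
// StringsToKinds. Assumes StringToKind maps kind names such as "String" and
// "Number" to their types.NomsKind values, mirroring the lookup above; an
// unknown name panics via d.PanicIfTrue.
func exampleStringsToKinds() {
	kinds := StringsToKinds([]string{"String", "Number", "Bool"})
	for _, k := range kinds {
		fmt.Println(k) // each entry is a types.NomsKind
	}
}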
func extractHashes(req *http.Request) hash.HashSlice {
	err := req.ParseForm()
	d.PanicIfError(err)
	hashStrs := req.PostForm["ref"]
	d.PanicIfTrue(len(hashStrs) == 0, "PostForm is empty")

	hashes := make(hash.HashSlice, len(hashStrs))
	for idx, refStr := range hashStrs {
		hashes[idx] = hash.Parse(refStr)
	}
	return hashes
}
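// Illustrative sketch (not part of the original source): building the kind of
// request extractHashes expects — a POST form carrying one or more "ref"
// fields. Uses only the standard library; the hash strings are placeholders.
func exampleRefsRequest(serverURL string, hashStrs []string) (*http.Request, error) {
	form := url.Values{"ref": hashStrs}
	req, err := http.NewRequest("POST", serverURL, strings.NewReader(form.Encode()))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded")
	return req, nil
}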
func (bhcs *httpBatchStore) sendWriteRequests(hashes hash.HashSet, hints types.Hints) {
	if len(hashes) == 0 {
		return
	}
	bhcs.rateLimit <- struct{}{}
	go func() {
		defer func() {
			<-bhcs.rateLimit
			bhcs.unwrittenPuts.Clear(hashes)
			bhcs.requestWg.Add(-len(hashes))
		}()

		var res *http.Response
		var err error
		for tryAgain := true; tryAgain; {
			chunkChan := make(chan *chunks.Chunk, 1024)
			go func() {
				bhcs.unwrittenPuts.ExtractChunks(hashes, chunkChan)
				close(chunkChan)
			}()

			body := buildWriteValueRequest(chunkChan, hints)
			url := *bhcs.host
			url.Path = httprouter.CleanPath(bhcs.host.Path + constants.WriteValuePath)
			// TODO: Make this accept snappy encoding
			req := newRequest("POST", bhcs.auth, url.String(), body, http.Header{
				"Accept-Encoding":  {"gzip"},
				"Content-Encoding": {"x-snappy-framed"},
				"Content-Type":     {"application/octet-stream"},
			})

			res, err = bhcs.httpClient.Do(req)
			d.PanicIfError(err)
			expectVersion(res)
			defer closeResponse(res.Body)

			if tryAgain = res.StatusCode == httpStatusTooManyRequests; tryAgain {
				reader := res.Body
				if strings.Contains(res.Header.Get("Content-Encoding"), "gzip") {
					gr, err := gzip.NewReader(reader)
					d.PanicIfError(err)
					defer gr.Close()
					reader = gr
				}
				/*hashes :=*/ deserializeHashes(reader)
				// TODO: BUG 1259 Since the client must currently send all chunks in one batch, the only thing to do in response to backpressure is send EVERYTHING again. Once batching is again possible, this code should figure out how to resend the chunks indicated by hashes.
			}
		}
		d.PanicIfTrue(http.StatusCreated != res.StatusCode, "Unexpected response: %s", formatErrorResponse(res))
	}()
}
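// Illustrative sketch (not part of the original source): the buffered-channel
// semaphore pattern that rateLimit uses above, reduced to its core. Acquire by
// sending, release by receiving; the channel's capacity bounds how many
// goroutines run concurrently.
func runLimited(limit int, tasks []func()) {
	sem := make(chan struct{}, limit)
	var wg sync.WaitGroup
	for _, task := range tasks {
		sem <- struct{}{} // acquire; blocks when limit goroutines are in flight
		wg.Add(1)
		go func(task func()) {
			defer func() {
				<-sem // release
				wg.Done()
			}()
			task()
		}(task)
	}
	wg.Wait()
}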
func handleHasRefs(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) {
	d.PanicIfTrue(req.Method != "POST", "Expected post method.")

	hashes := extractHashes(req)

	w.Header().Add("Content-Type", "text/plain")
	writer := respWriter(req, w)
	defer writer.Close()

	for _, h := range hashes {
		fmt.Fprintf(writer, "%s %t\n", h, cs.Has(h))
	}
}
func main() {
	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage of %s:\n  %s path/to/staging/dir\n", os.Args[0], os.Args[0])
	}
	flag.Parse(true)
	if flag.Arg(0) == "" {
		flag.Usage()
		os.Exit(1)
	}

	err := d.Try(func() {
		stagingDir, err := filepath.Abs(flag.Arg(0))
		d.PanicIfTrue(err != nil, "Path to staging directory (first arg) must be valid, not %s", flag.Arg(0))
		d.PanicIfError(os.MkdirAll(stagingDir, 0755))

		goPath := os.Getenv("GOPATH")
		d.PanicIfTrue(goPath == "", "GOPATH must be set!")
		workspace := os.Getenv("WORKSPACE")
		if workspace == "" {
			fmt.Printf("WORKSPACE not set in environment; using GOPATH (%s).\n", goPath)
			workspace = goPath
		}
		pythonPath := filepath.Join(goPath, nomsCheckoutPath, "tools")
		env := runner.Env{
			"PYTHONPATH": pythonPath,
		}

		if !runner.Serial(os.Stdout, os.Stderr, env, ".", buildScript) {
			os.Exit(1)
		}

		if !runner.Serial(os.Stdout, os.Stderr, env, ".", stageScript, stagingDir) {
			os.Exit(1)
		}
	})
	if err != nil {
		log.Fatal(err)
	}
}
func handleGetRefs(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) {
	d.PanicIfTrue(req.Method != "POST", "Expected post method.")

	hashes := extractHashes(req)

	w.Header().Add("Content-Type", "application/octet-stream")
	writer := respWriter(req, w)
	defer writer.Close()

	for _, h := range hashes {
		c := cs.Get(h)
		if !c.IsEmpty() {
			chunks.Serialize(c, writer)
		}
	}
}
// Enqueue adds a Chunk to the queue of Chunks waiting to be Put into vbs' backing ChunkStore. The instance keeps an internal buffer of Chunks, spilling to the ChunkStore when the buffer is full. If an attempt to Put Chunks fails, this method returns the BackpressureError from the underlying ChunkStore.
func (vbs *ValidatingBatchingSink) Enqueue(c chunks.Chunk) chunks.BackpressureError {
	h := c.Hash()
	if vbs.vs.isPresent(h) {
		return nil
	}
	v := DecodeFromBytes(c.Data(), vbs.vs, vbs.tc)
	d.PanicIfTrue(getHash(v) != h, "Invalid hash found")
	vbs.vs.ensureChunksInCache(v)
	vbs.vs.set(h, hintedChunk{v.Type(), h})

	vbs.batch[vbs.count] = c
	vbs.count++
	if vbs.count == batchSize {
		return vbs.Flush()
	}
	return nil
}
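// Illustrative sketch (not part of the original source): draining a channel of
// Chunks into a ValidatingBatchingSink and flushing at the end, showing how
// Enqueue's BackpressureError return is meant to be checked. The channel and
// sink are assumed to come from the surrounding package.
func drainIntoSink(vbs *ValidatingBatchingSink, chunkChan <-chan chunks.Chunk) chunks.BackpressureError {
	for c := range chunkChan {
		if bpe := vbs.Enqueue(c); bpe != nil {
			return bpe // caller decides how to retry the hashes carried by bpe
		}
	}
	return vbs.Flush()
}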
func newBackingStore(dir string, maxFileHandles int, dumpStats bool) *internalLevelDBStore {
	d.PanicIfTrue(dir == "", "dir cannot be empty")
	d.PanicIfError(os.MkdirAll(dir, 0700))
	db, err := leveldb.OpenFile(dir, &opt.Options{
		Compression:            opt.NoCompression,
		Filter:                 filter.NewBloomFilter(10), // 10 bits/key
		OpenFilesCacheCapacity: maxFileHandles,
		WriteBuffer:            1 << 24, // 16MiB
	})
	d.Chk.NoError(err, "opening internalLevelDBStore in %s", dir)
	return &internalLevelDBStore{
		db:                   db,
		mu:                   &sync.Mutex{},
		concurrentWriteLimit: make(chan struct{}, maxFileHandles),
		dumpStats:            dumpStats,
	}
}
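// Illustrative sketch (not part of the original source): opening a backing
// store in a throwaway temp directory. The handle count is an arbitrary
// placeholder; the empty-dir and MkdirAll behavior follow from the function above.
func exampleNewBackingStore() *internalLevelDBStore {
	dir, err := ioutil.TempDir("", "ldb")
	d.PanicIfError(err)
	return newBackingStore(dir, 24, false)
}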
func (bhcs *httpBatchStore) requestRoot(method string, current, last hash.Hash) *http.Response {
	u := *bhcs.host
	u.Path = httprouter.CleanPath(bhcs.host.Path + constants.RootPath)
	if method == "POST" {
		d.PanicIfTrue(current.IsEmpty(), "Unexpected empty value")
		params := u.Query()
		params.Add("last", last.String())
		params.Add("current", current.String())
		u.RawQuery = params.Encode()
	}

	req := newRequest(method, bhcs.auth, u.String(), nil, nil)

	res, err := bhcs.httpClient.Do(req)
	d.PanicIfError(err)

	return res
}
// Serial serially runs all instances of filename found under dir, mapping stdout and stderr to each subprocess in the obvious way. env is overlaid on the environment of the current process. If args are provided, they're passed en masse to each subprocess.
func Serial(stdout, stderr io.Writer, env Env, dir, filename string, args ...string) bool {
	success := true
	err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
		if os.IsNotExist(err) {
			// Some programs like npm create temporary log files which confuse filepath.Walk.
			return nil
		}
		d.PanicIfTrue(err != nil, "Failed directory traversal at %s", path)
		if !info.IsDir() && filepath.Base(path) == filename {
			scriptAndArgs := append([]string{filepath.Base(path)}, args...)
			runErr := runEnvDir(stdout, stderr, env, filepath.Dir(path), "python", scriptAndArgs...)
			if runErr != nil {
				success = false
				fmt.Fprintf(stderr, "Running %s failed with %v\n", path, runErr)
			}
		}
		return nil
	})
	d.PanicIfError(err)
	return success
}
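// Illustrative sketch (not part of the original source): invoking Serial the
// way the staging main() above does. The env overlay and script name are
// placeholders; Serial returns false if any matching script fails.
func exampleSerial() {
	env := Env{"PYTHONPATH": "/path/to/tools"}
	if !Serial(os.Stdout, os.Stderr, env, ".", "build.py") {
		os.Exit(1)
	}
}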
func handleWriteValue(w http.ResponseWriter, req *http.Request, ps URLParams, cs chunks.ChunkStore) {
	d.PanicIfTrue(req.Method != "POST", "Expected post method.")

	reader := bodyReader(req)
	defer func() {
		// Ensure all data on reader is consumed
		io.Copy(ioutil.Discard, reader)
		reader.Close()
	}()
	tc := types.NewTypeCache()
	vbs := types.NewValidatingBatchingSink(cs, tc)
	vbs.Prepare(deserializeHints(reader))

	chunkChan := make(chan *chunks.Chunk, 16)
	go chunks.DeserializeToChan(reader, chunkChan)
	var bpe chunks.BackpressureError
	for c := range chunkChan {
		if bpe == nil {
			bpe = vbs.Enqueue(*c)
		} else {
			// If a previous Enqueue() errored, we still need to drain chunkChan
			// TODO: what about having DeserializeToChan take a 'done' channel to stop it?
			bpe = append(bpe, c.Hash())
		}
	}
	if bpe == nil {
		bpe = vbs.Flush()
	}
	if bpe != nil {
		// Headers must be set before WriteHeader; net/http ignores them afterwards.
		w.Header().Add("Content-Type", "application/octet-stream")
		w.WriteHeader(httpStatusTooManyRequests)
		writer := respWriter(req, w)
		defer writer.Close()
		serializeHashes(writer, bpe.AsHashes())
		return
	}
	w.WriteHeader(http.StatusCreated)
}
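// Illustrative sketch (not part of the original source): one way the 'done'
// channel TODO above could be addressed — a deserialize loop that stops early
// when the consumer gives up, instead of being drained. deserializeOne is a
// hypothetical single-chunk decoder; the select pattern is the point here.
func deserializeToChanWithDone(reader io.Reader, chunkChan chan<- *chunks.Chunk, done <-chan struct{}) {
	defer close(chunkChan)
	for {
		c, ok := deserializeOne(reader) // hypothetical helper: decode one chunk, ok=false at EOF
		if !ok {
			return
		}
		select {
		case chunkChan <- c:
		case <-done:
			return // consumer hit an error; stop producing
		}
	}
}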
func newHTTPBatchStore(baseURL, auth string) *httpBatchStore {
	u, err := url.Parse(baseURL)
	d.PanicIfError(err)
	d.PanicIfTrue(u.Scheme != "http" && u.Scheme != "https", "Unrecognized scheme: %s", u.Scheme)
	buffSink := &httpBatchStore{
		host:          u,
		httpClient:    makeHTTPClient(httpChunkSinkConcurrency),
		auth:          auth,
		getQueue:      make(chan chunks.ReadRequest, readBufferSize),
		hasQueue:      make(chan chunks.ReadRequest, readBufferSize),
		writeQueue:    make(chan writeRequest, writeBufferSize),
		flushChan:     make(chan struct{}),
		finishedChan:  make(chan struct{}),
		rateLimit:     make(chan struct{}, httpChunkSinkConcurrency),
		requestWg:     &sync.WaitGroup{},
		workerWg:      &sync.WaitGroup{},
		unwrittenPuts: newOrderedChunkCache(),
	}
	buffSink.batchGetRequests()
	buffSink.batchHasRequests()
	buffSink.batchPutRequests()
	return buffSink
}
// MakeStructTypeFromHeaders creates a struct type from the headers using |kinds| as the type of each field. If |kinds| is empty, default to strings.
func MakeStructTypeFromHeaders(headers []string, structName string, kinds KindSlice) (typ *types.Type, fieldOrder []int, kindMap []types.NomsKind) {
	useStringType := len(kinds) == 0
	d.Chk.True(useStringType || len(headers) == len(kinds))

	fieldMap := make(types.TypeMap, len(headers))
	origOrder := make(map[string]int, len(headers))
	fieldNames := make(sort.StringSlice, len(headers))

	for i, key := range headers {
		fn := types.EscapeStructField(key)
		origOrder[fn] = i
		kind := types.StringKind
		if !useStringType {
			kind = kinds[i]
		}
		_, ok := fieldMap[fn]
		d.PanicIfTrue(ok, `Duplicate field name "%s"`, key)
		fieldMap[fn] = types.MakePrimitiveType(kind)
		fieldNames[i] = fn
	}

	sort.Sort(fieldNames)

	kindMap = make([]types.NomsKind, len(fieldMap))
	fieldOrder = make([]int, len(fieldMap))
	fieldTypes := make([]*types.Type, len(fieldMap))

	for i, fn := range fieldNames {
		typ := fieldMap[fn]
		fieldTypes[i] = typ
		kindMap[i] = typ.Kind()
		fieldOrder[origOrder[fn]] = i
	}

	typ = types.MakeStructType(structName, fieldNames, fieldTypes)
	return
}
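// Illustrative sketch (not part of the original source): building a struct type
// from CSV-style headers. The header names and struct name are placeholders;
// kinds line up one-to-one with headers, as the function above requires.
func exampleMakeStructTypeFromHeaders() {
	headers := []string{"name", "age"}
	kinds := KindSlice{types.StringKind, types.NumberKind}
	typ, fieldOrder, kindMap := MakeStructTypeFromHeaders(headers, "Person", kinds)
	fmt.Println(typ.Describe(), fieldOrder, kindMap)
}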
func NewDataset(db datas.Database, datasetID string) Dataset {
	d.PanicIfTrue(!idRe.MatchString(datasetID), "Invalid dataset ID: %s", datasetID)
	return Dataset{db, datasetID}
}
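// Illustrative sketch (not part of the original source): NewDataset panics on
// IDs rejected by idRe, so a malformed ID surfaces immediately at construction.
// The ID shown is a placeholder assumed to satisfy idRe; the database is
// assumed to come from elsewhere (e.g. a spec lookup).
func exampleNewDataset(db datas.Database) Dataset {
	return NewDataset(db, "mydataset") // an ID idRe rejects would panic here
}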
func (bsa *BatchStoreAdaptor) expectVersion() {
	dataVersion := bsa.cs.Version()
	d.PanicIfTrue(constants.NomsVersion != dataVersion, "SDK version %s incompatible with data of version %s", constants.NomsVersion, dataVersion)
}
// ForceRunInDir runs 'exe [args...]' in the given directory, and d.Chk()s on failure. Inherits the environment of the current process.
func ForceRunInDir(dir string, env Env, exe string, args ...string) {
	info, err := os.Stat(dir)
	d.PanicIfTrue(err != nil, "Can't stat %s", dir)
	d.PanicIfTrue(!info.IsDir(), "%s must be a path to a directory.", dir)
	d.Chk.NoError(runEnvDir(os.Stdout, os.Stderr, env, dir, exe, args...))
}
func verifyName(name, kind string) {
	d.PanicIfTrue(!fieldNameRe.MatchString(name), `Invalid struct%s name: "%s"`, kind, name)
}
func (ds *databaseCommon) commit(datasetID string, commit types.Struct) error {
	d.PanicIfTrue(!IsCommitType(commit.Type()), "Can't commit a non-Commit struct to dataset %s", datasetID)
	return ds.doCommit(datasetID, commit)
}
func main() {
	err := d.Try(func() {
		spec.RegisterDatabaseFlags(flag.CommandLine)
		profile.RegisterProfileFlags(flag.CommandLine)
		flag.Usage = customUsage
		flag.Parse(true)

		if flag.NArg() != 2 {
			d.CheckError(errors.New("Expected directory path followed by dataset"))
		}
		dir := flag.Arg(0)
		ds, err := spec.GetDataset(flag.Arg(1))
		d.CheckError(err)

		defer profile.MaybeStartProfile().Stop()

		cpuCount := runtime.NumCPU()

		filesChan := make(chan fileIndex, 1024)
		refsChan := make(chan refIndex, 1024)

		getFilePaths := func() {
			index := 0
			err := filepath.Walk(dir, func(path string, info os.FileInfo, err error) error {
				d.PanicIfTrue(err != nil, "Cannot traverse directories")
				if !info.IsDir() && filepath.Ext(path) == ".xml" {
					filesChan <- fileIndex{path, index}
					index++
				}
				return nil
			})
			d.PanicIfError(err)
			close(filesChan)
		}

		wg := sync.WaitGroup{}
		importXML := func() {
			expectedType := types.NewMap()
			for f := range filesChan {
				file, err := os.Open(f.path)
				d.PanicIfTrue(err != nil, "Error getting XML")

				xmlObject, err := mxj.NewMapXmlReader(file)
				d.PanicIfTrue(err != nil, "Error decoding XML")
				object := xmlObject.Old()
				file.Close()

				nomsObj := jsontonoms.NomsValueFromDecodedJSON(object, false)
				d.Chk.IsType(expectedType, nomsObj)

				var r types.Ref
				if !*noIO {
					r = ds.Database().WriteValue(nomsObj)
				}

				refsChan <- refIndex{r, f.index}
			}
			wg.Done()
		}

		go getFilePaths()
		for i := 0; i < cpuCount*8; i++ {
			wg.Add(1)
			go importXML()
		}

		go func() {
			wg.Wait()
			close(refsChan) // done converting xml to noms
		}()

		refList := refIndexList{}
		for r := range refsChan {
			refList = append(refList, r)
		}
		sort.Sort(refList)
		refs := make([]types.Value, len(refList))
		for idx, r := range refList {
			refs[idx] = r.ref
		}

		rl := types.NewList(refs...)

		if !*noIO {
			_, err := ds.CommitValue(rl)
			d.PanicIfError(err)
		}
	})
	if err != nil {
		log.Fatal(err)
	}
}
func (lbs *localBatchStore) expectVersion() {
	dataVersion := lbs.cs.Version()
	d.PanicIfTrue(constants.NomsVersion != dataVersion, "SDK version %s incompatible with data of version %s", constants.NomsVersion, dataVersion)
}