Пример #1
0
//export ReadMsgpackFrame
//
// ReadMsgpackFrame reads the msgpack frame at byteOffset in rawStream, decodes the
// 2-5 byte header of a msgpack binary array (either bin8, bin16, or bin32), and
// returns a two-element R list: element 0 is the next byteOffset to use (as a
// double), element 1 is the decoded-into-R object.
//
// rawStream must be a RAW vector and byteOffset a REAL or INTEGER scalar;
// anything else is reported to R as an error. A zero-length rawStream yields
// R's NULL. Offsets that are negative, at/after the end of rawStream, or whose
// frame would extend past the end of rawStream are reported as errors.
//
func ReadMsgpackFrame(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP {

	var start int
	if C.TYPEOF(byteOffset) == C.REALSXP {
		start = int(C.get_real_elt(byteOffset, 0))
	} else if C.TYPEOF(byteOffset) == C.INTSXP {
		start = int(C.get_int_elt(byteOffset, 0))
	} else {
		C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires byteOffset to be a numeric byte-offset number."))
	}

	// rawStream must be a RAWSXP
	if C.TYPEOF(rawStream) != C.RAWSXP {
		C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires x be a RAW vector of bytes."))
	}

	n := int(C.Rf_xlength(rawStream))
	if n == 0 {
		return C.R_NilValue
	}

	// A negative offset would be converted to a huge unsigned index below,
	// so reject it before touching the raw vector.
	if start < 0 {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) must not be negative.", start)))
	}

	if start >= n {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) is beyond the length of x (x has len %d).", start, n)))
	}
	// INVAR: 0 <= start < n

	// Copy at most 5 header bytes, but never more than remain in x; the rest
	// of decoder stays zero. A header truncated this way is rejected by the
	// size checks below, since the reported frame cannot fit in what remains.
	var decoder [5]byte
	headerAvail := n - start
	if headerAvail > len(decoder) {
		headerAvail = len(decoder)
	}
	C.memcpy(unsafe.Pointer(&decoder[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(headerAvail))
	headerSz, _, totalSz, err := DecodeMsgpackBinArrayHeader(decoder[:])
	if err != nil {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("ReadMsgpackFrame error trying to decode msgpack frame: %s", err)))
	}

	// totalSz is used both as the copy length from start and as the bound for
	// slicing off the header, so it must be positive and cover the header.
	// Without this check a bad size would panic inside an exported function.
	if totalSz <= 0 || headerSz < 0 || headerSz > totalSz {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("ReadMsgpackFrame error: msgpack frame header reports an invalid size (header size %d, total size %d).", headerSz, totalSz)))
	}

	if start+totalSz > n {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) plus the frames size(%d) goes beyond the length of x (x has len %d).", start, totalSz, n)))
	}

	// Copy the whole frame (header + payload) into Go memory before decoding.
	bytes := make([]byte, totalSz)
	C.memcpy(unsafe.Pointer(&bytes[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(totalSz))

	rObject := decodeMsgpackToR(bytes[headerSz:])
	C.Rf_protect(rObject)
	returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2))
	C.Rf_protect(returnList)
	// Slot 0: offset of the first byte after this frame. Slot 1: decoded value.
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz))))
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject)
	C.Rf_unprotect_ptr(rObject)
	C.Rf_unprotect_ptr(returnList)
	return returnList
}
Пример #2
0
//export ReadNewlineDelimJson
//
// ReadNewlineDelimJson reads a json object at byteOffset in rawStream, expects
// it to be newline terminated, and returns a two-element R list: element 0 is
// the next byteOffset to use (as a double; the position reported by
// next_newline_pos), element 1 is the decoded-into-R object.
//
// rawStream must be a RAW vector and byteOffset a REAL or INTEGER scalar;
// anything else is reported to R as an error. A zero-length rawStream yields
// R's NULL. Negative offsets and offsets at/after the end of rawStream are
// reported as errors.
//
func ReadNewlineDelimJson(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP {
	C.Rf_protect(rawStream)

	var start int
	if C.TYPEOF(byteOffset) == C.REALSXP {
		start = int(C.get_real_elt(byteOffset, 0))
	} else if C.TYPEOF(byteOffset) == C.INTSXP {
		start = int(C.get_int_elt(byteOffset, 0))
	} else {
		C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires byteOffset to be a numeric byte-offset number."))
	}
	// rawStream must be a RAWSXP
	if C.TYPEOF(rawStream) != C.RAWSXP {
		C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires x be a RAW vector of bytes."))
	}

	n := int(C.Rf_xlength(rawStream))
	if n == 0 {
		// Balance the Rf_protect at the top of the function; returning with
		// rawStream still on the protect stack leaves the stack imbalanced.
		C.Rf_unprotect_ptr(rawStream)
		return C.R_NilValue
	}

	// A negative offset would be converted to a huge unsigned index below,
	// so reject it before touching the raw vector.
	if start < 0 {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: byteOffset(%d) must not be negative.", start)))
	}

	if start >= n {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: byteOffset(%d) is at or beyond the length of x (x has len %d).", start, n)))
	}
	// INVAR: 0 <= start < n

	// find the next newline or end of raw array
	next := int(C.next_newline_pos(rawStream, C.ulonglong(start+1), C.ulonglong(n)))
	totalSz := next - start
	// The search begins at start+1, so a position at or before start is not
	// expected; guard anyway because &bytes[0] below panics on an empty slice.
	if totalSz <= 0 {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: could not locate the end of the json object starting at byteOffset(%d).", start)))
	}

	// Copy the bytes [start, next) into Go memory before decoding.
	bytes := make([]byte, totalSz)
	fromPtr := unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start)))
	C.memcpy(unsafe.Pointer(&bytes[0]), fromPtr, C.size_t(totalSz))
	rObject := decodeJsonToR(bytes)
	C.Rf_protect(rObject)
	returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2))
	C.Rf_protect(returnList)
	// Slot 0: next offset to read from. Slot 1: decoded value.
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz))))
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject)
	C.Rf_unprotect_ptr(rObject)
	C.Rf_unprotect_ptr(returnList)
	C.Rf_unprotect_ptr(rawStream)
	return returnList
}
Пример #3
0
// decodeHelper recursively converts a decoded Go value r into an R object.
//
// Supported inputs: string, int, int32, int64 and float64 (R scalars; all
// numbers become doubles), []interface{} (a character or numeric vector when
// the first element is a string, int64 or float64, otherwise an R list),
// map[string]interface{} (an R list named by the sorted keys), []byte (a RAW
// vector) and nil (R's NULL). Any other type prints a diagnostic and yields
// R's NULL.
//
// depth is 0 for the outermost call and increases by one per nesting level.
// Protection contract: at depth 0, vector, list and raw results are returned
// still on R's protect stack and the caller must unprotect them; scalar and
// NULL results are returned unprotected. At depth > 0 the result is returned
// unprotected, and the caller is expected to store it into a protected
// container right away.
func decodeHelper(r interface{}, depth int) (s C.SEXP) {

	VPrintf("decodeHelper() at depth %d, decoded type is %T\n", depth, r)
	switch val := r.(type) {
	case string:
		VPrintf("depth %d found string case: val = %#v\n", depth, val)
		return C.Rf_mkString(C.CString(val))

	case int:
		VPrintf("depth %d found int case: val = %#v\n", depth, val)
		return C.Rf_ScalarReal(C.double(float64(val)))

	case int32:
		VPrintf("depth %d found int32 case: val = %#v\n", depth, val)
		return C.Rf_ScalarReal(C.double(float64(val)))

	case int64:
		VPrintf("depth %d found int64 case: val = %#v\n", depth, val)
		return C.Rf_ScalarReal(C.double(float64(val)))

	case float64:
		VPrintf("depth %d found float64 case: val = %#v\n", depth, val)
		return C.Rf_ScalarReal(C.double(val))

	case []interface{}:
		VPrintf("depth %d found []interface{} case: val = %#v\n", depth, val)

		var sxpTy C.SEXPTYPE = C.VECSXP

		lenval := len(val)
		if lenval == 0 {
			emptyvec := C.allocVector(C.NILSXP, C.R_xlen_t(0))

			if depth == 0 {
				C.Rf_protect(emptyvec)
			}
			return emptyvec
		}

		if lenval > 0 {
			// The type of the first element selects the vector type for the
			// whole slice; the type assertions below panic on mixed slices.
			first := val[0]
			VPrintf(" ... also at depth %d,   ---> first has type '%T' and value '%v'\n", depth, first, first)

			switch first.(type) {
			case string:
				sxpTy = C.STRSXP

				stringSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
				C.Rf_protect(stringSlice)
				for i := range val {
					C.SET_STRING_ELT(stringSlice, C.R_xlen_t(i), C.mkChar(C.CString(val[i].(string))))
				}
				if depth != 0 {
					C.Rf_unprotect(1) // unprotect for stringSlice, now that we are returning it
				}
				return stringSlice

			case int64:
				// we can only realistically hope to preserve 53 bits worth here.
				// todo? unless... can we require bit64 package be available somehow?
				sxpTy = C.REALSXP

				numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
				C.Rf_protect(numSlice)
				size := unsafe.Sizeof(C.double(0))
				naflag := false
				rmax := int64(C.pow(FLT_RADIX, DBL_MANT_DIG) - 1)
				//VPrintf("rmax = %v\n", rmax) //  rmax = 9007199254740991
				rmin := -rmax
				ptrNumSlice := unsafe.Pointer(C.REAL(numSlice))
				var ui uintptr
				var rhs C.double
				for i := range val {
					n := val[i].(int64)
					// Per-element trace goes through the verbose logger
					// rather than being printed unconditionally.
					VPrintf("n = %d, rmax = %d, n > rmax = %v\n", n, rmax, n > rmax)

					// Values outside [rmin, rmax] cannot be represented
					// exactly as a double; remember to warn once afterwards.
					if n < rmin || n > rmax {
						naflag = true
					}

					ui = uintptr(i)
					rhs = C.double(float64(n))
					// Try to avoid any gc activity by avoiding conversions
					// and hence do pointer arithmetic all at once in one expression. See
					// https://github.com/golang/go/issues/8994 for discussion.
					*((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*ui))) = rhs
				}
				if naflag {
					C.WarnAndContinue(C.CString("integer precision lost while converting to double"))
				}

				if depth != 0 {
					C.Rf_unprotect(1) // unprotect for numSlice, now that we are returning it
				}
				return numSlice

			case float64:
				sxpTy = C.REALSXP

				numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
				C.Rf_protect(numSlice)
				size := unsafe.Sizeof(C.double(0))

				// unfortunately C.memmove() doesn't work here (I tried). I speculate this is because val[i] is
				// really wrapped in an interface{} rather than being a actual float64. val *is* an
				// []interface{} after all.
				var rhs C.double
				ptrNumSlice := unsafe.Pointer(C.REAL(numSlice))
				for i := range val {
					rhs = C.double(val[i].(float64))
					*((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = rhs
				}
				if depth != 0 {
					C.Rf_unprotect(1) // unprotect for numSlice, now that we are returning it
				}
				return numSlice

			}
		}

		// Fallback: a generic R list whose elements are decoded recursively.
		intslice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
		C.Rf_protect(intslice)
		for i := range val {
			C.SET_VECTOR_ELT(intslice, C.R_xlen_t(i), decodeHelper(val[i], depth+1))
		}
		if depth != 0 {
			C.Rf_unprotect(1) // unprotect for intslice, now that we are returning it
		}
		return intslice

	case map[string]interface{}:

		s = C.allocVector(C.VECSXP, C.R_xlen_t(len(val)))
		// Always protect s while it is being filled: the allocations below
		// (names, child elements, key strings) happen before s is attached
		// to any parent, so nothing else keeps it alive in the meantime.
		C.Rf_protect(s)
		names := C.allocVector(C.VECSXP, C.R_xlen_t(len(val)))
		C.Rf_protect(names)

		VPrintf("depth %d found map[string]interface case: val = %#v\n", depth, val)
		sortedMapKey, sortedMapVal := makeSortedSlicesFromMap(val)
		for i := range sortedMapKey {

			ele := decodeHelper(sortedMapVal[i], depth+1)
			C.Rf_protect(ele)
			C.SET_VECTOR_ELT(s, C.R_xlen_t(i), ele)
			C.Rf_unprotect(1) // unprotect for ele, now that it is safely inside s.

			ksexpString := C.Rf_mkString(C.CString(sortedMapKey[i]))
			C.SET_VECTOR_ELT(names, C.R_xlen_t(i), ksexpString)
		}
		C.setAttrib(s, C.R_NamesSymbol, names)
		C.Rf_unprotect(1) // unprotect for names, now that it is attached to s.
		if depth != 0 {
			// Nested results are handed back unprotected, as in every other
			// case; only the depth-0 result stays on the protect stack.
			C.Rf_unprotect(1)
		}

	case []byte:
		VPrintf("depth %d found []byte case: val = %#v\n", depth, val)

		rawmsg := C.allocVector(C.RAWSXP, C.R_xlen_t(len(val)))

		if depth == 0 {
			C.Rf_protect(rawmsg)
		}
		// &val[0] panics on an empty slice, so only copy when there is data.
		if len(val) > 0 {
			C.memcpy(unsafe.Pointer(C.RAW(rawmsg)), unsafe.Pointer(&val[0]), C.size_t(len(val)))
		}
		return rawmsg

	case nil:
		return C.R_NilValue

	default:
		fmt.Printf("unknown type in type switch, val = %#v.  type = %T.\n", val, val)
		// Hand back R's NULL rather than a nil Go pointer (a C NULL), which
		// callers would otherwise store into an R list.
		return C.R_NilValue
	}

	return s
}
Пример #4
0
// decodeHelper recursively converts a decoded Go value r into an R object.
//
// new policy: decodeHelper should always return a protected s,
// and the user/client/caller of decodeHelper() is responsible
// for unprotecting s if they are embedding it. This is
// much easier to audit for correctness.
//
// Supported inputs: string, int, int32, int64 and float64 (R scalars; all
// numbers become doubles), bool (length-1 logical), []interface{} (a
// character, logical or numeric vector when the first element is a string,
// bool, int64 or float64, otherwise an R list), map[string]interface{} (an R
// list named by the sorted keys), []byte (a RAW vector) and nil (R's NULL).
// Any other type prints a diagnostic and yields a protected R NULL.
//
// depth is 0 for the outermost call and increases by one per nesting level;
// it is only used for trace output here.
//
// if jsonHeuristicDecode then we'll treat raw []byte that
// start with '{' as JSON and try to decode them too.
//
// A Go panic raised while decoding is recovered and reported to R as an
// error that includes the Go stack trace.
//
// NOTE(review): the C.CString results passed to Rf_mkString/mkChar are not
// freed in this function; cgo documents C.CString memory as caller-owned, so
// this looks like a leak per string — confirm before changing.
func decodeHelper(r interface{}, depth int, jsonHeuristicDecode bool) (s C.SEXP) {

	defer func() {
		r := recover()
		if r != nil {
			// truncated or mal-formed msgpack can cause us problems...
			err, isErr := r.(error)
			if !isErr {
				err = fmt.Errorf("'%v'", r)
			}
			C.ReportErrorToR_NoReturn(C.CString(panicErrIntro + err.Error() + "\n" + string(debug.Stack())))
		}
	}()

	VPrintf("decodeHelper() at depth %d, decoded type is %T\n", depth, r)
	switch val := r.(type) {
	case string:
		VPrintf("depth %d found string case: val = %#v\n", depth, val)
		s = C.Rf_mkString(C.CString(val))
		C.Rf_protect(s)
		return s

	case int:
		VPrintf("depth %d found int case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(float64(val)))
		C.Rf_protect(s)
		return s

	case int32:
		VPrintf("depth %d found int32 case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(float64(val)))
		C.Rf_protect(s)
		return s

	case int64:
		VPrintf("depth %d found int64 case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(float64(val)))
		C.Rf_protect(s)
		return s

	case float64:
		VPrintf("depth %d found float64 case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(val))
		C.Rf_protect(s)
		return s

	case []interface{}:
		VPrintf("depth %d found []interface{} case: val = %#v\n", depth, val)

		var sxpTy C.SEXPTYPE = C.VECSXP

		lenval := len(val)
		if lenval == 0 {
			emptyvec := C.allocVector(C.NILSXP, C.R_xlen_t(0))
			C.Rf_protect(emptyvec)
			return emptyvec
		}

		if lenval > 0 {
			// The type of the first element selects the vector type for the
			// whole slice; a mixed slice makes the type assertions below
			// panic, which the deferred recover turns into an R error.
			first := val[0]
			VPrintf(" ... also at depth %d,   ---> first has type '%T' and value '%v'\n", depth, first, first)

			switch first.(type) {
			case string:
				sxpTy = C.STRSXP

				stringSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
				C.Rf_protect(stringSlice)
				for i := range val {
					C.SET_STRING_ELT(stringSlice, C.R_xlen_t(i), C.mkChar(C.CString(val[i].(string))))
				}
				return stringSlice

			case bool:
				sxpTy = C.LGLSXP
				boolSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
				C.Rf_protect(boolSlice)
				for i := range val {
					switch val[i].(bool) {
					case true:
						C.set_lglsxp_true(boolSlice, C.ulonglong(i))
					case false:
						C.set_lglsxp_false(boolSlice, C.ulonglong(i))
					}
				}
				return boolSlice

			case int64:
				// we can only realistically hope to preserve 53 bits worth here.
				// todo? unless... can we require bit64 package be available somehow?
				sxpTy = C.REALSXP

				numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
				C.Rf_protect(numSlice)
				size := unsafe.Sizeof(C.double(0))
				naflag := false
				rmax := int64(C.pow(FLT_RADIX, DBL_MANT_DIG) - 1)
				//VPrintf("rmax = %v\n", rmax) //  rmax = 9007199254740991
				rmin := -rmax
				ptrNumSlice := unsafe.Pointer(C.REAL(numSlice))
				var ui uintptr
				var rhs C.double
				for i := range val {
					n := val[i].(int64)
					VPrintf("n = %d, rmax = %d, n > rmax = %v\n", n, rmax, n > rmax)

					// Values outside [rmin, rmax] cannot be represented
					// exactly as a double; remember to warn once afterwards.
					if n < rmin || n > rmax {
						naflag = true
					}

					ui = uintptr(i)
					rhs = C.double(float64(n))
					// Try to avoid any gc activity (from the Go runtime) while
					// in the middle of uintptr <-> unsafe.Pointer conversion, as
					// if the gc were to catch us in the middle of that conversion
					// it might crash.
					// Hence we do pointer arithmetic all at once in one expression,
					// which is at present (Oct 2015) is the recommended safe way
					// to do pointer arithmetic in Go. See
					// https://github.com/golang/go/issues/8994 for discussion.
					*((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*ui))) = rhs
				}
				if naflag {
					C.WarnAndContinue(C.CString("integer precision lost while converting to double"))
				}
				return numSlice

			case float64:
				sxpTy = C.REALSXP

				numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
				C.Rf_protect(numSlice)
				size := unsafe.Sizeof(C.double(0))

				// unfortunately C.memmove() doesn't work here (I tried). I speculate this is because val[i] is
				// really wrapped in an interface{} rather than being a actual float64. val *is* an
				// []interface{} after all.
				var rhs C.double
				ptrNumSlice := unsafe.Pointer(C.REAL(numSlice))
				for i := range val {
					rhs = C.double(val[i].(float64))
					*((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = rhs
				}
				return numSlice

			}
		}

		// Fallback: a generic R list whose elements are decoded recursively.
		intslice := C.allocVector(sxpTy, C.R_xlen_t(lenval))
		C.Rf_protect(intslice)
		for i := range val {
			elt := decodeHelper(val[i], depth+1, jsonHeuristicDecode)
			C.SET_VECTOR_ELT(intslice, C.R_xlen_t(i), elt)
			C.Rf_unprotect_ptr(elt) // safely inside intslice now
		}
		return intslice

	case map[string]interface{}:

		s = C.allocVector(C.VECSXP, C.R_xlen_t(len(val)))
		C.Rf_protect(s)
		names := C.allocVector(C.VECSXP, C.R_xlen_t(len(val)))
		C.Rf_protect(names)

		VPrintf("depth %d found map[string]interface case: val = %#v\n", depth, val)
		sortedMapKey, sortedMapVal := makeSortedSlicesFromMap(val)
		for i := range sortedMapKey {

			ele := decodeHelper(sortedMapVal[i], depth+1, jsonHeuristicDecode)
			C.SET_VECTOR_ELT(s, C.R_xlen_t(i), ele)
			C.Rf_unprotect_ptr(ele) // unprotect ele now that it is safely inside s.

			ksexpString := C.Rf_mkString(C.CString(sortedMapKey[i]))
			C.Rf_protect(ksexpString)
			C.SET_VECTOR_ELT(names, C.R_xlen_t(i), ksexpString)
			C.Rf_unprotect_ptr(ksexpString) // safely inside names
		}
		C.setAttrib(s, C.R_NamesSymbol, names)
		C.Rf_unprotect_ptr(names) // safely attached to s.
		// s itself stays protected and is returned by the final return below.

	case []byte:
		VPrintf("depth %d found []byte case: val = %#v\n", depth, val)

		if jsonHeuristicDecode {
			if len(val) > 0 && val[0] == '{' {
				jsonToR := decodeJsonToR(val)
				C.Rf_protect(jsonToR)
				return jsonToR
			}
		}
		rawmsg := C.allocVector(C.RAWSXP, C.R_xlen_t(len(val)))
		C.Rf_protect(rawmsg)
		// &val[0] panics on an empty slice, so only copy when there is data.
		if len(val) > 0 {
			C.memcpy(unsafe.Pointer(C.RAW(rawmsg)), unsafe.Pointer(&val[0]), C.size_t(len(val)))
		}
		return rawmsg

	case nil:
		s = C.R_NilValue
		C.Rf_protect(s) // must, for uniformly consistency. else we get protect imbalances.
		return s

	case bool:
		boolmsg := C.allocVector(C.LGLSXP, C.R_xlen_t(1))
		C.Rf_protect(boolmsg)
		if val {
			C.set_lglsxp_true(boolmsg, 0)
		} else {
			C.set_lglsxp_false(boolmsg, 0)
		}
		return boolmsg

	default:
		fmt.Printf("unknown type in type switch, val = %#v.  type = %T.\n", val, val)
		// Keep the always-return-protected policy for unknown types too.
		// Returning the zero-valued s (a C NULL, never protected) would make
		// callers store NULL into an R list and then fail to unprotect it.
		s = C.R_NilValue
		C.Rf_protect(s)
		return s
	}

	return s
}