Exemple #1
0
// ReadMsgpackFrame reads the msgpack frame that starts at byteOffset in
// rawStream. It decodes the 2-5 byte header of a msgpack binary array
// (bin8, bin16, or bin32), decodes the frame's payload into an R object, and
// returns a two-element R list: element 0 is the next byteOffset to use (as a
// double) and element 1 is the decoded-into-R object.
//
// rawStream must be a RAW vector and byteOffset a REAL or INTEGER vector whose
// first element is the offset. An empty rawStream yields R NULL. A wrong
// argument type, an out-of-range offset, an undecodable header, or a frame
// that extends past the end of rawStream is reported to R through
// ReportErrorToR_NoReturn.
//
//export ReadMsgpackFrame
func ReadMsgpackFrame(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP {

	var start int
	if C.TYPEOF(byteOffset) == C.REALSXP {
		start = int(C.get_real_elt(byteOffset, 0))
	} else if C.TYPEOF(byteOffset) == C.INTSXP {
		start = int(C.get_int_elt(byteOffset, 0))
	} else {
		C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires byteOffset to be a numeric byte-offset number."))
	}

	// rawStream must be a RAWSXP
	if C.TYPEOF(rawStream) != C.RAWSXP {
		C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires x be a RAW vector of bytes."))
	}

	n := int(C.Rf_xlength(rawStream))
	if n == 0 {
		return C.R_NilValue
	}

	// A negative offset would be converted to a huge C.ulonglong below and
	// index far outside the vector, so reject it explicitly.
	if start < 0 {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) must not be negative.", start)))
	}

	if start >= n {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) is beyond the length of x (x has len %d).", start, n)))
	}
	// INVAR: 0 <= start < n

	// Copy at most the bytes that actually remain in x; the rest of the
	// 5-byte scratch header stays zero. Any header that was cut short then
	// yields a totalSz larger than what remains, which the bounds check
	// further down rejects.
	var decoder [5]byte
	headerAvail := n - start
	if headerAvail > len(decoder) {
		headerAvail = len(decoder)
	}
	C.memcpy(unsafe.Pointer(&decoder[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(headerAvail))
	headerSz, _, totalSz, err := DecodeMsgpackBinArrayHeader(decoder[:])
	if err != nil {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("ReadMsgpackFrame error trying to decode msgpack frame: %s", err)))
	}

	// Taking &frame[0] on an empty slice would panic inside a cgo export.
	if totalSz <= 0 {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("ReadMsgpackFrame error: decoded msgpack frame has a non-positive total size (%d).", totalSz)))
	}

	if start+totalSz > n {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) plus the frames size(%d) goes beyond the length of x (x has len %d).", start, totalSz, n)))
	}

	// Copy the whole frame (header + payload) into Go memory, then decode
	// only the part after the header.
	frame := make([]byte, totalSz)
	C.memcpy(unsafe.Pointer(&frame[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(totalSz))

	// decodeMsgpackToR hands back an unprotected SEXP, so protect it before
	// the allocations below.
	rObject := decodeMsgpackToR(frame[headerSz:])
	C.Rf_protect(rObject)
	returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2))
	C.Rf_protect(returnList)
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz))))
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject)
	C.Rf_unprotect_ptr(rObject)
	C.Rf_unprotect_ptr(returnList)
	return returnList
}
Exemple #2
0
// ReadNewlineDelimJson reads a json object at byteOffset in rawStream, expects
// it to be newline terminated, and returns a two-element R list: element 0 is
// the next byteOffset to use (as a double) and element 1 is the
// decoded-into-R object. The end of the object is whatever position the C
// helper next_newline_pos reports when searching from byteOffset+1 up to the
// length of rawStream; the bytes from byteOffset up to (not including) that
// position are decoded, and that position is returned as the next offset.
//
// rawStream must be a RAW vector and byteOffset a REAL or INTEGER vector whose
// first element is the offset. An empty rawStream yields R NULL. A wrong
// argument type or an out-of-range offset is reported to R through
// ReportErrorToR_NoReturn.
//
//export ReadNewlineDelimJson
func ReadNewlineDelimJson(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP {
	C.Rf_protect(rawStream)

	var start int
	if C.TYPEOF(byteOffset) == C.REALSXP {
		start = int(C.get_real_elt(byteOffset, 0))
	} else if C.TYPEOF(byteOffset) == C.INTSXP {
		start = int(C.get_int_elt(byteOffset, 0))
	} else {
		C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires byteOffset to be a numeric byte-offset number."))
	}
	// rawStream must be a RAWSXP
	if C.TYPEOF(rawStream) != C.RAWSXP {
		C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires x be a RAW vector of bytes."))
	}

	n := int(C.Rf_xlength(rawStream))
	if n == 0 {
		// Balance the Rf_protect at the top; this is a normal return, so
		// nothing else will pop rawStream off the protect stack for us.
		C.Rf_unprotect_ptr(rawStream)
		return C.R_NilValue
	}

	// A negative offset would be converted to a huge C.ulonglong below and
	// index far outside the vector, so reject it explicitly.
	if start < 0 {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: byteOffset(%d) must not be negative.", start)))
	}

	if start >= n {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: byteOffset(%d) is at or beyond the length of x (x has len %d).", start, n)))
	}
	// INVAR: 0 <= start < n

	// find the next newline or end of raw array
	next := int(C.next_newline_pos(rawStream, C.ulonglong(start+1), C.ulonglong(n)))
	totalSz := next - start

	// make() with a negative size, or &line[0] on an empty slice, would
	// panic inside a cgo export; report an R error instead.
	if totalSz <= 0 {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: no bytes to decode at byteOffset(%d) (next newline position reported as %d).", start, next)))
	}

	line := make([]byte, totalSz)
	fromPtr := unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start)))
	C.memcpy(unsafe.Pointer(&line[0]), fromPtr, C.size_t(totalSz))

	// The decoded object is protected right away, before the allocations
	// below.
	rObject := decodeJsonToR(line)
	C.Rf_protect(rObject)
	returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2))
	C.Rf_protect(returnList)
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz))))
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject)
	C.Rf_unprotect_ptr(rObject)
	C.Rf_unprotect_ptr(returnList)
	C.Rf_unprotect_ptr(rawStream)
	return returnList
}
Exemple #3
0
// ToMsgpack serializes the R object s into a RAW vector holding its
// msgpack2 encoding. msgpack2 is used because it distinguishes strings
// (utf8 encoded) from binary blobs, which may contain '\0' bytes. The
// encoding is done by Ugorji Nwoke's codec library,
// https://github.com/ugorji/go/tree/master/codec.
//
// When the encoder produces no bytes, R NULL is returned instead of an
// empty RAW vector.
//
//export ToMsgpack
func ToMsgpack(s C.SEXP) C.SEXP {
	encoded := encodeRIntoMsgpack(s)
	nbytes := len(encoded)
	if nbytes == 0 {
		return C.R_NilValue
	}

	// Allocate the destination RAW vector and keep it protected while the
	// encoded bytes are copied into it.
	out := C.allocVector(C.RAWSXP, C.R_xlen_t(nbytes))
	C.Rf_protect(out)
	C.memcpy(unsafe.Pointer(C.RAW(out)), unsafe.Pointer(&encoded[0]), C.size_t(nbytes))
	C.Rf_unprotect_ptr(out)

	return out
}
Exemple #4
0
// decodeMsgpackToR decodes the msgpack bytes in reply into a generic Go
// value and converts that value into an R object with decodeHelper (with
// the JSON heuristic for raw bytes enabled). A decoding failure is reported
// to R through ReportErrorToR_NoReturn.
//
// The returned SEXP is unprotected: the protection decodeHelper placed on
// it is removed before it is handed back.
func decodeMsgpackToR(reply []byte) C.SEXP {

	h.init()

	var decoded interface{}
	if err := codec.NewDecoderBytes(reply, &h.mh).Decode(&decoded); err != nil {
		C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("decodeMsgpackToR() error: '%s'", err)))
	}

	VPrintf("decoded type : %T\n", decoded)
	VPrintf("decoded value: %#v\n", decoded)

	robj := decodeHelper(decoded, 0, true)
	if robj == nil {
		// nothing was protected, so there is nothing to unprotect
		return robj
	}
	C.Rf_unprotect_ptr(robj) // drop decodeHelper's protection before returning
	return robj
}
Exemple #5
0
// tmFramesToR converts a slice of frames into an R list of parallel columns.
//
// Element 0 of the returned list is a double vector holding each frame's
// Tm() value divided by 1e6. Element 1 is the payload column, whose type is
// chosen from the PTI of the first frame:
//
//   - PtiOneInt64:   character vector holding f.Ude formatted with %d
//   - PtiOneFloat64: double vector holding f.V0
//   - PtiTwo64:      double vector holding f.V0; the list also gets a third
//     element, a character vector holding f.Ude formatted with %d
//   - PtiUDE:        list holding f.Data decoded as JSON or msgpack depending
//     on the frame's event number; frames with any other event number are
//     not assigned
//   - any other PTI: R NULL (such frames carry no payload column)
//
// An empty slc yields R NULL. Every frame must have the same PTI as the
// first one; otherwise tmFramesToR panics. The returned SEXP is unprotected.
func tmFramesToR(slc []*tf.Frame) C.SEXP {
	n := len(slc)
	if n == 0 {
		return C.R_NilValue
	}

	cols := 2
	firstPti := slc[0].GetPTI()

	// Start from R NULL rather than a nil SEXP so that PTIs without a payload
	// column still put a valid object into the result list.
	var payloadList, payload2List C.SEXP = C.R_NilValue, C.R_NilValue

	// hasPayload records whether payloadList was allocated (and therefore
	// protected) here, so that only protected pointers are unprotected later.
	hasPayload := true
	switch firstPti {
	case tf.PtiOneInt64:
		payloadList = C.allocVector(C.STRSXP, C.R_xlen_t(n))
	case tf.PtiOneFloat64, tf.PtiTwo64:
		payloadList = C.allocVector(C.REALSXP, C.R_xlen_t(n))
	case tf.PtiUDE:
		payloadList = C.allocVector(C.VECSXP, C.R_xlen_t(n))
	default:
		// PtiZero, PtiNull, PtiNA, PtiNaN and anything unrecognized.
		hasPayload = false
	}
	if hasPayload {
		C.Rf_protect(payloadList)
	}
	if firstPti == tf.PtiTwo64 {
		payload2List = C.allocVector(C.STRSXP, C.R_xlen_t(n))
		C.Rf_protect(payload2List)
		cols++
	}

	returnList := C.allocVector(C.VECSXP, C.R_xlen_t(cols))
	C.Rf_protect(returnList)

	timestampSlice := C.allocVector(C.REALSXP, C.R_xlen_t(n))
	C.Rf_protect(timestampSlice)
	size := unsafe.Sizeof(C.double(0))

	ptrNumSlice := unsafe.Pointer(C.REAL(timestampSlice))

	// The data pointer of the double payload column does not change per
	// frame, so fetch it once instead of on every iteration.
	var ptrPayList unsafe.Pointer
	if firstPti == tf.PtiOneFloat64 || firstPti == tf.PtiTwo64 {
		ptrPayList = unsafe.Pointer(C.REAL(payloadList))
	}

	const msec = 1e6
	for i, f := range slc {
		// timestamp
		// NOTE(review): if Tm() returns an integer type, this division
		// truncates before the conversion to float64 - confirm that is
		// intended.
		tmu := f.Tm()
		ftm := float64(tmu / msec)
		// Pointer arithmetic is kept in a single expression, as recommended
		// for uintptr <-> unsafe.Pointer conversions.
		*((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = C.double(ftm)

		// payload
		if pti := f.GetPTI(); pti != firstPti {
			panic(fmt.Sprintf("inconsistent pti, firstPti was '%v', now we have '%v'",
				firstPti, pti))
		}

		switch firstPti {
		case tf.PtiOneInt64:
			// NOTE(review): the C string made by C.CString is not freed in
			// this function - confirm whether that leak matters here.
			C.SET_STRING_ELT(payloadList, C.R_xlen_t(i), C.mkChar(C.CString(fmt.Sprintf("%d", f.Ude))))

		case tf.PtiOneFloat64:
			*((*C.double)(unsafe.Pointer(uintptr(ptrPayList) + size*uintptr(i)))) = C.double(f.V0)

		case tf.PtiTwo64:
			*((*C.double)(unsafe.Pointer(uintptr(ptrPayList) + size*uintptr(i)))) = C.double(f.V0)
			C.SET_STRING_ELT(payload2List, C.R_xlen_t(i), C.mkChar(C.CString(fmt.Sprintf("%d", f.Ude))))

		case tf.PtiUDE:
			// probably json or msgpack, try to decode it.
			evtnum := f.GetEvtnum()
			if evtnum == tf.EvJson || (evtnum >= 2000 && evtnum <= 9999) {
				tmp := decodeJsonToR(f.Data)
				C.Rf_protect(tmp)
				C.SET_VECTOR_ELT(payloadList, C.R_xlen_t(i), tmp)
				C.Rf_unprotect_ptr(tmp)
			} else if evtnum == tf.EvMsgpKafka || evtnum == tf.EvMsgpack {
				tmp := decodeMsgpackToR(f.Data)
				C.Rf_protect(tmp)
				C.SET_VECTOR_ELT(payloadList, C.R_xlen_t(i), tmp)
				C.Rf_unprotect_ptr(tmp)
			}
		}

	} // end for range slc

	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), timestampSlice)
	C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), payloadList)
	if cols == 3 {
		C.SET_VECTOR_ELT(returnList, C.R_xlen_t(2), payload2List)
		C.Rf_unprotect_ptr(payload2List)
	}

	C.Rf_unprotect_ptr(timestampSlice)
	if hasPayload {
		C.Rf_unprotect_ptr(payloadList)
	}

	C.Rf_unprotect_ptr(returnList)
	return returnList
}
Exemple #6
0
// decodeHelper converts a decoded Go value r into an R object.
//
// Policy: decodeHelper always returns a protected s, and the
// user/client/caller of decodeHelper() is responsible for unprotecting s if
// they are embedding it. This is much easier to audit for correctness.
//
// Conversions:
//
//   - string: result of Rf_mkString
//   - int, int32, int64, float64: scalar double (Rf_ScalarReal)
//   - bool: logical vector of length 1
//   - nil: R NULL
//   - []byte: RAW vector, unless the JSON heuristic below applies
//   - []interface{}: a character, logical or double vector when every
//     element is a string, bool, or int64/float64 respectively; otherwise a
//     list whose elements are converted recursively. An empty slice gives
//     allocVector(NILSXP, 0).
//   - map[string]interface{}: list with a names attribute, in the order
//     produced by makeSortedSlicesFromMap
//   - anything else: a diagnostic is printed and R NULL is returned
//
// depth is the recursion depth and is only used in verbose tracing.
//
// if jsonHeuristicDecode then we'll treat raw []byte that
// start with '{' as JSON and try to decode them too.
//
// A panic raised during the conversion is recovered and reported to R via
// ReportErrorToR_NoReturn, together with a stack trace.
func decodeHelper(r interface{}, depth int, jsonHeuristicDecode bool) (s C.SEXP) {

	defer func() {
		rec := recover()
		if rec == nil {
			return
		}
		// truncated or mal-formed msgpack can cause us problems...
		err, isErr := rec.(error)
		if !isErr {
			err = fmt.Errorf("'%v'", rec)
		}
		C.ReportErrorToR_NoReturn(C.CString(panicErrIntro + err.Error() + "\n" + string(debug.Stack())))
	}()

	VPrintf("decodeHelper() at depth %d, decoded type is %T\n", depth, r)
	switch val := r.(type) {
	case string:
		VPrintf("depth %d found string case: val = %#v\n", depth, val)
		// NOTE(review): the C string made by C.CString is not freed in this
		// function - confirm whether that leak matters here.
		s = C.Rf_mkString(C.CString(val))
		C.Rf_protect(s)
		return s

	case int:
		VPrintf("depth %d found int case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(float64(val)))
		C.Rf_protect(s)
		return s

	case int32:
		VPrintf("depth %d found int32 case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(float64(val)))
		C.Rf_protect(s)
		return s

	case int64:
		VPrintf("depth %d found int64 case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(float64(val)))
		C.Rf_protect(s)
		return s

	case float64:
		VPrintf("depth %d found float64 case: val = %#v\n", depth, val)
		s = C.Rf_ScalarReal(C.double(val))
		C.Rf_protect(s)
		return s

	case []interface{}:
		VPrintf("depth %d found []interface{} case: val = %#v\n", depth, val)

		lenval := len(val)
		if lenval == 0 {
			emptyvec := C.allocVector(C.NILSXP, C.R_xlen_t(0))
			C.Rf_protect(emptyvec)
			return emptyvec
		}

		first := val[0]
		VPrintf(" ... also at depth %d,   ---> first has type '%T' and value '%v'\n", depth, first, first)

		// Classify the elements. A typed R vector is only built when every
		// element has the same supported type as the first one; a mixed
		// slice such as ["a", 1] falls through to the generic list below
		// instead of panicking in a type assertion.
		const (
			kindOther = iota
			kindString
			kindBool
			kindInt64
			kindFloat64
		)
		kindOf := func(v interface{}) int {
			switch v.(type) {
			case string:
				return kindString
			case bool:
				return kindBool
			case int64:
				return kindInt64
			case float64:
				return kindFloat64
			}
			return kindOther
		}
		elemKind := kindOf(first)
		for i := 1; elemKind != kindOther && i < lenval; i++ {
			if kindOf(val[i]) != elemKind {
				elemKind = kindOther
			}
		}

		switch elemKind {
		case kindString:
			stringSlice := C.allocVector(C.STRSXP, C.R_xlen_t(lenval))
			C.Rf_protect(stringSlice)
			for i := range val {
				C.SET_STRING_ELT(stringSlice, C.R_xlen_t(i), C.mkChar(C.CString(val[i].(string))))
			}
			return stringSlice

		case kindBool:
			boolSlice := C.allocVector(C.LGLSXP, C.R_xlen_t(lenval))
			C.Rf_protect(boolSlice)
			for i := range val {
				if val[i].(bool) {
					C.set_lglsxp_true(boolSlice, C.ulonglong(i))
				} else {
					C.set_lglsxp_false(boolSlice, C.ulonglong(i))
				}
			}
			return boolSlice

		case kindInt64:
			// we can only realistically hope to preserve 53 bits worth here.
			// todo? unless... can we require bit64 package be available somehow?
			numSlice := C.allocVector(C.REALSXP, C.R_xlen_t(lenval))
			C.Rf_protect(numSlice)
			size := unsafe.Sizeof(C.double(0))
			naflag := false
			rmax := int64(C.pow(FLT_RADIX, DBL_MANT_DIG) - 1)
			//VPrintf("rmax = %v\n", rmax) //  rmax = 9007199254740991
			rmin := -rmax
			ptrNumSlice := unsafe.Pointer(C.REAL(numSlice))
			var ui uintptr
			var rhs C.double
			for i := range val {
				n := val[i].(int64)
				VPrintf("n = %d, rmax = %d, n > rmax = %v\n", n, rmax, n > rmax)

				if n < rmin || n > rmax {
					naflag = true
				}

				ui = uintptr(i)
				rhs = C.double(float64(n))
				// Try to avoid any gc activity (from the Go runtime) while
				// in the middle of uintptr <-> unsafe.Pointer conversion, as
				// if the gc were to catch us in the middle of that conversion
				// it might crash.
				// Hence we do pointer arithmetic all at once in one expression,
				// which is at present (Oct 2015) is the recommended safe way
				// to do pointer arithmetic in Go. See
				// https://github.com/golang/go/issues/8994 for discussion.
				*((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*ui))) = rhs
			}
			if naflag {
				C.WarnAndContinue(C.CString("integer precision lost while converting to double"))
			}
			return numSlice

		case kindFloat64:
			numSlice := C.allocVector(C.REALSXP, C.R_xlen_t(lenval))
			C.Rf_protect(numSlice)
			size := unsafe.Sizeof(C.double(0))

			// unfortunately C.memmove() doesn't work here (I tried). I speculate this is because val[i] is
			// really wrapped in an interface{} rather than being a actual float64. val *is* an
			// []interface{} after all.
			var rhs C.double
			ptrNumSlice := unsafe.Pointer(C.REAL(numSlice))
			for i := range val {
				rhs = C.double(val[i].(float64))
				*((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = rhs
			}
			return numSlice
		}

		// Generic case: an R list whose elements are converted recursively.
		list := C.allocVector(C.VECSXP, C.R_xlen_t(lenval))
		C.Rf_protect(list)
		for i := range val {
			elt := decodeHelper(val[i], depth+1, jsonHeuristicDecode)
			C.SET_VECTOR_ELT(list, C.R_xlen_t(i), elt)
			C.Rf_unprotect_ptr(elt) // safely inside list now
		}
		return list

	case map[string]interface{}:

		s = C.allocVector(C.VECSXP, C.R_xlen_t(len(val)))
		C.Rf_protect(s)
		names := C.allocVector(C.VECSXP, C.R_xlen_t(len(val)))
		C.Rf_protect(names)

		VPrintf("depth %d found map[string]interface case: val = %#v\n", depth, val)
		sortedMapKey, sortedMapVal := makeSortedSlicesFromMap(val)
		for i := range sortedMapKey {

			ele := decodeHelper(sortedMapVal[i], depth+1, jsonHeuristicDecode)
			C.SET_VECTOR_ELT(s, C.R_xlen_t(i), ele)
			C.Rf_unprotect_ptr(ele) // unprotect ele now that it is safely inside s.

			ksexpString := C.Rf_mkString(C.CString(sortedMapKey[i]))
			C.Rf_protect(ksexpString)
			C.SET_VECTOR_ELT(names, C.R_xlen_t(i), ksexpString)
			C.Rf_unprotect_ptr(ksexpString) // safely inside names
		}
		C.setAttrib(s, C.R_NamesSymbol, names)
		C.Rf_unprotect_ptr(names) // safely attached to s.

	case []byte:
		VPrintf("depth %d found []byte case: val = %#v\n", depth, val)

		if jsonHeuristicDecode {
			if len(val) > 0 && val[0] == '{' {
				jsonToR := decodeJsonToR(val)
				C.Rf_protect(jsonToR)
				return jsonToR
			}
		}
		rawmsg := C.allocVector(C.RAWSXP, C.R_xlen_t(len(val)))
		C.Rf_protect(rawmsg)
		if len(val) > 0 {
			C.memcpy(unsafe.Pointer(C.RAW(rawmsg)), unsafe.Pointer(&val[0]), C.size_t(len(val)))
		}
		return rawmsg

	case nil:
		s = C.R_NilValue
		C.Rf_protect(s) // must, for uniformly consistency. else we get protect imbalances.
		return s

	case bool:
		boolmsg := C.allocVector(C.LGLSXP, C.R_xlen_t(1))
		C.Rf_protect(boolmsg)
		if val {
			C.set_lglsxp_true(boolmsg, 0)
		} else {
			C.set_lglsxp_false(boolmsg, 0)
		}
		return boolmsg

	default:
		fmt.Printf("unknown type in type switch, val = %#v.  type = %T.\n", val, val)
		// Honor the "always return a protected s" policy: callers
		// unconditionally embed and unprotect the result, so hand back a
		// protected R NULL rather than a nil, unprotected SEXP.
		s = C.R_NilValue
		C.Rf_protect(s)
	}

	return s
}