//export ReadMsgpackFrame // // ReadMsgpackFrame reads the msgpack frame at byteOffset in rawStream, decodes the // 2-5 bytes of a msgpack binary array (either bin8, bin16, or bin32), and returns // and the decoded-into-R object and the next byteOffset to use. // func ReadMsgpackFrame(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP { var start int if C.TYPEOF(byteOffset) == C.REALSXP { start = int(C.get_real_elt(byteOffset, 0)) } else if C.TYPEOF(byteOffset) == C.INTSXP { start = int(C.get_int_elt(byteOffset, 0)) } else { C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires byteOffset to be a numeric byte-offset number.")) } // rawStream must be a RAWSXP if C.TYPEOF(rawStream) != C.RAWSXP { C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires x be a RAW vector of bytes.")) } n := int(C.Rf_xlength(rawStream)) if n == 0 { return C.R_NilValue } if start >= n { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) is beyond the length of x (x has len %d).", start, n))) } var decoder [5]byte C.memcpy(unsafe.Pointer(&decoder[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(5)) headerSz, _, totalSz, err := DecodeMsgpackBinArrayHeader(decoder[:]) if err != nil { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("ReadMsgpackFrame error trying to decode msgpack frame: %s", err))) } if start+totalSz > n { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) plus the frames size(%d) goes beyond the length of x (x has len %d).", start, totalSz, n))) } bytes := make([]byte, totalSz) C.memcpy(unsafe.Pointer(&bytes[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(totalSz)) rObject := decodeMsgpackToR(bytes[headerSz:]) C.Rf_protect(rObject) returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2)) C.Rf_protect(returnList) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz)))) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject) C.Rf_unprotect_ptr(rObject) C.Rf_unprotect_ptr(returnList) return returnList }
//export ReadNewlineDelimJson // // ReadNewlineDelimJson reads a json object at byteOffset in rawStream, expects // it to be newline terminated, and returns the // decoded-into-R object and the next byteOffset to use (the byte just after // the terminating newline). // func ReadNewlineDelimJson(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP { C.Rf_protect(rawStream) var start int if C.TYPEOF(byteOffset) == C.REALSXP { start = int(C.get_real_elt(byteOffset, 0)) } else if C.TYPEOF(byteOffset) == C.INTSXP { start = int(C.get_int_elt(byteOffset, 0)) } else { C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires byteOffset to be a numeric byte-offset number.")) } // rawStream must be a RAWSXP if C.TYPEOF(rawStream) != C.RAWSXP { C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires x be a RAW vector of bytes.")) } n := int(C.Rf_xlength(rawStream)) if n == 0 { return C.R_NilValue } if start >= n { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: byteOffset(%d) is at or beyond the length of x (x has len %d).", start, n))) } // INVAR: start < n // find the next newline or end of raw array next := int(C.next_newline_pos(rawStream, C.ulonglong(start+1), C.ulonglong(n))) totalSz := next - start bytes := make([]byte, totalSz) fromPtr := unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))) C.memcpy(unsafe.Pointer(&bytes[0]), fromPtr, C.size_t(totalSz)) rObject := decodeJsonToR(bytes) C.Rf_protect(rObject) returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2)) C.Rf_protect(returnList) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz)))) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject) C.Rf_unprotect_ptr(rObject) C.Rf_unprotect_ptr(returnList) C.Rf_unprotect_ptr(rawStream) return returnList }
//export ToMsgpack // // ToMsgpack converts an R object into serialized RAW vector // of msgpack2 encoded bytes. We use msgpack2 so that there is // a difference between strings (utf8 encoded) and binary blobs // which can contain '\0' zeros. The underlying msgpack2 library // is the awesome https://github.com/ugorji/go/tree/master/codec // library from Ugorji Nwoke. // func ToMsgpack(s C.SEXP) C.SEXP { byteSlice := encodeRIntoMsgpack(s) if len(byteSlice) == 0 { return C.R_NilValue } rawmsg := C.allocVector(C.RAWSXP, C.R_xlen_t(len(byteSlice))) C.Rf_protect(rawmsg) C.memcpy(unsafe.Pointer(C.RAW(rawmsg)), unsafe.Pointer(&byteSlice[0]), C.size_t(len(byteSlice))) C.Rf_unprotect_ptr(rawmsg) return rawmsg }
// returns an unprotected SEXP func decodeMsgpackToR(reply []byte) C.SEXP { h.init() var r interface{} decoder := codec.NewDecoderBytes(reply, &h.mh) err := decoder.Decode(&r) if err != nil { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("decodeMsgpackToR() error: '%s'", err))) } VPrintf("decoded type : %T\n", r) VPrintf("decoded value: %#v\n", r) s := decodeHelper(r, 0, true) if s != nil { C.Rf_unprotect_ptr(s) // unprotect s before returning it } return s }
func tmFramesToR(slc []*tf.Frame) C.SEXP { n := len(slc) if n == 0 { return C.R_NilValue } cols := 2 pti := slc[0].GetPTI() firstPti := pti var payloadList, payload2List C.SEXP switch pti { case tf.PtiOneInt64: payloadList = C.allocVector(C.STRSXP, C.R_xlen_t(n)) C.Rf_protect(payloadList) case tf.PtiOneFloat64: payloadList = C.allocVector(C.REALSXP, C.R_xlen_t(n)) C.Rf_protect(payloadList) case tf.PtiTwo64: payloadList = C.allocVector(C.REALSXP, C.R_xlen_t(n)) C.Rf_protect(payloadList) payload2List = C.allocVector(C.STRSXP, C.R_xlen_t(n)) C.Rf_protect(payload2List) cols++ case tf.PtiUDE: payloadList = C.allocVector(C.VECSXP, C.R_xlen_t(n)) C.Rf_protect(payloadList) case tf.PtiZero: case tf.PtiNull: case tf.PtiNA: case tf.PtiNaN: } returnList := C.allocVector(C.VECSXP, C.R_xlen_t(cols)) C.Rf_protect(returnList) timestampSlice := C.allocVector(C.REALSXP, C.R_xlen_t(n)) C.Rf_protect(timestampSlice) size := unsafe.Sizeof(C.double(0)) var rhs C.double ptrNumSlice := unsafe.Pointer(C.REAL(timestampSlice)) const msec = 1e6 for i, f := range slc { // timestamp tmu := f.Tm() ftm := float64(tmu / msec) //fmt.Printf("tmu[%v]=%v / ftm=%v\n", i, tmu, ftm) rhs = C.double(ftm) *((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = rhs // payload pti = f.GetPTI() if pti != firstPti { panic(fmt.Sprintf("inconsistent pti, firstPti was '%v', now we have '%v'", firstPti, pti)) } switch pti { case tf.PtiOneInt64: C.SET_STRING_ELT(payloadList, C.R_xlen_t(i), C.mkChar(C.CString(fmt.Sprintf("%d", f.Ude)))) case tf.PtiOneFloat64: rhs = C.double(f.V0) ptrPayList := unsafe.Pointer(C.REAL(payloadList)) *((*C.double)(unsafe.Pointer(uintptr(ptrPayList) + size*uintptr(i)))) = rhs case tf.PtiTwo64: rhs = C.double(f.V0) ptrPayList := unsafe.Pointer(C.REAL(payloadList)) *((*C.double)(unsafe.Pointer(uintptr(ptrPayList) + size*uintptr(i)))) = rhs C.SET_STRING_ELT(payload2List, C.R_xlen_t(i), C.mkChar(C.CString(fmt.Sprintf("%d", f.Ude)))) case tf.PtiUDE: // probably json or msgpack, try to decode it. evtnum := f.GetEvtnum() if evtnum == tf.EvJson || (evtnum >= 2000 && evtnum <= 9999) { tmp := decodeJsonToR(f.Data) C.Rf_protect(tmp) C.SET_VECTOR_ELT(payloadList, C.R_xlen_t(i), tmp) C.Rf_unprotect_ptr(tmp) } else if evtnum == tf.EvMsgpKafka || evtnum == tf.EvMsgpack { tmp := decodeMsgpackToR(f.Data) C.Rf_protect(tmp) C.SET_VECTOR_ELT(payloadList, C.R_xlen_t(i), tmp) C.Rf_unprotect_ptr(tmp) } case tf.PtiZero: case tf.PtiNull: case tf.PtiNA: case tf.PtiNaN: } } // end for range slc C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), timestampSlice) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), payloadList) if cols == 3 { C.SET_VECTOR_ELT(returnList, C.R_xlen_t(2), payload2List) C.Rf_unprotect_ptr(payload2List) } C.Rf_unprotect_ptr(timestampSlice) C.Rf_unprotect_ptr(payloadList) C.Rf_unprotect_ptr(returnList) return returnList }
// new policy: decodeHelper should always return a protected s, // and the user/client/caller of decodeHelper() is responsible // for unprotecting s if they are embedding it. This is // much easier to audit for correctness. // // if jsonHeuristicDecode then we'll treat raw []byte that // start with '{' as JSON and try to decode them too. // func decodeHelper(r interface{}, depth int, jsonHeuristicDecode bool) (s C.SEXP) { defer func() { r := recover() if r != nil { // truncated or mal-formed msgpack can cause us problems... err, isErr := r.(error) if !isErr { err = fmt.Errorf("'%v'", r) } C.ReportErrorToR_NoReturn(C.CString(panicErrIntro + err.Error() + "\n" + string(debug.Stack()))) } }() VPrintf("decodeHelper() at depth %d, decoded type is %T\n", depth, r) switch val := r.(type) { case string: VPrintf("depth %d found string case: val = %#v\n", depth, val) s = C.Rf_mkString(C.CString(val)) C.Rf_protect(s) return s case int: VPrintf("depth %d found int case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(float64(val))) C.Rf_protect(s) return s case int32: VPrintf("depth %d found int32 case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(float64(val))) C.Rf_protect(s) return s case int64: VPrintf("depth %d found int64 case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(float64(val))) C.Rf_protect(s) return s case float64: VPrintf("depth %d found float64 case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(val)) C.Rf_protect(s) return s case []interface{}: VPrintf("depth %d found []interface{} case: val = %#v\n", depth, val) var sxpTy C.SEXPTYPE = C.VECSXP lenval := len(val) if lenval == 0 { emptyvec := C.allocVector(C.NILSXP, C.R_xlen_t(0)) C.Rf_protect(emptyvec) return emptyvec } if lenval > 0 { first := val[0] VPrintf(" ... also at depth %d, ---> first has type '%T' and value '%v'\n", depth, first, first) switch first.(type) { case string: sxpTy = C.STRSXP stringSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(stringSlice) for i := range val { C.SET_STRING_ELT(stringSlice, C.R_xlen_t(i), C.mkChar(C.CString(val[i].(string)))) } return stringSlice case bool: sxpTy = C.LGLSXP boolSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(boolSlice) for i := range val { switch val[i].(bool) { case true: C.set_lglsxp_true(boolSlice, C.ulonglong(i)) case false: C.set_lglsxp_false(boolSlice, C.ulonglong(i)) } } return boolSlice case int64: // we can only realistically hope to preserve 53 bits worth here. // todo? unless... can we require bit64 package be available somehow? sxpTy = C.REALSXP numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(numSlice) size := unsafe.Sizeof(C.double(0)) naflag := false rmax := int64(C.pow(FLT_RADIX, DBL_MANT_DIG) - 1) //VPrintf("rmax = %v\n", rmax) // rmax = 9007199254740991 rmin := -rmax ptrNumSlice := unsafe.Pointer(C.REAL(numSlice)) var ui uintptr var rhs C.double for i := range val { n := val[i].(int64) VPrintf("n = %d, rmax = %d, n > rmax = %v\n", n, rmax, n > rmax) if n < rmin || n > rmax { naflag = true } ui = uintptr(i) rhs = C.double(float64(n)) // Try to avoid any gc activity (from the Go runtime) while // in the middle of uintptr <-> unsafe.Pointer conversion, as // if the gc were to catch us in the middle of that conversion // it might crash. // Hence we do pointer arithmetic all at once in one expression, // which is at present (Oct 2015) is the recommended safe way // to do pointer arithmetic in Go. See // https://github.com/golang/go/issues/8994 for discussion. *((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*ui))) = rhs } if naflag { C.WarnAndContinue(C.CString("integer precision lost while converting to double")) } return numSlice case float64: sxpTy = C.REALSXP numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(numSlice) size := unsafe.Sizeof(C.double(0)) // unfortunately C.memmove() doesn't work here (I tried). I speculate this is because val[i] is // really wrapped in an interface{} rather than being a actual float64. val *is* an // []interface{} after all. var rhs C.double ptrNumSlice := unsafe.Pointer(C.REAL(numSlice)) for i := range val { rhs = C.double(val[i].(float64)) *((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = rhs } return numSlice } } intslice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(intslice) for i := range val { elt := decodeHelper(val[i], depth+1, jsonHeuristicDecode) C.SET_VECTOR_ELT(intslice, C.R_xlen_t(i), elt) C.Rf_unprotect_ptr(elt) // safely inside intslice now } return intslice case map[string]interface{}: s = C.allocVector(C.VECSXP, C.R_xlen_t(len(val))) C.Rf_protect(s) names := C.allocVector(C.VECSXP, C.R_xlen_t(len(val))) C.Rf_protect(names) VPrintf("depth %d found map[string]interface case: val = %#v\n", depth, val) sortedMapKey, sortedMapVal := makeSortedSlicesFromMap(val) for i := range sortedMapKey { ele := decodeHelper(sortedMapVal[i], depth+1, jsonHeuristicDecode) C.SET_VECTOR_ELT(s, C.R_xlen_t(i), ele) C.Rf_unprotect_ptr(ele) // unprotect ele now that it is safely inside s. ksexpString := C.Rf_mkString(C.CString(sortedMapKey[i])) C.Rf_protect(ksexpString) C.SET_VECTOR_ELT(names, C.R_xlen_t(i), ksexpString) C.Rf_unprotect_ptr(ksexpString) // safely inside names } C.setAttrib(s, C.R_NamesSymbol, names) C.Rf_unprotect_ptr(names) // safely attached to s. case []byte: VPrintf("depth %d found []byte case: val = %#v\n", depth, val) if jsonHeuristicDecode { if len(val) > 0 && val[0] == '{' { jsonToR := decodeJsonToR(val) C.Rf_protect(jsonToR) return jsonToR } } rawmsg := C.allocVector(C.RAWSXP, C.R_xlen_t(len(val))) C.Rf_protect(rawmsg) if len(val) > 0 { C.memcpy(unsafe.Pointer(C.RAW(rawmsg)), unsafe.Pointer(&val[0]), C.size_t(len(val))) } return rawmsg case nil: s = C.R_NilValue C.Rf_protect(s) // must, for uniformly consistency. else we get protect imbalances. return s case bool: boolmsg := C.allocVector(C.LGLSXP, C.R_xlen_t(1)) C.Rf_protect(boolmsg) if val { C.set_lglsxp_true(boolmsg, 0) } else { C.set_lglsxp_false(boolmsg, 0) } return boolmsg default: fmt.Printf("unknown type in type switch, val = %#v. type = %T.\n", val, val) } return s }