//export ReadMsgpackFrame // // ReadMsgpackFrame reads the msgpack frame at byteOffset in rawStream, decodes the // 2-5 bytes of a msgpack binary array (either bin8, bin16, or bin32), and returns // and the decoded-into-R object and the next byteOffset to use. // func ReadMsgpackFrame(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP { var start int if C.TYPEOF(byteOffset) == C.REALSXP { start = int(C.get_real_elt(byteOffset, 0)) } else if C.TYPEOF(byteOffset) == C.INTSXP { start = int(C.get_int_elt(byteOffset, 0)) } else { C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires byteOffset to be a numeric byte-offset number.")) } // rawStream must be a RAWSXP if C.TYPEOF(rawStream) != C.RAWSXP { C.ReportErrorToR_NoReturn(C.CString("read.msgpack.frame(x, byteOffset) requires x be a RAW vector of bytes.")) } n := int(C.Rf_xlength(rawStream)) if n == 0 { return C.R_NilValue } if start >= n { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) is beyond the length of x (x has len %d).", start, n))) } var decoder [5]byte C.memcpy(unsafe.Pointer(&decoder[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(5)) headerSz, _, totalSz, err := DecodeMsgpackBinArrayHeader(decoder[:]) if err != nil { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("ReadMsgpackFrame error trying to decode msgpack frame: %s", err))) } if start+totalSz > n { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.msgpack.frame(x, byteOffset) error: byteOffset(%d) plus the frames size(%d) goes beyond the length of x (x has len %d).", start, totalSz, n))) } bytes := make([]byte, totalSz) C.memcpy(unsafe.Pointer(&bytes[0]), unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))), C.size_t(totalSz)) rObject := decodeMsgpackToR(bytes[headerSz:]) C.Rf_protect(rObject) returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2)) C.Rf_protect(returnList) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz)))) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject) C.Rf_unprotect_ptr(rObject) C.Rf_unprotect_ptr(returnList) return returnList }
//export ReadNewlineDelimJson // // ReadNewlineDelimJson reads a json object at byteOffset in rawStream, expects // it to be newline terminated, and returns the // decoded-into-R object and the next byteOffset to use (the byte just after // the terminating newline). // func ReadNewlineDelimJson(rawStream C.SEXP, byteOffset C.SEXP) C.SEXP { C.Rf_protect(rawStream) var start int if C.TYPEOF(byteOffset) == C.REALSXP { start = int(C.get_real_elt(byteOffset, 0)) } else if C.TYPEOF(byteOffset) == C.INTSXP { start = int(C.get_int_elt(byteOffset, 0)) } else { C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires byteOffset to be a numeric byte-offset number.")) } // rawStream must be a RAWSXP if C.TYPEOF(rawStream) != C.RAWSXP { C.ReportErrorToR_NoReturn(C.CString("read.ndjson(x, byteOffset) requires x be a RAW vector of bytes.")) } n := int(C.Rf_xlength(rawStream)) if n == 0 { return C.R_NilValue } if start >= n { C.ReportErrorToR_NoReturn(C.CString(fmt.Sprintf("read.ndjson(x, byteOffset) error: byteOffset(%d) is at or beyond the length of x (x has len %d).", start, n))) } // INVAR: start < n // find the next newline or end of raw array next := int(C.next_newline_pos(rawStream, C.ulonglong(start+1), C.ulonglong(n))) totalSz := next - start bytes := make([]byte, totalSz) fromPtr := unsafe.Pointer(C.get_raw_elt_ptr(rawStream, C.ulonglong(start))) C.memcpy(unsafe.Pointer(&bytes[0]), fromPtr, C.size_t(totalSz)) rObject := decodeJsonToR(bytes) C.Rf_protect(rObject) returnList := C.allocVector(C.VECSXP, C.R_xlen_t(2)) C.Rf_protect(returnList) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(0), C.Rf_ScalarReal(C.double(float64(start+totalSz)))) C.SET_VECTOR_ELT(returnList, C.R_xlen_t(1), rObject) C.Rf_unprotect_ptr(rObject) C.Rf_unprotect_ptr(returnList) C.Rf_unprotect_ptr(rawStream) return returnList }
func decodeHelper(r interface{}, depth int) (s C.SEXP) { VPrintf("decodeHelper() at depth %d, decoded type is %T\n", depth, r) switch val := r.(type) { case string: VPrintf("depth %d found string case: val = %#v\n", depth, val) return C.Rf_mkString(C.CString(val)) case int: VPrintf("depth %d found int case: val = %#v\n", depth, val) return C.Rf_ScalarReal(C.double(float64(val))) case int32: VPrintf("depth %d found int32 case: val = %#v\n", depth, val) return C.Rf_ScalarReal(C.double(float64(val))) case int64: VPrintf("depth %d found int64 case: val = %#v\n", depth, val) return C.Rf_ScalarReal(C.double(float64(val))) case []interface{}: VPrintf("depth %d found []interface{} case: val = %#v\n", depth, val) var sxpTy C.SEXPTYPE = C.VECSXP lenval := len(val) if lenval == 0 { emptyvec := C.allocVector(C.NILSXP, C.R_xlen_t(0)) if depth == 0 { C.Rf_protect(emptyvec) } return emptyvec } if lenval > 0 { first := val[0] VPrintf(" ... also at depth %d, ---> first has type '%T' and value '%v'\n", depth, first, first) switch first.(type) { case string: sxpTy = C.STRSXP stringSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(stringSlice) for i := range val { C.SET_STRING_ELT(stringSlice, C.R_xlen_t(i), C.mkChar(C.CString(val[i].(string)))) } if depth != 0 { C.Rf_unprotect(1) // unprotect for stringSlice, now that we are returning it } return stringSlice case int64: // we can only realistically hope to preserve 53 bits worth here. // todo? unless... can we require bit64 package be available somehow? sxpTy = C.REALSXP numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(numSlice) size := unsafe.Sizeof(C.double(0)) naflag := false rmax := int64(C.pow(FLT_RADIX, DBL_MANT_DIG) - 1) //VPrintf("rmax = %v\n", rmax) // rmax = 9007199254740991 rmin := -rmax ptrNumSlice := unsafe.Pointer(C.REAL(numSlice)) var ui uintptr var rhs C.double for i := range val { n := val[i].(int64) fmt.Printf("n = %d, rmax = %d, n > rmax = %v\n", n, rmax, n > rmax) if n < rmin || n > rmax { naflag = true } ui = uintptr(i) rhs = C.double(float64(n)) // Try to avoid any gc activity by avoiding conversions // and hence do pointer arithmetic all at once in one expression. See // https://github.com/golang/go/issues/8994 for discussion. *((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*ui))) = rhs } if naflag { C.WarnAndContinue(C.CString("integer precision lost while converting to double")) } if depth != 0 { C.Rf_unprotect(1) // unprotect for numSlice, now that we are returning it } return numSlice case float64: sxpTy = C.REALSXP numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(numSlice) size := unsafe.Sizeof(C.double(0)) // unfortunately C.memmove() doesn't work here (I tried). I speculate this is because val[i] is // really wrapped in an interface{} rather than being a actual float64. val *is* an // []interface{} after all. var rhs C.double ptrNumSlice := unsafe.Pointer(C.REAL(numSlice)) for i := range val { rhs = C.double(val[i].(float64)) *((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = rhs } if depth != 0 { C.Rf_unprotect(1) // unprotect for numSlice, now that we are returning it } return numSlice } } intslice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(intslice) for i := range val { C.SET_VECTOR_ELT(intslice, C.R_xlen_t(i), decodeHelper(val[i], depth+1)) } if depth != 0 { C.Rf_unprotect(1) // unprotect for intslice, now that we are returning it } return intslice case map[string]interface{}: s = C.allocVector(C.VECSXP, C.R_xlen_t(len(val))) if depth == 0 { // only protect the top parent of the returned value, recursively // geneated are transitively protected by their parent. C.Rf_protect(s) } names := C.allocVector(C.VECSXP, C.R_xlen_t(len(val))) C.Rf_protect(names) VPrintf("depth %d found map[string]interface case: val = %#v\n", depth, val) sortedMapKey, sortedMapVal := makeSortedSlicesFromMap(val) for i := range sortedMapKey { ele := decodeHelper(sortedMapVal[i], depth+1) C.Rf_protect(ele) C.SET_VECTOR_ELT(s, C.R_xlen_t(i), ele) C.Rf_unprotect(1) // unprotect for ele, now that it is safely inside s. ksexpString := C.Rf_mkString(C.CString(sortedMapKey[i])) C.SET_VECTOR_ELT(names, C.R_xlen_t(i), ksexpString) } C.setAttrib(s, C.R_NamesSymbol, names) C.Rf_unprotect(1) // unprotect for names, now that it is attached to s. case []byte: VPrintf("depth %d found []byte case: val = %#v\n", depth, val) rawmsg := C.allocVector(C.RAWSXP, C.R_xlen_t(len(val))) if depth == 0 { C.Rf_protect(rawmsg) } C.memcpy(unsafe.Pointer(C.RAW(rawmsg)), unsafe.Pointer(&val[0]), C.size_t(len(val))) return rawmsg case nil: return C.R_NilValue default: fmt.Printf("unknown type in type switch, val = %#v. type = %T.\n", val, val) } return s }
// new policy: decodeHelper should always return a protected s, // and the user/client/caller of decodeHelper() is responsible // for unprotecting s if they are embedding it. This is // much easier to audit for correctness. // // if jsonHeuristicDecode then we'll treat raw []byte that // start with '{' as JSON and try to decode them too. // func decodeHelper(r interface{}, depth int, jsonHeuristicDecode bool) (s C.SEXP) { defer func() { r := recover() if r != nil { // truncated or mal-formed msgpack can cause us problems... err, isErr := r.(error) if !isErr { err = fmt.Errorf("'%v'", r) } C.ReportErrorToR_NoReturn(C.CString(panicErrIntro + err.Error() + "\n" + string(debug.Stack()))) } }() VPrintf("decodeHelper() at depth %d, decoded type is %T\n", depth, r) switch val := r.(type) { case string: VPrintf("depth %d found string case: val = %#v\n", depth, val) s = C.Rf_mkString(C.CString(val)) C.Rf_protect(s) return s case int: VPrintf("depth %d found int case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(float64(val))) C.Rf_protect(s) return s case int32: VPrintf("depth %d found int32 case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(float64(val))) C.Rf_protect(s) return s case int64: VPrintf("depth %d found int64 case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(float64(val))) C.Rf_protect(s) return s case float64: VPrintf("depth %d found float64 case: val = %#v\n", depth, val) s = C.Rf_ScalarReal(C.double(val)) C.Rf_protect(s) return s case []interface{}: VPrintf("depth %d found []interface{} case: val = %#v\n", depth, val) var sxpTy C.SEXPTYPE = C.VECSXP lenval := len(val) if lenval == 0 { emptyvec := C.allocVector(C.NILSXP, C.R_xlen_t(0)) C.Rf_protect(emptyvec) return emptyvec } if lenval > 0 { first := val[0] VPrintf(" ... also at depth %d, ---> first has type '%T' and value '%v'\n", depth, first, first) switch first.(type) { case string: sxpTy = C.STRSXP stringSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(stringSlice) for i := range val { C.SET_STRING_ELT(stringSlice, C.R_xlen_t(i), C.mkChar(C.CString(val[i].(string)))) } return stringSlice case bool: sxpTy = C.LGLSXP boolSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(boolSlice) for i := range val { switch val[i].(bool) { case true: C.set_lglsxp_true(boolSlice, C.ulonglong(i)) case false: C.set_lglsxp_false(boolSlice, C.ulonglong(i)) } } return boolSlice case int64: // we can only realistically hope to preserve 53 bits worth here. // todo? unless... can we require bit64 package be available somehow? sxpTy = C.REALSXP numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(numSlice) size := unsafe.Sizeof(C.double(0)) naflag := false rmax := int64(C.pow(FLT_RADIX, DBL_MANT_DIG) - 1) //VPrintf("rmax = %v\n", rmax) // rmax = 9007199254740991 rmin := -rmax ptrNumSlice := unsafe.Pointer(C.REAL(numSlice)) var ui uintptr var rhs C.double for i := range val { n := val[i].(int64) VPrintf("n = %d, rmax = %d, n > rmax = %v\n", n, rmax, n > rmax) if n < rmin || n > rmax { naflag = true } ui = uintptr(i) rhs = C.double(float64(n)) // Try to avoid any gc activity (from the Go runtime) while // in the middle of uintptr <-> unsafe.Pointer conversion, as // if the gc were to catch us in the middle of that conversion // it might crash. // Hence we do pointer arithmetic all at once in one expression, // which is at present (Oct 2015) is the recommended safe way // to do pointer arithmetic in Go. See // https://github.com/golang/go/issues/8994 for discussion. *((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*ui))) = rhs } if naflag { C.WarnAndContinue(C.CString("integer precision lost while converting to double")) } return numSlice case float64: sxpTy = C.REALSXP numSlice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(numSlice) size := unsafe.Sizeof(C.double(0)) // unfortunately C.memmove() doesn't work here (I tried). I speculate this is because val[i] is // really wrapped in an interface{} rather than being a actual float64. val *is* an // []interface{} after all. var rhs C.double ptrNumSlice := unsafe.Pointer(C.REAL(numSlice)) for i := range val { rhs = C.double(val[i].(float64)) *((*C.double)(unsafe.Pointer(uintptr(ptrNumSlice) + size*uintptr(i)))) = rhs } return numSlice } } intslice := C.allocVector(sxpTy, C.R_xlen_t(lenval)) C.Rf_protect(intslice) for i := range val { elt := decodeHelper(val[i], depth+1, jsonHeuristicDecode) C.SET_VECTOR_ELT(intslice, C.R_xlen_t(i), elt) C.Rf_unprotect_ptr(elt) // safely inside intslice now } return intslice case map[string]interface{}: s = C.allocVector(C.VECSXP, C.R_xlen_t(len(val))) C.Rf_protect(s) names := C.allocVector(C.VECSXP, C.R_xlen_t(len(val))) C.Rf_protect(names) VPrintf("depth %d found map[string]interface case: val = %#v\n", depth, val) sortedMapKey, sortedMapVal := makeSortedSlicesFromMap(val) for i := range sortedMapKey { ele := decodeHelper(sortedMapVal[i], depth+1, jsonHeuristicDecode) C.SET_VECTOR_ELT(s, C.R_xlen_t(i), ele) C.Rf_unprotect_ptr(ele) // unprotect ele now that it is safely inside s. ksexpString := C.Rf_mkString(C.CString(sortedMapKey[i])) C.Rf_protect(ksexpString) C.SET_VECTOR_ELT(names, C.R_xlen_t(i), ksexpString) C.Rf_unprotect_ptr(ksexpString) // safely inside names } C.setAttrib(s, C.R_NamesSymbol, names) C.Rf_unprotect_ptr(names) // safely attached to s. case []byte: VPrintf("depth %d found []byte case: val = %#v\n", depth, val) if jsonHeuristicDecode { if len(val) > 0 && val[0] == '{' { jsonToR := decodeJsonToR(val) C.Rf_protect(jsonToR) return jsonToR } } rawmsg := C.allocVector(C.RAWSXP, C.R_xlen_t(len(val))) C.Rf_protect(rawmsg) if len(val) > 0 { C.memcpy(unsafe.Pointer(C.RAW(rawmsg)), unsafe.Pointer(&val[0]), C.size_t(len(val))) } return rawmsg case nil: s = C.R_NilValue C.Rf_protect(s) // must, for uniformly consistency. else we get protect imbalances. return s case bool: boolmsg := C.allocVector(C.LGLSXP, C.R_xlen_t(1)) C.Rf_protect(boolmsg) if val { C.set_lglsxp_true(boolmsg, 0) } else { C.set_lglsxp_false(boolmsg, 0) } return boolmsg default: fmt.Printf("unknown type in type switch, val = %#v. type = %T.\n", val, val) } return s }