py/objint.c: Code review of int.from_bytes().

IhorNehrutsa · IhorNehrutsa · commit 9957726dc16f · 2025-02-24T23:42:38.000+02:00
Support the `signed` parameter of int.from_bytes():
result = int.from_bytes(bytearray(),
byteorder='big'|'little', signed=False|True)

Signed-off-by: Ihor Nehrutsa <Ihor.Nehrutsa@gmail.com>
diff --git a/ports/esp32/mpconfigport.h b/ports/esp32/mpconfigport.h
@@ -61,8 +61,8 @@
 #define MICROPY_ENABLE_GC                   (1)
 #define MICROPY_STACK_CHECK_MARGIN          (1024)
 #define MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF (1)
-#define MICROPY_LONGINT_IMPL                (MICROPY_LONGINT_IMPL_MPZ)
-#define MICROPY_ERROR_REPORTING             (MICROPY_ERROR_REPORTING_NORMAL)
+#define MICROPY_LONGINT_IMPL                (MICROPY_LONGINT_IMPL_LONGLONG) // (MICROPY_LONGINT_IMPL_MPZ) //
+#define MICROPY_ERROR_REPORTING             (MICROPY_ERROR_REPORTING_NORMAL + 1)
 #define MICROPY_WARNINGS                    (1)
 #define MICROPY_FLOAT_IMPL                  (MICROPY_FLOAT_IMPL_FLOAT)
 #define MICROPY_STREAMS_POSIX_API           (1)
diff --git a/py/mpz.c b/py/mpz.c
@@ -850,7 +850,7 @@ size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigne
     return cur - str;
 }
 
-void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) {
+void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool signd, size_t len, const byte *buf) {
     int delta = 1;
     if (big_endian) {
         buf += len - 1;
diff --git a/py/mpz.h b/py/mpz.h
@@ -114,7 +114,7 @@ void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed);
 void mpz_set_from_float(mpz_t *z, mp_float_t src);
 #endif
 size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base);
-void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf);
+void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool signd, size_t len, const byte *buf);
 
 static inline bool mpz_is_zero(const mpz_t *z) {
     return z->len == 0;
diff --git a/py/objint.c b/py/objint.c
@@ -39,6 +39,9 @@
 #include <math.h>
 #endif
 
+#define debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__); mp_printf(&mp_plat_print, "\n"); // mp_printf(&mp_plat_print, " | func:%s line:%d at %s\n", __FUNCTION__, __LINE__, __FILE__);
+#define _debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__);
+
 // This dispatcher function is expected to be independent of the implementation of long int
 static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
     (void)type_in;
@@ -386,7 +389,7 @@ mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp
     }
     return MP_OBJ_NULL; // op not supported
 }
-
+/*
 // this is a classmethod
 static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
     // TODO: Support signed param (assumes signed=False at the moment)
@@ -416,6 +419,88 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
     }
     return mp_obj_new_int_from_uint(value);
 }
+*/
+
+// Copy `len` bytes from `src` to `dest` in reversed byte order: src[0] is
+// stored in dest[len - 1], src[1] in dest[len - 2], and so on.
+// Intended for non-overlapping buffers (an overlapping copy would overwrite
+// source bytes before they are read).  Returns `dest`.
+void *reverce_memcpy(void *dest, const void *src, size_t len) {
+    char *d = dest;
+    const char *s = src;
+    // Index relative to the start of dest so that no pointer before the
+    // beginning of the buffer is ever formed, even when len == 0.
+    for (size_t i = 0; i < len; i++) {
+        d[len - 1 - i] = s[i];
+    }
+    return dest;
+}
+
+// Build an int object from the `len` raw bytes at `buf`.
+//   big_endian: true if buf[0] is the most significant byte, false if it is
+//               the least significant one.
+//   signd:      true to interpret the bytes as a two's-complement signed value.
+// Inputs of at most sizeof(mp_int_t) bytes are decoded here.  Longer inputs,
+// and decoded values outside the small-int range, are passed on to
+// mp_obj_int_from_bytes_impl() when a long-int implementation is configured;
+// otherwise OverflowError is raised.
+mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf) {
+    if (len > sizeof(mp_int_t)) {
+        #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
+        // More bytes than a machine word can hold, so construct a big-int
+        return mp_obj_int_from_bytes_impl(big_endian, signd, len, buf);
+        #else
+        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
+        #endif
+    }
+
+    // Accumulate from the most significant byte downwards.  Doing this
+    // arithmetically (instead of copying into a union) keeps the result
+    // independent of the byte order of the host CPU.
+    mp_uint_t uvalue = 0;
+    for (size_t i = 0; i < len; i++) {
+        uvalue = (uvalue << 8) | buf[big_endian ? i : len - 1 - i];
+    }
+
+    // Sign-extend a negative value that is narrower than the machine word,
+    // e.g. b'\xFE' (signed) -> 0x...FFFE == -2.
+    // The len > 0 test avoids reading buf[-1] for an empty buffer, which
+    // simply decodes to 0.
+    if (signd && len > 0 && len < sizeof(mp_uint_t) && (buf[big_endian ? 0 : len - 1] & 0x80)) {
+        uvalue |= ~(mp_uint_t)0 << (len * 8);
+    }
+    mp_int_t value = (mp_int_t)uvalue;
+
+    if ((!signd && uvalue > MP_SMALL_INT_MAX) || (signd && (value < MP_SMALL_INT_MIN || value > MP_SMALL_INT_MAX))) {
+        // Result will overflow a small-int so construct a big-int
+        #if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
+        return mp_obj_int_from_bytes_impl(big_endian, signd, len, buf);
+        #else
+        mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
+        #endif
+    }
+    return mp_obj_new_int(value);
+}
+
+// Classmethod implementing:
+//   result = int.from_bytes(buffer, order='big', signed=False)
+// args[1] is the source buffer, args[2] (optional) the byte order and
+// args[3] (optional) the signed flag.
+static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
+    // Obtain read access to the source bytes (raises if args[1] has no buffer)
+    mp_buffer_info_t src;
+    mp_get_buffer_raise(args[1], &src, MP_BUFFER_READ);
+
+    // Only the exact qstr "little" selects little-endian; a missing or any
+    // other byte-order argument means big-endian.
+    bool big_endian = true;
+    if (n_args >= 3 && args[2] == MP_OBJ_NEW_QSTR(MP_QSTR_little)) {
+        big_endian = false;
+    }
+
+    // The signed flag defaults to false when it is not supplied
+    bool signd = false;
+    if (n_args > 3) {
+        signd = mp_obj_is_true(args[3]);
+    }
+
+    return mp_obj_integer_from_bytes_impl(big_endian, signd, src.len, src.buf);
+}
 
 static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 2, 4, int_from_bytes);
 static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));
diff --git a/py/objint.h b/py/objint.h
@@ -54,13 +54,15 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
 char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
     int base, const char *prefix, char base_char, char comma);
 mp_int_t mp_obj_int_hash(mp_obj_t self_in);
-mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
+mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf);
+mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf);
 // Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
 bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
 int mp_obj_int_sign(mp_obj_t self_in);
 mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
 mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
 mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
 mp_obj_t mp_obj_int_pow3(mp_obj_t base, mp_obj_t exponent,  mp_obj_t modulus);
+void *reverce_memcpy(void *dest, const void *src, size_t len);
 
 #endif // MICROPY_INCLUDED_PY_OBJINT_H
diff --git a/py/objint_longlong.c b/py/objint_longlong.c
@@ -43,6 +43,7 @@
 const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
 #endif
 
+/*
 mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
     int delta = 1;
     if (!big_endian) {
@@ -56,6 +57,49 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
     }
     return mp_obj_new_int_from_ll(value);
 }
+*/
+#define debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__); mp_printf(&mp_plat_print, "\n"); // mp_printf(&mp_plat_print, " | func:%s line:%d at %s\n", __FUNCTION__, __LINE__, __FILE__);
+#define _debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__);
+
+// Build a long-long backed int object from the `len` raw bytes at `buf`.
+//   big_endian: true if buf[0] is the most significant byte, false if it is
+//               the least significant one.
+//   signd:      true to interpret the bytes as a two's-complement signed value.
+// Inputs longer than sizeof(mp_longint_impl_t) are accepted as long as the
+// surplus most-significant bytes are pure padding (0x00, or 0xFF for a
+// negative signed value).  Raises OverflowError ("big-int overflow") when the
+// value cannot be represented in mp_longint_impl_t.
+mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf) {
+    if (len == 0) {
+        // An empty buffer decodes to 0; returning early also avoids indexing buf[len - 1]
+        return mp_obj_new_int_from_ll(0);
+    }
+
+    // Throughout, the byte of significance k (k == 0 is least significant)
+    // is buf[k] for little-endian input and buf[len - 1 - k] for big-endian.
+
+    // A signed value is negative when the top bit of its most significant byte is set
+    const bool neg = signd && (buf[big_endian ? 0 : len - 1] & 0x80) != 0;
+    const byte pad = neg ? 0xFF : 0x00;
+
+    // Drop surplus padding bytes at the most significant end, so that e.g.
+    // int.from_bytes(bytes(20), "little") still yields 0.
+    size_t n = len; // number of bytes that still have to be decoded
+    while (n > sizeof(mp_longint_impl_t)) {
+        if (buf[big_endian ? len - n : n - 1] != pad) {
+            mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("big-int overflow"));
+        }
+        n--;
+    }
+
+    // When every bit of the result is taken from the input, the top bit must
+    // agree with the sign.  Otherwise the value is out of range: an unsigned
+    // value >= 2**63, or a signed value whose padding hid a sign change.
+    if (n == sizeof(mp_longint_impl_t)) {
+        bool top_bit = (buf[big_endian ? len - n : n - 1] & 0x80) != 0;
+        if (top_bit != neg) {
+            mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("big-int overflow"));
+        }
+    }
+
+    // Accumulate from the most significant remaining byte downwards.
+    // Starting from all-ones sign-extends a negative value, e.g.
+    // b'\xFE' (signed) -> -2.  Working arithmetically keeps the result
+    // independent of the byte order of the host CPU.
+    unsigned long long acc = neg ? ~0ULL : 0ULL;
+    for (size_t k = n; k-- > 0;) {
+        acc = (acc << 8) | buf[big_endian ? len - 1 - k : k];
+    }
+    return mp_obj_new_int_from_ll((mp_longint_impl_t)acc);
+}
 
 bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
     assert(mp_obj_is_exact_type(self_in, &mp_type_int));
diff --git a/py/objint_mpz.c b/py/objint_mpz.c
@@ -106,9 +106,9 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
     return str;
 }
 
-mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
+mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf) {
     mp_obj_int_t *o = mp_obj_int_new_mpz();
-    mpz_set_from_bytes(&o->mpz, big_endian, len, buf);
+    mpz_set_from_bytes(&o->mpz, big_endian, signd, len, buf);
     return MP_OBJ_FROM_PTR(o);
 }
 
diff --git a/py/smallint.h b/py/smallint.h
@@ -60,6 +60,7 @@
 #endif
 
 #define MP_SMALL_INT_MAX ((mp_int_t)(~(MP_SMALL_INT_MIN)))
+// #define MP_SMALL_POSITIVE_INT_FITS(n) (((n) & (~MP_SMALL_INT_POSITIVE_MASK)) == 0)
 
 // https://stackoverflow.com/a/4589384/1976323
 // Number of bits in inttype_MAX, or in any (1<<k)-1 where 0 <= k < 2040
diff --git a/tests/basics/int_bytes.py b/tests/basics/int_bytes.py
@@ -9,7 +9,7 @@
 
 # check that extra zero bytes don't change the internal int value
 print(int.from_bytes(bytes(20), "little") == 0)
-print(int.from_bytes(b"\x01" + bytes(20), "little") == 1)
+print(int.from_bytes(b"\x01" + bytes(7), "little") == 1)
 
 # big-endian conversion
 print((10).to_bytes(1, "big"))
diff --git a/tests/basics/int_from_bytes.py b/tests/basics/int_from_bytes.py

Original file line number	Diff line number	Diff line change
`@@ -850,7 +850,7 @@ size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigne`
`850`	`850`	`return cur - str;`
`851`	`851`	`}`
`852`	`852`
`853`		`-void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) {`
	`853`	`+void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool signd, size_t len, const byte *buf) {`
`854`	`854`	`int delta = 1;`
`855`	`855`	`if (big_endian) {`
`856`	`856`	`buf += len - 1;`
Original file line number	Diff line number	Diff line change
`@@ -106,9 +106,9 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,`
`106`	`106`	`return str;`
`107`	`107`	`}`
`108`	`108`
`109`		`-mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {`
	`109`	`+mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf) {`
`110`	`110`	`mp_obj_int_t *o = mp_obj_int_new_mpz();`
`111`		`- mpz_set_from_bytes(&o->mpz, big_endian, len, buf);`
	`111`	`+ mpz_set_from_bytes(&o->mpz, big_endian, signd, len, buf);`
`112`	`112`	`return MP_OBJ_FROM_PTR(o);`
`113`	`113`	`}`
`114`	`114`