Skip to content

Commit 9957726

Browse files
committed
py/objint.c: Code review of int.from_bytes().
Support signed param: result = int.from_bytes(bytearray(), order='big'|'little', signed=False|True) Signed-off-by: Ihor Nehrutsa <[email protected]>
1 parent 8987b39 commit 9957726

10 files changed

+333
-9
lines changed

ports/esp32/mpconfigport.h

+2-2
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@
6161
#define MICROPY_ENABLE_GC (1)
6262
#define MICROPY_STACK_CHECK_MARGIN (1024)
6363
#define MICROPY_ENABLE_EMERGENCY_EXCEPTION_BUF (1)
64-
#define MICROPY_LONGINT_IMPL (MICROPY_LONGINT_IMPL_MPZ)
65-
#define MICROPY_ERROR_REPORTING (MICROPY_ERROR_REPORTING_NORMAL)
64+
#define MICROPY_LONGINT_IMPL (MICROPY_LONGINT_IMPL_LONGLONG) // (MICROPY_LONGINT_IMPL_MPZ) //
65+
#define MICROPY_ERROR_REPORTING (MICROPY_ERROR_REPORTING_NORMAL + 1)
6666
#define MICROPY_WARNINGS (1)
6767
#define MICROPY_FLOAT_IMPL (MICROPY_FLOAT_IMPL_FLOAT)
6868
#define MICROPY_STREAMS_POSIX_API (1)

py/mpz.c

+1-1
Original file line numberDiff line numberDiff line change
@@ -850,7 +850,7 @@ size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigne
850850
return cur - str;
851851
}
852852

853-
void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf) {
853+
void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool signd, size_t len, const byte *buf) {
854854
int delta = 1;
855855
if (big_endian) {
856856
buf += len - 1;

py/mpz.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ void mpz_set_from_ll(mpz_t *z, long long i, bool is_signed);
114114
void mpz_set_from_float(mpz_t *z, mp_float_t src);
115115
#endif
116116
size_t mpz_set_from_str(mpz_t *z, const char *str, size_t len, bool neg, unsigned int base);
117-
void mpz_set_from_bytes(mpz_t *z, bool big_endian, size_t len, const byte *buf);
117+
void mpz_set_from_bytes(mpz_t *z, bool big_endian, bool signd, size_t len, const byte *buf);
118118

119119
static inline bool mpz_is_zero(const mpz_t *z) {
120120
return z->len == 0;

py/objint.c

+86-1
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@
3939
#include <math.h>
4040
#endif
4141

42+
#define debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__); mp_printf(&mp_plat_print, "\n"); // mp_printf(&mp_plat_print, " | func:%s line:%d at %s\n", __FUNCTION__, __LINE__, __FILE__);
43+
#define _debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__);
44+
4245
// This dispatcher function is expected to be independent of the implementation of long int
4346
static mp_obj_t mp_obj_int_make_new(const mp_obj_type_t *type_in, size_t n_args, size_t n_kw, const mp_obj_t *args) {
4447
(void)type_in;
@@ -386,7 +389,7 @@ mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp
386389
}
387390
return MP_OBJ_NULL; // op not supported
388391
}
389-
392+
/*
390393
// this is a classmethod
391394
static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
392395
// TODO: Support signed param (assumes signed=False at the moment)
@@ -416,6 +419,88 @@ static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
416419
}
417420
return mp_obj_new_int_from_uint(value);
418421
}
422+
*/
423+
424+
// Copy `len` bytes from `src` into `dest` in reversed byte order, i.e.
// dest[len - 1 - i] = src[i] for every i in [0, len).
//
// The two regions must not overlap (same contract as memcpy).
// A `len` of zero is a no-op: no pointer outside either buffer is formed.
// Returns `dest`.
void *reverce_memcpy(void *dest, const void *src, size_t len) {
    unsigned char *out = dest;
    const unsigned char *in = src;
    // Index-based addressing avoids computing `dest + len - 1` when len == 0,
    // which would be a pointer before the start of the destination object.
    for (size_t i = 0; i < len; i++) {
        out[len - 1 - i] = in[i];
    }
    return dest;
}
432+
433+
mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf) {
434+
if (len > sizeof(mp_int_t)) {
435+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
436+
// Result will overflow a small-int size so construct a big-int
437+
return mp_obj_int_from_bytes_impl(big_endian, signd, len, buf);
438+
#else
439+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
440+
#endif
441+
}
442+
union {
443+
mp_int_t value;
444+
mp_uint_t uvalue;
445+
byte buf[sizeof(mp_int_t)];
446+
} result = {0};
447+
// #if sizeof(mp_int_t) != sizeof(mp_uint_t)
448+
// #error "sizeof(mp_int_t) != sizeof(mp_uint_t)"
449+
// #endif
450+
451+
if (big_endian) {
452+
reverce_memcpy(&result, buf, len);
453+
} else { // little-endian
454+
memcpy(&result, buf, len);
455+
}
456+
457+
if ((signd) && (sizeof(result) > len) && (result.buf[len - 1] & 0x80)) {
458+
// Sign propagation in little-endian
459+
// x = 2
460+
// x.to_bytes(1, 'little', True) -> b'\x02'
461+
// x.to_bytes(4, 'little', True) -> b'\x02\x00\x00\x00'
462+
// x = -2
463+
// x.to_bytes(1, 'little', True) -> b'\xFE'
464+
// x.to_bytes(4, 'little', True) -> b'\xFE\xFF\xFF\xFF'
465+
_debug_printf(" 1result=0x%08X=", result.uvalue);
466+
for (unsigned int i = 0; i < sizeof(result); i++) {
467+
_debug_printf("\\%02X", result.buf[i]);
468+
}
469+
debug_printf("");
470+
471+
memset(result.buf + len, 0xFF, sizeof(result) - len);
472+
473+
_debug_printf("\n 2result=0x%08X=", result.uvalue);
474+
for (unsigned int i = 0; i < sizeof(result); i++) {
475+
_debug_printf("\\%02X", result.buf[i]);
476+
}
477+
debug_printf("");
478+
}
479+
// debug_printf("big_endian:%d signed:%d len:%d sizeof(result):%d result.value:%ld=0x%X", big_endian, signd, len, sizeof(result), result.value, result.value);
480+
debug_printf("MP_SMALL_INT_MAX=%d=0x%X, MP_SMALL_INT_MIN=%d=0x%X, MP_SMALL_INT_POSITIVE_MASK=%d=0x%X", MP_SMALL_INT_MAX, MP_SMALL_INT_MAX, MP_SMALL_INT_MIN, MP_SMALL_INT_MIN, MP_SMALL_INT_POSITIVE_MASK, MP_SMALL_INT_POSITIVE_MASK);
481+
// debug_printf("(MP_SMALL_INT_MAX << 1) + 1=%d=0x%X", (MP_SMALL_INT_MAX << 1) + 1, (MP_SMALL_INT_MAX << 1) + 1);
482+
if (((!signd) && (result.uvalue > MP_SMALL_INT_MAX)) || (signd && ((result.value < MP_SMALL_INT_MIN) || (result.value > MP_SMALL_INT_MAX)))) {
483+
// Result will overflow a small-int so construct a big-int
484+
#if MICROPY_LONGINT_IMPL != MICROPY_LONGINT_IMPL_NONE
485+
return mp_obj_int_from_bytes_impl(big_endian, signd, len, buf);
486+
#else
487+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("small-int overflow"));
488+
#endif
489+
}
490+
return mp_obj_new_int(result.value);
491+
}
492+
493+
// this is a classmethod
494+
// result = int.from_bytes(bytearray(), order='big', signed=False)
495+
static mp_obj_t int_from_bytes(size_t n_args, const mp_obj_t *args) {
496+
// get the buffer info
497+
mp_buffer_info_t bufinfo;
498+
mp_get_buffer_raise(args[1], &bufinfo, MP_BUFFER_READ);
499+
bool big_endian = n_args < 3 || args[2] != MP_OBJ_NEW_QSTR(MP_QSTR_little);
500+
bool signd = (n_args > 3) && mp_obj_is_true(args[3]);
501+
502+
return mp_obj_integer_from_bytes_impl(big_endian, signd, bufinfo.len, bufinfo.buf);
503+
}
419504

420505
static MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(int_from_bytes_fun_obj, 2, 4, int_from_bytes);
421506
static MP_DEFINE_CONST_CLASSMETHOD_OBJ(int_from_bytes_obj, MP_ROM_PTR(&int_from_bytes_fun_obj));

py/objint.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -54,13 +54,15 @@ char *mp_obj_int_formatted(char **buf, size_t *buf_size, size_t *fmt_size, mp_co
5454
char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size, mp_const_obj_t self_in,
5555
int base, const char *prefix, char base_char, char comma);
5656
mp_int_t mp_obj_int_hash(mp_obj_t self_in);
57-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf);
57+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf);
58+
mp_obj_t mp_obj_integer_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf);
5859
// Returns true if 'self_in' fit into 'len' bytes of 'buf' without overflowing, 'buf' is truncated otherwise.
5960
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf);
6061
int mp_obj_int_sign(mp_obj_t self_in);
6162
mp_obj_t mp_obj_int_unary_op(mp_unary_op_t op, mp_obj_t o_in);
6263
mp_obj_t mp_obj_int_binary_op(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
6364
mp_obj_t mp_obj_int_binary_op_extra_cases(mp_binary_op_t op, mp_obj_t lhs_in, mp_obj_t rhs_in);
6465
mp_obj_t mp_obj_int_pow3(mp_obj_t base, mp_obj_t exponent, mp_obj_t modulus);
66+
void *reverce_memcpy(void *dest, const void *src, size_t len);
6567

6668
#endif // MICROPY_INCLUDED_PY_OBJINT_H

py/objint_longlong.c

+44
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
const mp_obj_int_t mp_sys_maxsize_obj = {{&mp_type_int}, MP_SSIZE_MAX};
4444
#endif
4545

46+
/*
4647
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
4748
int delta = 1;
4849
if (!big_endian) {
@@ -56,6 +57,49 @@ mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf
5657
}
5758
return mp_obj_new_int_from_ll(value);
5859
}
60+
*/
61+
#define debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__); mp_printf(&mp_plat_print, "\n"); // mp_printf(&mp_plat_print, " | func:%s line:%d at %s\n", __FUNCTION__, __LINE__, __FILE__);
62+
#define _debug_printf(...) // mp_printf(&mp_plat_print, __VA_ARGS__);
63+
64+
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf) {
65+
if (len > sizeof(mp_longint_impl_t)) {
66+
mp_raise_msg(&mp_type_OverflowError, MP_ERROR_TEXT("big-int overflow"));
67+
}
68+
union {
69+
mp_longint_impl_t value;
70+
byte buf[sizeof(mp_longint_impl_t)];
71+
} result = {0};
72+
73+
if (big_endian) {
74+
reverce_memcpy(&result, buf, len);
75+
} else { // little-endian
76+
memcpy(&result, buf, len);
77+
}
78+
79+
if ((signd) && (sizeof(result) > len) && (result.buf[len - 1] & 0x80)) {
80+
// Sign propagation in little-endian
81+
// x = 2
82+
// x.to_bytes(1, 'little', True) -> b'\x02'
83+
// x.to_bytes(4, 'little', True) -> b'\x02\x00\x00\x00'
84+
// x = -2
85+
// x.to_bytes(1, 'little', True) -> b'\xFE'
86+
// x.to_bytes(4, 'little', True) -> b'\xFE\xFF\xFF\xFF'
87+
_debug_printf(" 3result=0x%08X=", result.value);
88+
for (unsigned int i = 0; i < sizeof(result); i++) {
89+
_debug_printf("\\%02X", result.buf[i]);
90+
}
91+
debug_printf("");
92+
93+
memset(result.buf + len, 0xFF, sizeof(result) - len);
94+
95+
_debug_printf("\n 4result=0x%08X=", result.value);
96+
for (unsigned int i = 0; i < sizeof(result); i++) {
97+
_debug_printf("\\%02X", result.buf[i]);
98+
}
99+
debug_printf("");
100+
}
101+
return mp_obj_new_int_from_ll(result.value);
102+
}
59103

60104
bool mp_obj_int_to_bytes_impl(mp_obj_t self_in, bool big_endian, size_t len, byte *buf) {
61105
assert(mp_obj_is_exact_type(self_in, &mp_type_int));

py/objint_mpz.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -106,9 +106,9 @@ char *mp_obj_int_formatted_impl(char **buf, size_t *buf_size, size_t *fmt_size,
106106
return str;
107107
}
108108

109-
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, size_t len, const byte *buf) {
109+
// MPZ-backed construction of an int object from raw bytes: allocates a new
// mpz int object and fills it via mpz_set_from_bytes().  In this hunk the call
// gains the `signd` argument, which is forwarded unchanged.
// NOTE(review): the body of mpz_set_from_bytes() is not visible here -- confirm
// that it actually uses `signd`, otherwise signed inputs are decoded as unsigned.
mp_obj_t mp_obj_int_from_bytes_impl(bool big_endian, bool signd, size_t len, const byte *buf) {
110110
mp_obj_int_t *o = mp_obj_int_new_mpz();
111-
mpz_set_from_bytes(&o->mpz, big_endian, len, buf);
111+
mpz_set_from_bytes(&o->mpz, big_endian, signd, len, buf);
112112
return MP_OBJ_FROM_PTR(o);
113113
}
114114

py/smallint.h

+1
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
#endif
6161

6262
#define MP_SMALL_INT_MAX ((mp_int_t)(~(MP_SMALL_INT_MIN)))
63+
// #define MP_SMALL_POSITIVE_INT_FITS(n) (((n) & (~MP_SMALL_INT_POSITIVE_MASK)) == 0)
6364

6465
// https://stackoverflow.com/a/4589384/1976323
6566
// Number of bits in inttype_MAX, or in any (1<<k)-1 where 0 <= k < 2040

tests/basics/int_bytes.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99

1010
# check that extra zero bytes don't change the internal int value
1111
print(int.from_bytes(bytes(20), "little") == 0)
12-
print(int.from_bytes(b"\x01" + bytes(20), "little") == 1)
12+
print(int.from_bytes(b"\x01" + bytes(7), "little") == 1)
1313

1414
# big-endian conversion
1515
print((10).to_bytes(1, "big"))

0 commit comments

Comments
 (0)