Skip to content

Commit

Permalink
Add utility functions for string to integer conversions (#366)
Browse files Browse the repository at this point in the history
* Add utility functions, improve integer conversion functions

- move `is_be()` to cutils.h
- add `is_upper_ascii()` and `to_upper_ascii()`
- add extensive benchmark for integer conversion variants in **tests/test_conv.c**
- add `u32toa()`, `i32toa()`, `u64toa()`, `i64toa()` based on register shift variant
- add  `u32toa_radix()`, `u64toa_radix()`, `i64toa_radix()` based on length_loop variant
- use direct converters instead of `snprintf()`
- copy NaN and Infinity directly in `js_dtoa1()`
- optimize `js_number_toString()` for small integers
- use `JS_NewStringLen()` instead of `JS_NewString()` when possible
- add more precise conversion tests in microbench.js
- disable some benchmark tests for gcc (they cause ASAN failures)
  • Loading branch information
chqrlie authored Apr 19, 2024
1 parent f326a7a commit 83726bb
Show file tree
Hide file tree
Showing 7 changed files with 2,078 additions and 91 deletions.
3 changes: 3 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -324,6 +324,9 @@ if(BUILD_EXAMPLES AND NOT WIN32)
target_link_libraries(test_fib ${qjs_libs})
endif()

add_executable(test_conv
tests/test_conv.c
)

# Install target
#
Expand Down
6 changes: 6 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,9 @@ $(QJS): $(BUILD_DIR)
$(QJSC): $(BUILD_DIR)
cmake --build $(BUILD_DIR) --target qjsc -j $(JOBS)

$(BUILD_DIR)/test_conv: $(BUILD_DIR) tests/test_conv.c
cmake --build $(BUILD_DIR) --target test_conv

install: $(QJS) $(QJSC)
cmake --build $(BUILD_DIR) --target install

Expand Down Expand Up @@ -86,6 +89,9 @@ test: $(QJS)
$(QJS) tests/test_worker.js
$(QJS) tests/test_queue_microtask.js

testconv: $(BUILD_DIR)/test_conv
$(BUILD_DIR)/test_conv

test262: $(QJS)
$(RUN262) -m -c test262.conf -a

Expand Down
231 changes: 230 additions & 1 deletion cutils.c
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,8 @@ void dbuf_free(DynBuf *s)
memset(s, 0, sizeof(*s));
}

/*--- Unicode / UTF-8 utility functions --*/

/* Note: at most 31 bits are encoded. At most UTF8_CHAR_LEN_MAX bytes
are output. */
int unicode_to_utf8(uint8_t *buf, unsigned int c)
Expand Down Expand Up @@ -315,6 +317,231 @@ int unicode_from_utf8(const uint8_t *p, int max_len, const uint8_t **pp)
return c;
}

/*--- integer to string conversions --*/

/* All conversion functions:
- require a destination array `buf` of sufficient length
- write the string representation at the beginning of `buf`
- null terminate the string
- return the string length
*/

/* 2 <= base <= 36 */
char const digits36[36] = "0123456789abcdefghijklmnopqrstuvwxyz";

/* using u32toa_shift variant */

#define gen_digit(buf, c) if (is_be()) \
buf = (buf >> 8) | ((uint64_t)(c) << ((sizeof(buf) - 1) * 8)); \
else \
buf = (buf << 8) | (c)

size_t u7toa_shift(char dest[minimum_length(8)], uint32_t n)
{
size_t len = 1;
uint64_t buf = 0;
while (n >= 10) {
uint32_t quo = n % 10;
n /= 10;
gen_digit(buf, '0' + quo);
len++;
}
gen_digit(buf, '0' + n);
memcpy(dest, &buf, sizeof buf);
return len;
}

size_t u07toa_shift(char dest[minimum_length(8)], uint32_t n, size_t len)
{
size_t i;
dest += len;
dest[7] = '\0';
for (i = 7; i-- > 1;) {
uint32_t quo = n % 10;
n /= 10;
dest[i] = (char)('0' + quo);
}
dest[i] = (char)('0' + n);
return len + 7;
}

size_t u32toa(char buf[minimum_length(11)], uint32_t n)
{
if (n < 10) {
buf[0] = (char)('0' + n);
buf[1] = '\0';
return 1;
}
#define TEN_POW_7 10000000
if (n >= TEN_POW_7) {
uint32_t quo = n / TEN_POW_7;
n %= TEN_POW_7;
size_t len = u7toa_shift(buf, quo);
return u07toa_shift(buf, n, len);
}
return u7toa_shift(buf, n);
}

size_t u64toa(char buf[minimum_length(21)], uint64_t n)
{
if (likely(n < 0x100000000))
return u32toa(buf, n);

size_t len;
if (n >= TEN_POW_7) {
uint64_t n1 = n / TEN_POW_7;
n %= TEN_POW_7;
if (n1 >= TEN_POW_7) {
uint32_t quo = n1 / TEN_POW_7;
n1 %= TEN_POW_7;
len = u7toa_shift(buf, quo);
len = u07toa_shift(buf, n1, len);
} else {
len = u7toa_shift(buf, n1);
}
return u07toa_shift(buf, n, len);
}
return u7toa_shift(buf, n);
}

size_t i32toa(char buf[minimum_length(12)], int32_t n)
{
if (likely(n >= 0))
return u32toa(buf, n);

buf[0] = '-';
return 1 + u32toa(buf + 1, -(uint32_t)n);
}

size_t i64toa(char buf[minimum_length(22)], int64_t n)
{
if (likely(n >= 0))
return u64toa(buf, n);

buf[0] = '-';
return 1 + u64toa(buf + 1, -(uint64_t)n);
}

/* using u32toa_radix_length variant */

static uint8_t const radix_shift[64] = {
0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0,
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
};

size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned base)
{
#ifdef USE_SPECIAL_RADIX_10
if (likely(base == 10))
return u32toa(buf, n);
#endif
if (n < base) {
buf[0] = digits36[n];
buf[1] = '\0';
return 1;
}
int shift = radix_shift[base & 63];
if (shift) {
uint32_t mask = (1 << shift) - 1;
size_t len = (32 - clz32(n) + shift - 1) / shift;
size_t last = n & mask;
n /= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n & mask;
n >>= shift;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
} else {
size_t len = 2;
size_t last = n % base;
n /= base;
uint32_t nbase = base;
while (n >= nbase) {
nbase *= base;
len++;
}
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n % base;
n /= base;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
}
}

size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned base)
{
#ifdef USE_SPECIAL_RADIX_10
if (likely(base == 10))
return u64toa(buf, n);
#endif
int shift = radix_shift[base & 63];
if (shift) {
if (n < base) {
buf[0] = digits36[n];
buf[1] = '\0';
return 1;
}
uint64_t mask = (1 << shift) - 1;
size_t len = (64 - clz64(n) + shift - 1) / shift;
size_t last = n & mask;
n /= base;
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n & mask;
n >>= shift;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
} else {
if (likely(n < 0x100000000))
return u32toa_radix(buf, n, base);
size_t last = n % base;
n /= base;
uint64_t nbase = base;
size_t len = 2;
while (n >= nbase) {
nbase *= base;
len++;
}
char *end = buf + len;
*end-- = '\0';
*end-- = digits36[last];
while (n >= base) {
size_t quo = n % base;
n /= base;
*end-- = digits36[quo];
}
*end = digits36[n];
return len;
}
}

size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base)
{
if (likely(n >= 0))
return u64toa_radix(buf, n, base);

buf[0] = '-';
return 1 + u64toa_radix(buf + 1, -(uint64_t)n, base);
}

/*---- sorting with opaque argument ----*/

typedef void (*exchange_f)(void *a, void *b, size_t size);
typedef int (*cmp_f)(const void *, const void *, void *opaque);

Expand Down Expand Up @@ -614,6 +841,8 @@ void rqsort(void *base, size_t nmemb, size_t size, cmp_f cmp, void *opaque)
}
}

/*---- Portable time functions ----*/

#if defined(_MSC_VER)
// From: https://stackoverflow.com/a/26085827
static int gettimeofday_msvc(struct timeval *tp, struct timezone *tzp)
Expand Down Expand Up @@ -677,7 +906,7 @@ int64_t js__gettimeofday_us(void) {
return ((int64_t)tv.tv_sec * 1000000) + tv.tv_usec;
}

/* Cross-platform threading APIs. */
/*--- Cross-platform threading APIs. ----*/

#if !defined(EMSCRIPTEN) && !defined(__wasi__)

Expand Down
25 changes: 25 additions & 0 deletions cutils.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,14 @@ char *pstrcat(char *buf, int buf_size, const char *s);
int strstart(const char *str, const char *val, const char **ptr);
int has_suffix(const char *str, const char *suffix);

static inline uint8_t is_be(void) {
union {
uint16_t a;
uint8_t b;
} u = { 0x100 };
return u.b;
}

static inline int max_int(int a, int b)
{
if (a > b)
Expand Down Expand Up @@ -426,6 +434,23 @@ static inline int from_hex(int c)
return -1;
}

static inline uint8_t is_upper_ascii(uint8_t c) {
return c >= 'A' && c <= 'Z';
}

static inline uint8_t to_upper_ascii(uint8_t c) {
return c >= 'a' && c <= 'z' ? c - 'a' + 'A' : c;
}

extern char const digits36[36];
size_t u32toa(char buf[minimum_length(11)], uint32_t n);
size_t i32toa(char buf[minimum_length(12)], int32_t n);
size_t u64toa(char buf[minimum_length(21)], uint64_t n);
size_t i64toa(char buf[minimum_length(22)], int64_t n);
size_t u32toa_radix(char buf[minimum_length(33)], uint32_t n, unsigned int base);
size_t u64toa_radix(char buf[minimum_length(65)], uint64_t n, unsigned int base);
size_t i64toa_radix(char buf[minimum_length(66)], int64_t n, unsigned int base);

void rqsort(void *base, size_t nmemb, size_t size,
int (*cmp)(const void *, const void *, void *),
void *arg);
Expand Down
Loading

0 comments on commit 83726bb

Please sign in to comment.