Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

stdlib: improve string to integer conversion functions #381

Merged
merged 2 commits into from
Nov 6, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 26 additions & 7 deletions stdlib/strtoul.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
#include <limits.h>


unsigned long int strtoul(const char *nptr, char **endptr, int base)
static unsigned long int strtoul_common(const char *nptr, char **endptr, int base, int isUnsigned)
{
unsigned long int cutoff, result = 0;
int cutlim, t, width = 0, negative = 0;
Expand All @@ -37,11 +37,12 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
sptr++;
}

if ((base == 16 || base == 0) && sptr[0] == '0' && (sptr[1] | 0x20) == 'x') {
if ((base == 16 || base == 0) && sptr[0] == '0' && (sptr[1] | 0x20) == 'x' && isxdigit(sptr[2])) {
base = 16;
sptr += 2;
}
else if (base == 0 && sptr[0] == '0') {

if (base == 0 && sptr[0] == '0') {
base = 8;
}
else if (base == 0) {
Expand All @@ -53,8 +54,15 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
return 0;
}

cutoff = (unsigned long int)(ULONG_MAX) / (unsigned long int)base;
cutlim = (unsigned long int)(ULONG_MAX) - (cutoff * (unsigned long int)base);
if (isUnsigned != 0) {
cutoff = ULONG_MAX;
}
else {
cutoff = (negative != 0) ? -LONG_MIN : LONG_MAX;
}

cutlim = (int)(cutoff % base);
cutoff /= base;

while (isalnum(*sptr) != 0) {
t = *sptr - '0';
Expand All @@ -78,7 +86,12 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)

if (width < 0) {
errno = ERANGE;
result = ULONG_MAX;
if (isUnsigned != 0) {
result = ULONG_MAX;
}
else {
result = (negative != 0) ? LONG_MIN : LONG_MAX;
}
}
else if (width == 0) {
errno = EINVAL;
Expand All @@ -95,9 +108,15 @@ unsigned long int strtoul(const char *nptr, char **endptr, int base)
}


/*
 * Public unsigned entry point: forwards nptr, endptr and base unchanged to
 * strtoul_common with the isUnsigned flag set to 1 and returns its result.
 */
unsigned long int strtoul(const char *nptr, char **endptr, int base)
{
	unsigned long int value;

	value = strtoul_common(nptr, endptr, base, 1);

	return value;
}


/*
 * Signed entry point: returns the conversion result cast to long int.
 *
 * NOTE(review): two return statements appear back to back below. This looks
 * like the removed and the added line of a diff rendered without +/- markers;
 * confirm which one belongs to the final file. Taken literally, only the
 * first return can execute and the second is unreachable.
 */
long int strtol(const char *nptr, char **endptr, int base)
{
/* Presumably the pre-change line: routes the signed conversion through strtoul. */
return (long int)strtoul(nptr, endptr, base);
/* Presumably the replacement line: calls strtoul_common with isUnsigned = 0. */
return (long int)strtoul_common(nptr, endptr, base, 0);
}


Expand Down
174 changes: 69 additions & 105 deletions stdlib/strtoull.c
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#include <stdlib.h>
#include <ctype.h>
#include <limits.h>
#include <errno.h>


intmax_t strtoimax(const char *nptr, char **endptr, int base)
{
Expand All @@ -30,163 +32,125 @@ uintmax_t strtoumax(const char *nptr, char **endptr, int base)
}


unsigned long long int strtoull(const char *nptr, char **endptr, int base)
static unsigned long long int strtoll_common(const char *nptr, char **endptr, int base, int isUnsigned)
{
const char *s;
unsigned long long acc;
unsigned long long int acc, cutoff;
char c;
unsigned long long cutoff;
int neg, any, cutlim;

s = nptr;
do {
c = *s++;
} while (isspace((unsigned char)c));
} while (isspace(c));

if (c == '-') {
neg = 1;
c = *s++;
} else {
}
else {
neg = 0;
if (c == '+')
if (c == '+') {
c = *s++;
}
}

any = 0;
acc = 0;
acc = 0uLL;

if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X')) {
if (((base == 0) || (base == 16)) && (c == '0') && ((s[0] == 'x') || (s[0] == 'X')) && isxdigit(s[1])) {
c = s[1];
s += 2;
base = 16;
any = 1;
}

if (base == 0)
base = c == '0' ? 8 : 10;
if (base == 0) {
base = (c == '0') ? 8 : 10;
}

if ((base < 2) || (base > 36)) {
errno = EINVAL;
return 0uLL;
}

if (isUnsigned != 0) {
cutoff = ULLONG_MAX;
}
else {
cutoff = (neg != 0) ? -LLONG_MIN : LLONG_MAX;
}

if (base < 2 || base > 36)
return acc;
cutlim = (int)(cutoff % base);
cutoff /= base;

cutoff = ULONG_LONG_MAX / base;
cutlim = ULONG_LONG_MAX % base;
for ( ; ; c = *s++) {
if (c >= '0' && c <= '9')
for (;; c = *s++) {
if (c >= '0' && c <= '9') {
c -= '0';
else if (c >= 'A' && c <= 'Z')
}
else if (c >= 'A' && c <= 'Z') {
c -= 'A' - 10;
else if (c >= 'a' && c <= 'z')
}
else if (c >= 'a' && c <= 'z') {
c -= 'a' - 10;
else
}
else {
break;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

errno should be set to EINVAL if no conversion could be performed. This is an optional error condition, so I'm not 100% sure it will be useful in portable applications, but maybe it is worth including.

I have noticed that strtoul.c and strtoull.c still differ and have not been fully unified, which is why EINVAL is not set here. Is this intentional (for now) to avoid a large refactor?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I checked with glibc and it doesn't set errno to EINVAL if no conversion can be performed - so perhaps it won't be very useful, but I added it.

Making the code for strtol and strtoll common would be tricky because (at least on 32-bit platforms) they use different arithmetic types for intermediate results. If we used unsigned long long for both of them, it would reduce performance when a 32-bit conversion is requested. The impact might not be large, though, because only addition and multiplication are used.

}

if (c >= base)
if (c >= base) {
break;
}

if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
if (any < 0) {
continue;
}

if ((acc > cutoff) || ((acc == cutoff) && (c > cutlim))) {
any = -1;
}
else {
any = 1;
acc *= base;
acc += c;
}
}

if (any < 0) {
acc = ULONG_LONG_MAX;
} else if (neg)
acc = -acc;

if (endptr != NULL)
*endptr = (char *)(any ? s - 1 : nptr);

return (acc);
}


long long int strtoll(const char *nptr, char **endptr, int base)
{
const char *s;
long long int acc, cutoff;
int c;
int neg, any, cutlim;

s = nptr;
do {
c = (unsigned char) *s++;
} while (isspace(c));

if (c == '-') {
neg = 1;
c = *s++;
} else {
neg = 0;
if (c == '+')
c = *s++;
if (any == 0) {
errno = EINVAL;
}

if ((base == 0 || base == 16) && c == '0' && (*s == 'x' || *s == 'X')) {
c = s[1];
s += 2;
base = 16;
}

if (base == 0)
base = c == '0' ? 8 : 10;

cutoff = neg ? LONG_LONG_MIN : LONG_LONG_MAX;
cutlim = (int)(cutoff % base);
cutoff /= base;
if (neg) {
if (cutlim > 0) {
cutlim -= base;
cutoff += 1;
else if (any < 0) {
errno = ERANGE;
if (isUnsigned != 0) {
acc = ULLONG_MAX;
}
else {
acc = (neg != 0) ? -LLONG_MIN : LLONG_MAX;
}
cutlim = -cutlim;
}
else if (neg != 0) {
acc = -acc;
}

for (acc = 0, any = 0;; c = (unsigned char) *s++) {
if (isdigit(c))
c -= '0';
else if (isalpha(c))
c -= isupper(c) ? 'A' - 10 : 'a' - 10;
else
break;
if (endptr != NULL) {
*endptr = (char *)((any != 0) ? (s - 1) : nptr);
}

if (c >= base)
break;
return acc;
}

if (any < 0)
continue;

if (neg) {
if (acc < cutoff || (acc == cutoff && c > cutlim)) {
any = -1;
acc = LONG_LONG_MIN;
} else {
any = 1;
acc *= base;
acc -= c;
}
} else {
if (acc > cutoff || (acc == cutoff && c > cutlim)) {
any = -1;
acc = LONG_LONG_MAX;
} else {
any = 1;
acc *= base;
acc += c;
}
}
}
/*
 * Public unsigned long long entry point: forwards nptr, endptr and base
 * unchanged to strtoll_common with the isUnsigned flag set to 1 and returns
 * its result.
 */
unsigned long long int strtoull(const char *nptr, char **endptr, int base)
{
	unsigned long long int value;

	value = strtoll_common(nptr, endptr, base, 1);

	return value;
}

if (endptr != 0)
*endptr = (char *)(any ? s - 1 : nptr);

return (acc);
/*
 * Public signed long long entry point: calls strtoll_common with the
 * isUnsigned flag cleared (0) and converts the unsigned long long result
 * to long long int before returning it.
 */
long long int strtoll(const char *nptr, char **endptr, int base)
{
	unsigned long long int raw;

	raw = strtoll_common(nptr, endptr, base, 0);

	return (long long int)raw;
}


long long int atoll(const char *str)
{
return strtoll(str, NULL, 10);
Expand Down
Loading