From: "Erich E. Hoover" Subject: [PATCH 1/3] msvcrt: Rework strtod_helper to be reusable. Message-Id: Date: Thu, 2 Jan 2020 09:07:00 -0700 We have the same code for parsing doubles in several places; this rework allows us to use the most complete version (strtod) for wcstod (patch 2) and scanf floats (patch 3). Many thanks go to Piotr for his help with this. Best, Erich From 9b782ecbf150a72bde5e9e80efdd8c86f788e875 Mon Sep 17 00:00:00 2001 From: "Erich E. Hoover" Date: Tue, 31 Dec 2019 11:19:07 -0700 Subject: msvcrt: Rework strtod_helper to be reusable. Signed-off-by: Erich E. Hoover --- dlls/msvcrt/msvcrt.h | 2 + dlls/msvcrt/string.c | 265 ++++++++++++++++++++++++++----------------- 2 files changed, 163 insertions(+), 104 deletions(-) diff --git a/dlls/msvcrt/msvcrt.h b/dlls/msvcrt/msvcrt.h index ee1a159385..84e3b013bc 100644 --- a/dlls/msvcrt/msvcrt.h +++ b/dlls/msvcrt/msvcrt.h @@ -1181,6 +1181,7 @@ int __cdecl MSVCRT__toupper_l(int,MSVCRT__locale_t); int __cdecl MSVCRT__tolower_l(int,MSVCRT__locale_t); int __cdecl MSVCRT__towupper_l(MSVCRT_wint_t,MSVCRT__locale_t); int __cdecl MSVCRT__towlower_l(MSVCRT_wint_t,MSVCRT__locale_t); +int __cdecl MSVCRT__toupper(int); /* only use on lower-case ASCII characters */ int __cdecl MSVCRT__stricmp(const char*, const char*); int __cdecl MSVCRT__strnicmp(const char*, const char*, MSVCRT_size_t); int __cdecl MSVCRT__strnicoll_l(const char*, const char*, MSVCRT_size_t, MSVCRT__locale_t); @@ -1190,6 +1191,7 @@ int __cdecl MSVCRT_strcmp(const char*, const char*); char* __cdecl MSVCRT_strstr(const char*, const char*); unsigned int __cdecl MSVCRT__get_output_format(void); char* __cdecl MSVCRT_strtok_s(char*, const char*, char**); +double parse_double(MSVCRT_wchar_t (*)(void*), void (*)(void*), void*, MSVCRT_pthreadlocinfo, int*); /* Maybe one day we'll enable the invalid parameter handlers with the full set of information (msvcrXXd) * #define MSVCRT_INVALID_PMT(x) MSVCRT_call_invalid_parameter_handler(x, __FUNCTION__, __FILE__, 
__LINE__, 0) diff --git a/dlls/msvcrt/string.c b/dlls/msvcrt/string.c index 51730f6349..a1a2c05fbb 100644 --- a/dlls/msvcrt/string.c +++ b/dlls/msvcrt/string.c @@ -455,28 +455,30 @@ static inline int hex2int(char c) return -1; } -static double strtod16(int sign, const char *p, char **end, - MSVCRT_pthreadlocinfo locinfo, int *err) +static double strtod16(MSVCRT_wchar_t get(void *ctx), void unget(void *ctx), + void *ctx, int sign, MSVCRT_pthreadlocinfo locinfo, int *err) { + BOOL found_digit = FALSE, found_dp = FALSE; enum round round = ROUND_ZERO; - BOOL found_digit = FALSE; + MSVCRT_wchar_t nch; ULONGLONG m = 0; int val, exp = 0; + nch = get(ctx); while(m < MSVCRT_UI64_MAX/16) { - val = hex2int(*p); + val = hex2int(nch); if (val == -1) break; found_digit = TRUE; - p++; + nch = get(ctx); m = m*16 + val; } while(1) { - val = hex2int(*p); + val = hex2int(nch); if (val == -1) break; - p++; + nch = get(ctx); exp += 4; if (val || round != ROUND_ZERO) @@ -487,29 +489,33 @@ static double strtod16(int sign, const char *p, char **end, } } - if(*p == *locinfo->lconv->decimal_point) - p++; + if(nch == *locinfo->lconv->decimal_point) + { + found_dp = TRUE; + nch = get(ctx); + } else if (!found_digit) { - if(end) *end = (char*)p - 1; + if(nch!=MSVCRT_WEOF) unget(ctx); + unget(ctx); return 0.0; } while(m <= MSVCRT_UI64_MAX/16) { - val = hex2int(*p); + val = hex2int(nch); if (val == -1) break; found_digit = TRUE; - p++; + nch = get(ctx); m = m*16 + val; exp -= 4; } while(1) { - val = hex2int(*p); + val = hex2int(nch); if (val == -1) break; - p++; + nch = get(ctx); if (val || round != ROUND_ZERO) { @@ -521,39 +527,44 @@ static double strtod16(int sign, const char *p, char **end, if (!found_digit) { - if(end) *end = (char*)p - 2; + if (nch != MSVCRT_WEOF) unget(ctx); + if (found_dp) unget(ctx); + unget(ctx); return 0.0; } - if(*p=='p' || *p=='P') { + if(nch=='p' || nch=='P') { + BOOL found_sign = FALSE; int e=0, s=1; - p++; - if(*p == '-') { + nch = get(ctx); + if(nch == '-') { + 
found_sign = TRUE; s = -1; - p++; - } else if(*p == '+') - p++; - - if(*p>='0' && *p<='9') { - while(*p>='0' && *p<='9') { - if(e>INT_MAX/10 || (e=e*10+*p-'0')<0) + nch = get(ctx); + } else if(nch == '+') { + found_sign = TRUE; + nch = get(ctx); + } + if(nch>='0' && nch<='9') { + while(nch>='0' && nch<='9') { + if(e>INT_MAX/10 || (e=e*10+nch-'0')<0) e = INT_MAX; - p++; + nch = get(ctx); } + if((nch!=MSVCRT_WEOF) && (nch < '0' || nch > '9')) unget(ctx); e *= s; if(exp<0 && e<0 && exp+e>=0) exp = INT_MIN; else if(exp>0 && e>0 && exp+e<0) exp = INT_MAX; else exp += e; } else { - if(*p=='-' || *p=='+') - p--; - p--; + if(nch != MSVCRT_WEOF) unget(ctx); + if(found_sign) unget(ctx); + unget(ctx); } } - if (end) *end = (char*)p; return make_double(sign, exp, m, round, err); } #endif @@ -650,111 +661,120 @@ static double convert_e10_to_e2(int sign, int e10, ULONGLONG m, int *err) return make_double(sign, e2, u128.u[0], ROUND_DOWN, err); } -static double strtod_helper(const char *str, char **end, MSVCRT__locale_t locale, int *err) +double parse_double(MSVCRT_wchar_t (*get)(void *ctx), void (*unget)(void *ctx), + void *ctx, MSVCRT_pthreadlocinfo locinfo, int *err) { - MSVCRT_pthreadlocinfo locinfo; +#if _MSVCR_VER >= 140 + MSVCRT_wchar_t _infinity[] = { 'i', 'n', 'f', 'i', 'n', 'i', 't', 'y', 0 }; + MSVCRT_wchar_t _nan[] = { 'n', 'a', 'n', 0 }; + MSVCRT_wchar_t *str_match = NULL; + int matched=0; +#endif + BOOL found_digit = FALSE, found_dp = FALSE, found_sign = FALSE; unsigned __int64 d=0, hlp; + MSVCRT_wchar_t nch; int exp=0, sign=1; - const char *p; - BOOL found_digit = FALSE; - - if(err) - *err = 0; - else if(!MSVCRT_CHECK_PMT(str != NULL)) { - if (end) - *end = NULL; - return 0; - } - if(!locale) - locinfo = get_locinfo(); - else - locinfo = locale->locinfo; - - p = str; - while(MSVCRT__isspace_l((unsigned char)*p, locale)) - p++; - - if(*p == '-') { + nch = get(ctx); + if(nch == '-') { + found_sign = TRUE; sign = -1; - p++; - } else if(*p == '+') - p++; + nch = 
get(ctx); + } else if(nch == '+') { + found_sign = TRUE; + nch = get(ctx); + } #if _MSVCR_VER >= 140 - if(MSVCRT__tolower_l(p[0], locale) == 'i' && MSVCRT__tolower_l(p[1], locale) == 'n' - && MSVCRT__tolower_l(p[2], locale) == 'f') { - if(end) - *end = (char*) &p[3]; - if(MSVCRT__tolower_l(p[3], locale) == 'i' && MSVCRT__tolower_l(p[4], locale) == 'n' - && MSVCRT__tolower_l(p[5], locale) == 'i' && MSVCRT__tolower_l(p[6], locale) == 't' - && MSVCRT__tolower_l(p[7], locale) == 'y' && end) - *end = (char*) &p[8]; - return sign*INFINITY; - } - if(MSVCRT__tolower_l(p[0], locale) == 'n' && - MSVCRT__tolower_l(p[1], locale) == 'a' && - MSVCRT__tolower_l(p[2], locale) == 'n') { - if(end) - *end = (char*) &p[3]; - return NAN; - } - - if(p[0] == '0' && (p[1] == 'x' || p[1] == 'X')) { - p += 2; - return strtod16(sign, p, end, locinfo, err); + if(nch == _infinity[0] || nch == MSVCRT__toupper(_infinity[0])) + str_match = _infinity; + if(nch == _nan[0] || nch == MSVCRT__toupper(_nan[0])) + str_match = _nan; + while(str_match && nch != MSVCRT_WEOF && + (nch == str_match[matched] || nch == MSVCRT__toupper(str_match[matched]))) { + nch = get(ctx); + matched++; + } + if(str_match) { + int keep = 0; + if(matched >= 8) keep = 8; + else if(matched >= 3) keep = 3; + if(nch != MSVCRT_WEOF) unget(ctx); + for (; matched > keep; matched--) { + unget(ctx); + } + if(keep) { + if (str_match == _infinity) return sign*INFINITY; + if (str_match == _nan) return sign*NAN; + } + } + + if(nch == '0') { + nch = get(ctx); + if(nch == 'x' || nch == 'X') + return strtod16(get, unget, ctx, sign, locinfo, err); } #endif - while(*p>='0' && *p<='9') { + while(nch>='0' && nch<='9') { found_digit = TRUE; - hlp = d * 10 + *p++ - '0'; + hlp = d * 10 + nch - '0'; + nch = get(ctx); if(d>MSVCRT_UI64_MAX/10 || hlp<d) { exp++; break; } else d = hlp; } - while(*p>='0' && *p<='9') { + while(nch>='0' && nch<='9') { exp++; - p++; + nch = get(ctx); } - if(*p == *locinfo->lconv->decimal_point) - p++; + if(nch == *locinfo->lconv->decimal_point) { + found_dp = TRUE; + nch 
= get(ctx); + } - while(*p>='0' && *p<='9') { + while(nch>='0' && nch<='9') { found_digit = TRUE; - hlp = d * 10 + *p++ - '0'; + hlp = d * 10 + nch - '0'; + nch = get(ctx); if(d>MSVCRT_UI64_MAX/10 || hlp<d) break; d = hlp; exp--; } - while(*p>='0' && *p<='9') - p++; + while(nch>='0' && nch<='9') + nch = get(ctx); if(!found_digit) { - if(end) - *end = (char*)str; + if(nch != MSVCRT_WEOF) unget(ctx); + if(found_dp) unget(ctx); + if(found_sign) unget(ctx); return 0.0; } - if(*p=='e' || *p=='E' || *p=='d' || *p=='D') { + if(nch=='e' || nch=='E' || nch=='d' || nch=='D') { int e=0, s=1; - p++; - if(*p == '-') { + nch = get(ctx); + if(nch == '-') { + found_sign = TRUE; s = -1; - p++; - } else if(*p == '+') - p++; + nch = get(ctx); + } else if(nch == '+') { + found_sign = TRUE; + nch = get(ctx); + } else { + found_sign = FALSE; + } - if(*p>='0' && *p<='9') { - while(*p>='0' && *p<='9') { - if(e>INT_MAX/10 || (e=e*10+*p-'0')<0) + if(nch>='0' && nch<='9') { + while(nch>='0' && nch<='9') { + if(e>INT_MAX/10 || (e=e*10+nch-'0')<0) e = INT_MAX; - p++; + nch = get(ctx); } e *= s; @@ -762,15 +782,12 @@ static double strtod_helper(const char *str, char **end, MSVCRT__locale_t locale else if(exp>0 && e>0 && exp+e<0) exp = INT_MAX; else exp += e; } else { - if(*p=='-' || *p=='+') - p--; - p--; + if(nch != MSVCRT_WEOF) unget(ctx); + if(found_sign) unget(ctx); + unget(ctx); } } - if(end) - *end = (char*)p; - if(!err) err = MSVCRT__errno(); if(!d) return make_double(sign, exp, d, ROUND_ZERO, err); if(exp > MSVCRT_DBL_MAX_10_EXP) @@ -783,6 +800,46 @@ static double strtod_helper(const char *str, char **end, MSVCRT__locale_t locale return convert_e10_to_e2(sign, exp, d, err); } +static MSVCRT_wchar_t strtod_str_get(void *ctx) +{ + const char **p = ctx; + if (!**p) return MSVCRT_WEOF; + return *(*p)++; +} + +static void strtod_str_unget(void *ctx) +{ + const char **p = ctx; + (*p)--; +} + +static inline double strtod_helper(const char *str, char **end, MSVCRT__locale_t locale, int *err) +{ + MSVCRT_pthreadlocinfo locinfo; + 
const char *beg, *p; + double ret; + + if (err) *err = 0; + if (!MSVCRT_CHECK_PMT(str != NULL)) { + if (end) *end = NULL; + return 0; + } + + if (!locale) + locinfo = get_locinfo(); + else + locinfo = locale->locinfo; + + p = str; + while(MSVCRT__isspace_l((unsigned char)*p, locale)) + p++; + beg = p; + + ret = parse_double(strtod_str_get, strtod_str_unget, &p, locinfo, err); + if (end) *end = (p == beg ? (char*)str : (char*)p); + return ret; +} + /********************************************************************* * strtod_l (MSVCRT.@) */ -- 2.17.1