📄 grok
字号:
#################################################################################### $Revision: 14 $## $Author: mhx $## $Date: 2007/09/11 23:20:41 +0200 $###################################################################################### Version 3.x, Copyright (C) 2004-2007, Marcus Holland-Moritz.## Version 2.x, Copyright (C) 2001, Paul Marquess.## Version 1.x, Copyright (C) 1999, Kenneth Albanowski.#### This program is free software; you can redistribute it and/or## modify it under the same terms as Perl itself.##################################################################################=providesgrok_hexgrok_octgrok_bingrok_numeric_radixgrok_number__UNDEFINED__=implementation__UNDEFINED__ IN_PERL_COMPILETIME (PL_curcop == &PL_compiling)__UNDEFINED__ IN_LOCALE_RUNTIME (PL_curcop->op_private & HINT_LOCALE)__UNDEFINED__ IN_LOCALE_COMPILETIME (PL_hints & HINT_LOCALE)__UNDEFINED__ IN_LOCALE (IN_PERL_COMPILETIME ? IN_LOCALE_COMPILETIME : IN_LOCALE_RUNTIME)__UNDEFINED__ IS_NUMBER_IN_UV 0x01__UNDEFINED__ IS_NUMBER_GREATER_THAN_UV_MAX 0x02__UNDEFINED__ IS_NUMBER_NOT_INT 0x04__UNDEFINED__ IS_NUMBER_NEG 0x08__UNDEFINED__ IS_NUMBER_INFINITY 0x10__UNDEFINED__ IS_NUMBER_NAN 0x20__UNDEFINED__ GROK_NUMERIC_RADIX(sp, send) grok_numeric_radix(sp, send)__UNDEFINED__ PERL_SCAN_GREATER_THAN_UV_MAX 0x02__UNDEFINED__ PERL_SCAN_SILENT_ILLDIGIT 0x04__UNDEFINED__ PERL_SCAN_ALLOW_UNDERSCORES 0x01__UNDEFINED__ PERL_SCAN_DISALLOW_PREFIX 0x02#ifndef grok_numeric_radix#if { NEED grok_numeric_radix }boolgrok_numeric_radix(pTHX_ const char **sp, const char *send){#ifdef USE_LOCALE_NUMERIC#ifdef PL_numeric_radix_sv if (PL_numeric_radix_sv && IN_LOCALE) { STRLEN len; char* radix = SvPV(PL_numeric_radix_sv, len); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } }#else /* older perls don't have PL_numeric_radix_sv so the radix * must manually be requested from locale.h */#include <locale.h> dTHR; /* needed for older threaded perls */ struct lconv *lc = localeconv(); char *radix = lc->decimal_point; if (radix && IN_LOCALE) { STRLEN len = strlen(radix); if (*sp + len <= send && memEQ(*sp, radix, len)) { *sp += len; return TRUE; } }#endif#endif /* USE_LOCALE_NUMERIC */ /* always try "." if numeric radix didn't match because * we may have data from different locales mixed */ if (*sp < send && **sp == '.') { ++*sp; return TRUE; } return FALSE;}#endif#endif#ifndef grok_number#if { NEED grok_number }intgrok_number(pTHX_ const char *pv, STRLEN len, UV *valuep){ const char *s = pv; const char *send = pv + len; const UV max_div_10 = UV_MAX / 10; const char max_mod_10 = UV_MAX % 10; int numtype = 0; int sawinf = 0; int sawnan = 0; while (s < send && isSPACE(*s)) s++; if (s == send) { return 0; } else if (*s == '-') { s++; numtype = IS_NUMBER_NEG; } else if (*s == '+') s++; if (s == send) return 0; /* next must be digit or the radix separator or beginning of infinity */ if (isDIGIT(*s)) { /* UVs are at least 32 bits, so the first 9 decimal digits cannot overflow. */ UV value = *s - '0'; /* This construction seems to be more optimiser friendly. (without it gcc does the isDIGIT test and the *s - '0' separately) With it gcc on arm is managing 6 instructions (6 cycles) per digit. In theory the optimiser could deduce how far to unroll the loop before checking for overflow. */ if (++s < send) { int digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { digit = *s - '0'; if (digit >= 0 && digit <= 9) { value = value * 10 + digit; if (++s < send) { /* Now got 9 digits, so need to check each time for overflow. */ digit = *s - '0'; while (digit >= 0 && digit <= 9 && (value < max_div_10 || (value == max_div_10 && digit <= max_mod_10))) { value = value * 10 + digit; if (++s < send) digit = *s - '0'; else break; } if (digit >= 0 && digit <= 9 && (s < send)) { /* value overflowed. skip the remaining digits, don't worry about setting *valuep. */ do { s++; } while (s < send && isDIGIT(*s)); numtype |= IS_NUMBER_GREATER_THAN_UV_MAX; goto skip_value; } } } } } } } } } } } } } } } } } } numtype |= IS_NUMBER_IN_UV; if (valuep) *valuep = value; skip_value: if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT; while (s < send && isDIGIT(*s)) /* optional digits after the radix */ s++; } } else if (GROK_NUMERIC_RADIX(&s, send)) { numtype |= IS_NUMBER_NOT_INT | IS_NUMBER_IN_UV; /* valuep assigned below */ /* no digits before the radix means we need digits after it */ if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); if (valuep) { /* integer approximation is valid - it's 0. */ *valuep = 0; } } else return 0; } else if (*s == 'I' || *s == 'i') { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'F' && *s != 'f')) return 0; s++; if (s < send && (*s == 'I' || *s == 'i')) { s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; if (s == send || (*s != 'I' && *s != 'i')) return 0; s++; if (s == send || (*s != 'T' && *s != 't')) return 0; s++; if (s == send || (*s != 'Y' && *s != 'y')) return 0; s++; } sawinf = 1; } else if (*s == 'N' || *s == 'n') { /* XXX TODO: There are signaling NaNs and quiet NaNs. */ s++; if (s == send || (*s != 'A' && *s != 'a')) return 0; s++; if (s == send || (*s != 'N' && *s != 'n')) return 0; s++; sawnan = 1; } else return 0; if (sawinf) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_INFINITY | IS_NUMBER_NOT_INT; } else if (sawnan) { numtype &= IS_NUMBER_NEG; /* Keep track of sign */ numtype |= IS_NUMBER_NAN | IS_NUMBER_NOT_INT; } else if (s < send) { /* we can have an optional exponent part */ if (*s == 'e' || *s == 'E') { /* The only flag we keep is sign. Blow away any "it's UV" */ numtype &= IS_NUMBER_NEG; numtype |= IS_NUMBER_NOT_INT; s++; if (s < send && (*s == '-' || *s == '+')) s++; if (s < send && isDIGIT(*s)) { do { s++; } while (s < send && isDIGIT(*s)); } else return 0; } } while (s < send && isSPACE(*s)) s++; if (s >= send) return numtype; if (len == 10 && memEQ(pv, "0 but true", 10)) { if (valuep) *valuep = 0; return IS_NUMBER_IN_UV; } return 0;}#endif#endif/* * The grok_* routines have been modified to use warn() instead of * Perl_warner(). Also, 'hexdigit' was the former name of PL_hexdigit, * which is why the stack variable has been renamed to 'xdigit'. */#ifndef grok_bin#if { NEED grok_bin }UVgrok_bin(pTHX_ const char *start, STRLEN *len_p, I32 *flags, NV *result){ const char *s = start; STRLEN len = *len_p; UV value = 0; NV value_nv = 0; const UV max_div_2 = UV_MAX / 2; bool allow_underscores = *flags & PERL_SCAN_ALLOW_UNDERSCORES; bool overflowed = FALSE; if (!(*flags & PERL_SCAN_DISALLOW_PREFIX)) { /* strip off leading b or 0b. for compatibility silently suffer "b" and "0b" as valid binary numbers. */ if (len >= 1) { if (s[0] == 'b') { s++; len--; } else if (len >= 2 && s[0] == '0' && s[1] == 'b') { s+=2; len-=2; } } } for (; len-- && *s; s++) { char bit = *s; if (bit == '0' || bit == '1') { /* Write it in this wonky order with a goto to attempt to get the compiler to make the common case integer-only loop pretty tight. With gcc seems to be much straighter code than old scan_bin. */ redo: if (!overflowed) { if (value <= max_div_2) { value = (value << 1) | (bit - '0'); continue; } /* Bah. We're just overflowed. */ warn("Integer overflow in binary number"); overflowed = TRUE; value_nv = (NV) value; } value_nv *= 2.0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -