regex_compile.hpp

来自「CGAL is a collaborative effort of severa」· HPP 代码 · 共 1,861 行 · 第 1/5 页

HPP
1,861
字号
            }            break;         case traits_type::syntax_or:            if(((_flags & bk_vbar) == 0) || (_flags & limited_ops))               break;            goto alt_string_jump;         case traits_type::syntax_open_brace:            if( ((_flags & bk_braces) == 0) || ((_flags & intervals) == 0))               break;            // we have {x} or {x,} or {x,y}:            parse_range(ptr, end, rep_min, rep_max);            goto repeat_jump;         case traits_type::syntax_digit:            if(_flags & bk_refs)            {               // update previous:               int i = traits_inst.toi((charT)c);               if(i == 0)               {                  // we can have \025 which means take char whose                  // code is 25 (octal), so parse string:                  c = traits_inst.toi(ptr, end, -8);                  --ptr;                  break;               }               dat = add_simple(dat, re_detail::syntax_element_backref, sizeof(re_detail::re_brace));               static_cast<re_detail::re_brace*>(dat)->index = i;               ++ptr;               continue;            }            break;         case traits_type::syntax_b:     // re_detail::syntax_element_word_boundary            dat = add_simple(dat, re_detail::syntax_element_word_boundary);            ++ptr;            continue;         case traits_type::syntax_B:            dat = add_simple(dat, re_detail::syntax_element_within_word);            ++ptr;            continue;         case traits_type::syntax_left_word:            dat = add_simple(dat, re_detail::syntax_element_word_start);            ++ptr;            continue;         case traits_type::syntax_right_word:            dat = add_simple(dat, re_detail::syntax_element_word_end);            ++ptr;            continue;         case traits_type::syntax_w:     //re_detail::syntax_element_word_char            dat = compile_set_simple(dat, traits_type::char_class_word);            ++ptr;            continue;         
case traits_type::syntax_W:            dat = compile_set_simple(dat, traits_type::char_class_word, true);            ++ptr;            continue;         case traits_type::syntax_d:     //re_detail::syntax_element_word_char            dat = compile_set_simple(dat, traits_type::char_class_digit);            ++ptr;            continue;         case traits_type::syntax_D:            dat = compile_set_simple(dat, traits_type::char_class_digit, true);            ++ptr;            continue;         case traits_type::syntax_s:     //re_detail::syntax_element_word_char            dat = compile_set_simple(dat, traits_type::char_class_space);            ++ptr;            continue;         case traits_type::syntax_S:            dat = compile_set_simple(dat, traits_type::char_class_space, true);            ++ptr;            continue;         case traits_type::syntax_l:     //re_detail::syntax_element_word_char            dat = compile_set_simple(dat, traits_type::char_class_lower);            ++ptr;            continue;         case traits_type::syntax_L:            dat = compile_set_simple(dat, traits_type::char_class_lower, true);            ++ptr;            continue;         case traits_type::syntax_u:     //re_detail::syntax_element_word_char            dat = compile_set_simple(dat, traits_type::char_class_upper);            ++ptr;            continue;         case traits_type::syntax_U:            dat = compile_set_simple(dat, traits_type::char_class_upper, true);            ++ptr;            continue;         case traits_type::syntax_Q:            ++ptr;            while(true)            {               if(ptr == end)               {                  fail(REG_EESCAPE);                  return error_code();               }               if(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_slash)               {                  ++ptr;                  if((ptr != end) && 
(traits_inst.syntax_type((traits_size_type)(traits_uchar_type)*ptr) == traits_type::syntax_E))                     break;                  else                  {                     dat = add_literal(dat, *(ptr-1));                     continue;                  }               }               dat = add_literal(dat, *ptr);               ++ptr;            }            ++ptr;            continue;         case traits_type::syntax_C:            dat = add_simple(dat, re_detail::syntax_element_wild);            ++ptr;            continue;         case traits_type::syntax_X:            dat = add_simple(dat, re_detail::syntax_element_combining);            ++ptr;            continue;         case traits_type::syntax_Z:            dat = add_simple(dat, re_detail::syntax_element_soft_buffer_end);            ++ptr;            continue;         case traits_type::syntax_G:            dat = add_simple(dat, re_detail::syntax_element_restart_continue);            ++ptr;            continue;         case traits_type::syntax_start_buffer:            dat = add_simple(dat, re_detail::syntax_element_buffer_start);            ++ptr;            continue;         case traits_type::syntax_end_buffer:            dat = add_simple(dat, re_detail::syntax_element_buffer_end);            ++ptr;            continue;         default:            c = (traits_size_type)(traits_uchar_type)parse_escape(ptr, end);            dat = add_literal(dat, (charT)c);            continue;         }         dat = add_literal(dat, (charT)c);         ++ptr;         break;      }      case traits_type::syntax_dollar:         dat = add_simple(dat, re_detail::syntax_element_end_line, sizeof(re_detail::re_syntax_base));         ++ptr;         continue;      case traits_type::syntax_caret:         dat = add_simple(dat, re_detail::syntax_element_start_line, sizeof(re_detail::re_syntax_base));         ++ptr;         continue;      case traits_type::syntax_dot:         dat = add_simple(dat, re_detail::syntax_element_wild, 
sizeof(re_detail::re_syntax_base));         ++ptr;         continue;      case traits_type::syntax_star:         rep_min = 0;         rep_max = (unsigned)-1;         repeat_jump:         {          std::ptrdiff_t offset;            if(dat == 0)            {               fail(REG_BADRPT);               return error_code();            }            switch(dat->type)            {            case re_detail::syntax_element_endmark:               offset = last_mark_popped;               break;            case re_detail::syntax_element_literal:               if(static_cast<re_detail::re_literal*>(dat)->length > 1)               {                  // update previous:                  charT lit = *reinterpret_cast<charT*>(reinterpret_cast<char*>(dat) + sizeof(re_detail::re_literal) + ((static_cast<re_detail::re_literal*>(dat)->length-1)*sizeof(charT)));                  --static_cast<re_detail::re_literal*>(dat)->length;                  dat = add_simple(dat, re_detail::syntax_element_literal, sizeof(re_detail::re_literal) + sizeof(charT));                  static_cast<re_detail::re_literal*>(dat)->length = 1;                  *reinterpret_cast<charT*>(static_cast<re_detail::re_literal*>(dat)+1) = lit;               }               offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());               break;            case re_detail::syntax_element_backref:            case re_detail::syntax_element_long_set:            case re_detail::syntax_element_set:            case re_detail::syntax_element_wild:            case re_detail::syntax_element_combining:               // we're repeating a single item:               offset = reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data());               break;            default:               fail(REG_BADRPT);               return error_code();            }            data.align();            dat->next.i = data.size();            //unsigned pos = (char*)dat - (char*)data.data();            // add the 
trailing jump:            dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);            static_cast<re_detail::re_jump*>(dat)->alt.i = 0;            // now insert the leading repeater:            dat = static_cast<re_detail::re_syntax_base*>(data.insert(offset, re_detail::re_repeater_size));            dat->next.i = (reinterpret_cast<char*>(dat) - reinterpret_cast<char*>(data.data())) + re_detail::re_repeater_size;            dat->type = re_detail::syntax_element_rep;            static_cast<re_detail::re_repeat*>(dat)->alt.i = data.size();            static_cast<re_detail::re_repeat*>(dat)->min = rep_min;            static_cast<re_detail::re_repeat*>(dat)->max = rep_max;            static_cast<re_detail::re_repeat*>(dat)->leading = false;            static_cast<re_detail::re_repeat*>(dat)->greedy = true;            move_offsets(dat, re_detail::re_repeater_size);            ++ptr;            //            // now check to see if we have a non-greedy repeat:            if((ptr != end) && (_flags & (limited_ops | bk_plus_qm | bk_braces)) == 0)            {               c = (traits_size_type)(traits_uchar_type)*ptr;               if(traits_type::syntax_question == traits_inst.syntax_type(c))               {                  // OK repeat is non-greedy:                  static_cast<re_detail::re_repeat*>(dat)->greedy = false;                  ++ptr;               }            }            dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<char*>(data.data()) + data.size() - re_detail::re_jump_size);            static_cast<re_detail::re_repeat*>(dat)->alt.i = offset;            continue;         }      case traits_type::syntax_plus:         if(_flags & (bk_plus_qm | limited_ops))         {            dat = add_literal(dat, (charT)c);            ++ptr;            continue;         }         rep_min = 1;         rep_max = (unsigned)-1;         goto repeat_jump;      case traits_type::syntax_question:         if(_flags & 
(bk_plus_qm | limited_ops))         {            dat = add_literal(dat, (charT)c);            ++ptr;            continue;         }         rep_min = 0;         rep_max = 1;         goto repeat_jump;      case traits_type::syntax_open_set:         // update previous:         if(dat)         {            data.align();            dat->next.i = data.size();         }         // extend:         dat = compile_set(ptr, end);         if(dat == 0)         {            if((_flags & regbase::failbit) == 0)               fail(REG_EBRACK);            return error_code();         }         break;      case traits_type::syntax_or:      {         if(_flags & (bk_vbar | limited_ops))         {            dat = add_literal(dat, (charT)c);            ++ptr;            continue;         }         alt_string_jump:         // update previous:         if(dat == 0)         {            // start of pattern can't have empty "|"            fail(REG_EMPTY);            return error_code();         }         // see if we have an empty alternative:         if(mark.empty() == false)            if(mark.peek() == data.index(dat))            {               fail(REG_EMPTY);               return error_code();            }         // extend:         dat = add_simple(dat, re_detail::syntax_element_jump, re_detail::re_jump_size);         data.align();         //         // we don't know what value to put here yet,         // use an arbitrarily large value for now         // and check it later (TODO!)         
static_cast<re_detail::re_jump*>(dat)->alt.i = INT_MAX/2;         // now work out where to insert:         std::size_t offset = 0;         if(mark.empty() == false)         {            // we have a '(' or '|' to go back to:            offset = mark.peek();            re_detail::re_syntax_base* base = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + offset);            offset = base->next.i;         }         re_detail::re_jump* j = static_cast<re_detail::re_jump*>(data.insert(offset, re_detail::re_jump_size));         j->type = re_detail::syntax_element_alt;         j->next.i = offset + re_detail::re_jump_size;         j->alt.i = data.size();         move_offsets(j, re_detail::re_jump_size);         dat = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cast<unsigned char*>(data.data()) + data.size() - re_detail::re_jump_size);         mark.push(data.size() - re_detail::re_jump_size);         ++ptr;         break;      }      case traits_type::syntax_open_brace:         if((_flags & bk_braces) || ((_flags & intervals) == 0))         {            dat = add_literal(dat, (charT)c);            ++ptr;            continue;         }         // we have {x} or {x,} or {x,y}:         parse_range(ptr, end, rep_min, rep_max);         goto repeat_jump;      case traits_type::syntax_newline:         if(_flags & newline_alt)            goto alt_string_jump;         dat = add_literal(dat, (charT)c);         ++ptr;         continue;      case traits_type::syntax_close_brace:         if(_flags & bk_braces)         {            dat = add_literal(dat, (charT)c);            ++ptr;            continue;         }         fail(REG_BADPAT);         return error_code();      default:         dat = add_literal(dat, (charT)c);         ++ptr;         break;      }  // switch   }     // while   //   // update previous:   if(dat)   {      data.align();      dat->next.i = data.size();   }   // see if we have an empty alternative:   
if(mark.empty() == false)      if(mark.peek() == data.index(dat) )      {         re_detail::re_syntax_base* para = reinterpret_cast<re_detail::re_syntax_base*>(reinterpret_cas

⌨️ 快捷键说明

复制代码：Ctrl + C
搜索代码：Ctrl + F
全屏模式：F11
增大字号：Ctrl + =
减小字号：Ctrl + -
显示快捷键：?