📄 regex_internal.c
字号:
re_string_fetch_byte_case (re_string_t *pstr){ if (BE (!pstr->mbs_allocated, 1)) return re_string_fetch_byte (pstr);#ifdef RE_ENABLE_I18N if (pstr->offsets_needed) { Idx off; int ch; /* For tr_TR.UTF-8 [[:islower:]] there is [[: CAPITAL LETTER I WITH DOT lower:]] in mbs. Skip in that case the whole multi-byte character and return the original letter. On the other side, with [[: DOTLESS SMALL LETTER I return [[:I, as doing anything else would complicate things too much. */ if (!re_string_first_byte (pstr, pstr->cur_idx)) return re_string_fetch_byte (pstr); off = pstr->offsets[pstr->cur_idx]; ch = pstr->raw_mbs[pstr->raw_mbs_idx + off]; if (! isascii (ch)) return re_string_fetch_byte (pstr); re_string_skip_bytes (pstr, re_string_char_size_at (pstr, pstr->cur_idx)); return ch; }#endif return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];}static voidinternal_functionre_string_destruct (re_string_t *pstr){#ifdef RE_ENABLE_I18N re_free (pstr->wcs); re_free (pstr->offsets);#endif /* RE_ENABLE_I18N */ if (pstr->mbs_allocated) re_free (pstr->mbs);}/* Return the context at IDX in INPUT. */static unsigned intinternal_functionre_string_context_at (const re_string_t *input, Idx idx, int eflags){ int c; if (BE (! REG_VALID_INDEX (idx), 0)) /* In this case, we use the value stored in input->tip_context, since we can't know the character in input->mbs[-1] here. */ return input->tip_context; if (BE (idx == input->len, 0)) return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF : CONTEXT_NEWLINE | CONTEXT_ENDBUF);#ifdef RE_ENABLE_I18N if (input->mb_cur_max > 1) { wint_t wc; Idx wc_idx = idx; while(input->wcs[wc_idx] == WEOF) {#ifdef DEBUG /* It must not happen. */ assert (REG_VALID_INDEX (wc_idx));#endif --wc_idx; if (! REG_VALID_INDEX (wc_idx)) return input->tip_context; } wc = input->wcs[wc_idx]; if (BE (input->word_ops_used != 0, 0) && IS_WIDE_WORD_CHAR (wc)) return CONTEXT_WORD; return (IS_WIDE_NEWLINE (wc) && input->newline_anchor ? CONTEXT_NEWLINE : 0); } else#endif { c = re_string_byte_at (input, idx); if (bitset_contain (input->word_char, c)) return CONTEXT_WORD; return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0; }}/* Functions for set operation. */static reg_errcode_tinternal_functionre_node_set_alloc (re_node_set *set, Idx size){ set->alloc = size; set->nelem = 0; set->elems = re_malloc (Idx, size); if (BE (set->elems == NULL, 0)) return REG_ESPACE; return REG_NOERROR;}static reg_errcode_tinternal_functionre_node_set_init_1 (re_node_set *set, Idx elem){ set->alloc = 1; set->nelem = 1; set->elems = re_malloc (Idx, 1); if (BE (set->elems == NULL, 0)) { set->alloc = set->nelem = 0; return REG_ESPACE; } set->elems[0] = elem; return REG_NOERROR;}static reg_errcode_tinternal_functionre_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2){ set->alloc = 2; set->elems = re_malloc (Idx, 2); if (BE (set->elems == NULL, 0)) return REG_ESPACE; if (elem1 == elem2) { set->nelem = 1; set->elems[0] = elem1; } else { set->nelem = 2; if (elem1 < elem2) { set->elems[0] = elem1; set->elems[1] = elem2; } else { set->elems[0] = elem2; set->elems[1] = elem1; } } return REG_NOERROR;}static reg_errcode_tinternal_functionre_node_set_init_copy (re_node_set *dest, const re_node_set *src){ dest->nelem = src->nelem; if (src->nelem > 0) { dest->alloc = dest->nelem; dest->elems = re_malloc (Idx, dest->alloc); if (BE (dest->elems == NULL, 0)) { dest->alloc = dest->nelem = 0; return REG_ESPACE; } memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); } else re_node_set_init_empty (dest); return REG_NOERROR;}/* Calculate the intersection of the sets SRC1 and SRC2. And merge it to DEST. Return value indicate the error code or REG_NOERROR if succeeded. Note: We assume dest->elems is NULL, when dest->alloc is 0. */static reg_errcode_tinternal_functionre_node_set_add_intersect (re_node_set *dest, const re_node_set *src1, const re_node_set *src2){ Idx i1, i2, is, id, delta, sbase; if (src1->nelem == 0 || src2->nelem == 0) return REG_NOERROR; /* We need dest->nelem + 2 * elems_in_intersection; this is a conservative estimate. */ if (src1->nelem + src2->nelem + dest->nelem > dest->alloc) { Idx new_alloc = src1->nelem + src2->nelem + dest->alloc; Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc); if (BE (new_elems == NULL, 0)) return REG_ESPACE; dest->elems = new_elems; dest->alloc = new_alloc; } /* Find the items in the intersection of SRC1 and SRC2, and copy into the top of DEST those that are not already in DEST itself. */ sbase = dest->nelem + src1->nelem + src2->nelem; i1 = src1->nelem - 1; i2 = src2->nelem - 1; id = dest->nelem - 1; for (;;) { if (src1->elems[i1] == src2->elems[i2]) { /* Try to find the item in DEST. Maybe we could binary search? */ while (REG_VALID_INDEX (id) && dest->elems[id] > src1->elems[i1]) --id; if (! REG_VALID_INDEX (id) || dest->elems[id] != src1->elems[i1]) dest->elems[--sbase] = src1->elems[i1]; if (! REG_VALID_INDEX (--i1) || ! REG_VALID_INDEX (--i2)) break; } /* Lower the highest of the two items. */ else if (src1->elems[i1] < src2->elems[i2]) { if (! REG_VALID_INDEX (--i2)) break; } else { if (! REG_VALID_INDEX (--i1)) break; } } id = dest->nelem - 1; is = dest->nelem + src1->nelem + src2->nelem - 1; delta = is - sbase + 1; /* Now copy. When DELTA becomes zero, the remaining DEST elements are already in place; this is more or less the same loop that is in re_node_set_merge. */ dest->nelem += delta; if (delta > 0 && REG_VALID_INDEX (id)) for (;;) { if (dest->elems[is] > dest->elems[id]) { /* Copy from the top. */ dest->elems[id + delta--] = dest->elems[is--]; if (delta == 0) break; } else { /* Slide from the bottom. */ dest->elems[id + delta] = dest->elems[id]; if (! REG_VALID_INDEX (--id)) break; } } /* Copy remaining SRC elements. */ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx)); return REG_NOERROR;}/* Calculate the union set of the sets SRC1 and SRC2. And store it to DEST. Return value indicate the error code or REG_NOERROR if succeeded. */static reg_errcode_tinternal_functionre_node_set_init_union (re_node_set *dest, const re_node_set *src1, const re_node_set *src2){ Idx i1, i2, id; if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0) { dest->alloc = src1->nelem + src2->nelem; dest->elems = re_malloc (Idx, dest->alloc); if (BE (dest->elems == NULL, 0)) return REG_ESPACE; } else { if (src1 != NULL && src1->nelem > 0) return re_node_set_init_copy (dest, src1); else if (src2 != NULL && src2->nelem > 0) return re_node_set_init_copy (dest, src2); else re_node_set_init_empty (dest); return REG_NOERROR; } for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;) { if (src1->elems[i1] > src2->elems[i2]) { dest->elems[id++] = src2->elems[i2++]; continue; } if (src1->elems[i1] == src2->elems[i2]) ++i2; dest->elems[id++] = src1->elems[i1++]; } if (i1 < src1->nelem) { memcpy (dest->elems + id, src1->elems + i1, (src1->nelem - i1) * sizeof (Idx)); id += src1->nelem - i1; } else if (i2 < src2->nelem) { memcpy (dest->elems + id, src2->elems + i2, (src2->nelem - i2) * sizeof (Idx)); id += src2->nelem - i2; } dest->nelem = id; return REG_NOERROR;}/* Calculate the union set of the sets DEST and SRC. And store it to DEST. Return value indicate the error code or REG_NOERROR if succeeded. */static reg_errcode_tinternal_functionre_node_set_merge (re_node_set *dest, const re_node_set *src){ Idx is, id, sbase, delta; if (src == NULL || src->nelem == 0) return REG_NOERROR; if (dest->alloc < 2 * src->nelem + dest->nelem) { Idx new_alloc = 2 * (src->nelem + dest->alloc); Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc); if (BE (new_buffer == NULL, 0)) return REG_ESPACE; dest->elems = new_buffer; dest->alloc = new_alloc; } if (BE (dest->nelem == 0, 0)) { dest->nelem = src->nelem; memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx)); return REG_NOERROR; } /* Copy into the top of DEST the items of SRC that are not found in DEST. Maybe we could binary search in DEST? */ for (sbase = dest->nelem + 2 * src->nelem, is = src->nelem - 1, id = dest->nelem - 1; REG_VALID_INDEX (is) && REG_VALID_INDEX (id); ) { if (dest->elems[id] == src->elems[is]) is--, id--; else if (dest->elems[id] < src->elems[is]) dest->elems[--sbase] = src->elems[is--]; else /* if (dest->elems[id] > src->elems[is]) */ --id; } if (REG_VALID_INDEX (is)) { /* If DEST is exhausted, the remaining items of SRC must be unique. */ sbase -= is + 1; memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx)); } id = dest->nelem - 1; is = dest->nelem + 2 * src->nelem - 1; delta = is - sbase + 1; if (delta == 0) return REG_NOERROR; /* Now copy. When DELTA becomes zero, the remaining DEST elements are already in place. */ dest->nelem += delta; for (;;) { if (dest->elems[is] > dest->elems[id]) { /* Copy from the top. */ dest->elems[id + delta--] = dest->elems[is--]; if (delta == 0) break; } else { /* Slide from the bottom. */ dest->elems[id + delta] = dest->elems[id]; if (! REG_VALID_INDEX (--id)) { /* Copy remaining SRC elements. */ memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx)); break; } } } return REG_NOERROR;}/* Insert the new element ELEM to the re_node_set* SET. SET should not already have ELEM. Return true if successful. */static boolinternal_functionre_node_set_insert (re_node_set *set, Idx elem){ Idx idx; /* In case the set is empty. */ if (set->alloc == 0) return BE (re_node_set_init_1 (set, elem) == REG_NOERROR, 1); if (BE (set->nelem, 0) == 0) { /* We already guaranteed above that set->alloc != 0. */ set->elems[0] = elem; ++set->nelem; return true; } /* Realloc if we need. */ if (set->alloc == set->nelem) { Idx *new_elems; set->alloc = set->alloc * 2; new_elems = re_realloc (set->elems, Idx, set->alloc); if (BE (new_elems == NULL, 0)) return false; set->elems = new_elems; } /* Move the elements which follows the new element. Test the first element separately to skip a check in the inner loop. */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -