📄 gen_collate.c
字号:
{ colitem_t *p; p = xmalloc(sizeof(colitem_t)); p->string = xsymdup(item); p->element = (!def) ? def : xsymdup(def); return p;}static void add_colitem(char *item, char *def){ colitem_t *p;#if 0 printf("adding collation item %s", item); if (def) { printf(" with definition %s", def); } printf("\n");#endif p = new_colitem(item, def);#warning devel code if (superset) { if (tfind(p, &cur_base->root_colitem, colitem_cmp)) {/* fprintf(stderr, "skipping superset duplicate collating item \"%s\"\n", p->string); */ del_colitem(p); return;/* } else { *//* fprintf(stderr, "superset: new collating item \"%s\" = %s\n", p->string, p->element); */ } } if (cur_col == cur_derived) { if (!tfind(p, &cur_base->root_colitem, colitem_cmp)) { /* not in current but could be in base */ if (!tsearch(p, &cur_base->root_colitem, colitem_cmp)) { error_msg("OUT OF MEMORY!"); } } else if (!tfind(p, &cur_base->root_colitem, colelement_cmp)) { error_msg("collating element/symbol mismatch: item=%s def=%s", item, def); } } if (!tfind(p, &cur_col->root_colitem, colitem_cmp)) { /* not in current but could be in base */ if (!tsearch(p, &cur_col->root_colitem, colitem_cmp)) { error_msg("OUT OF MEMORY!"); } } else if (!tfind(p, &cur_col->root_colitem, colelement_cmp)) { error_msg("collating element/symbol mismatch"); } else { /* already there */ fprintf(stderr, "duplicate collating item \"%s\"\n", p->string); del_colitem(p); }}/* add a script (section) to the current locale */static void add_script(const char *s){ ll_item_t *l; /* make sure it isn't in base if working with derived */ if (cur_base != cur_col) { if (find_section_list_item(s, cur_base)) { error_msg("attempt to add script %s for derived when already in base", s); } } if (find_section_list_item(s, cur_col)) { error_msg("attempt to readd script %s", s); } l = find_ll_last(cur_col->section_list); insque(new_ll_item(DT_SECTION, new_section(s)), l);}static const char str_forward[] = "forward";static const char str_backward[] = "backward";static const char str_position[] = "position";static void do_order_start(void){ const char *s; char *e; ll_item_t *l; section_t *sect; int rule; if (order_state & ~IN_ORDER) { error_msg("order_start following reorder{_sections}_after"); } order_state |= IN_ORDER; if (superset) { if (++superset_order_start_cnt > 1) { error_msg("currently only a common order_start is supported in superset"); } return; } if (!(s = next_token())) { s = str_forward; /* if no args */ } if (*s == '<') { /* section (script) */ e = strrchr(s,'>'); if ((*s == '<') && e && (*e == '>') && !e[1]) { e[1] = 0; /* cleanup in case next_token stored something */ if (!(l = find_section_list_item(s, cur_col))) { error_msg("ref of undefined sections: %s", s); } sect = (section_t *)(l->data); if (sect->num_rules) { error_msg("sections already defined: %s", s); } } else { error_msg("illegal section ref: %s", s); } if (!(s = next_token())) { s = str_forward; /* if no args */ } else if (*s != ';') { error_msg("missing seperator!"); } } else { /* need an anonymous section */ if ((*cur_section->name != '<') && (cur_section->num_items == 0)) { /* already in an empty anonymous section */ sect = cur_section;/* fprintf(stdout, "using empty anon section %s\n", sect->name); */ } else { sect = new_section(NULL); l = find_ll_last(cur_col->section_list); insque(new_ll_item(DT_SECTION, sect), l);/* fprintf(stdout, "adding order section after section %s\n", ((section_t *)(l->data))->name); *//* fprintf(stdout, " last section is %s\n", ((section_t *)(l->next->data))->name); */ } sect->num_rules = 0; /* setting this below so nix default */ } cur_section = sect;/* fprintf(stdout, "cur_section now %s\n", cur_section->name); */#warning need to add section to weight list? /* now do rules */ do { rule = 0; if (*s == ';') { ++s; } while (*s) { if (!strncmp(str_forward, s, 7)) { rule |= R_FORWARD; s += 7; } else if (!strncmp(str_backward, s, 8)) { rule |= R_BACKWARD; s += 8; } else if (!strncmp(str_position, s, 8)) { rule |= R_POSITION; s += 8; } if (*s == ',') { ++s; continue; } if (!*s || (*s == ';')) { if (sect->num_rules >= MAX_COLLATION_WEIGHTS) { error_msg("more than %d weight rules!", MAX_COLLATION_WEIGHTS); } if (!rule) { error_msg("missing weight rule!"); } if ((rule & (R_FORWARD|R_BACKWARD|R_POSITION)) > R_BACKWARD) { error_msg("backward paired with forward and/or position!"); } sect->rules[sect->num_rules++] = rule; rule = 0; continue; } error_msg("illegal weight rule: %s", s); } } while ((s = next_token()) != NULL); cur_section = sect;/* fprintf(stderr, "setting cur_num_weights to %d for %s\n", sect->num_rules, sect->name); */ cur_num_weights = sect->num_rules; memcpy(cur_rule, sect->rules, MAX_COLLATION_WEIGHTS);}static void do_order_end(void){ if (!(order_state & IN_ORDER)) { error_msg("order_end with no matching order_start"); } order_state &= ~IN_ORDER; cur_section = new_section(NULL);}static void do_reorder_after(void){ char *t; ll_item_t *lli; const weight_t *w; int save_cur_num_weights; char save_cur_rule[MAX_COLLATION_WEIGHTS]; if (order_state & ~IN_REORDER) { error_msg("reorder_after following order_start or reorder_sections_after"); } order_state |= IN_REORDER; if (superset) { error_msg("currently reorder_after is not supported in supersets"); }#warning have to use rule for current section!!! if (!(t = next_token())) { error_msg("missing arg for reorder_after"); } t = xsymdup(t); if (next_token() != NULL) { error_msg("trailing text reorder_after: %s", pos); } if (cur_col == cur_base) { error_msg("sorry.. reorder_after in base locale is not currently supported"); } if (!(lli = find_wi_index(t, cur_base))) { error_msg("reorder_after for non-base item currently not supported: %s", t); } w = ((weighted_item_t *)(lli->data))->weight; save_cur_num_weights = cur_num_weights; memcpy(save_cur_rule, cur_rule, MAX_COLLATION_WEIGHTS); cur_section = new_section("R"); insque(new_ll_item(DT_REORDER, cur_section), lli);#if 0 { ll_item_t *l1; ll_item_t *l2; ll_item_t *l3; l1 = new_ll_item(DT_REORDER, cur_section); l2 = find_ll_last(cur_col->section_list); insque(l1, l2); l3 = find_ll_last(cur_col->section_list); fprintf(stderr, "reorder_after %p %p %p %s\n", l1, l2, l3, cur_section->name); }#else insque(new_ll_item(DT_REORDER, cur_section), find_ll_last(cur_col->section_list));#endif cur_num_weights = cur_section->num_rules = save_cur_num_weights; memcpy(cur_rule, save_cur_rule, MAX_COLLATION_WEIGHTS); memcpy(cur_section->rules, save_cur_rule, MAX_COLLATION_WEIGHTS);#warning devel code/* fprintf(stderr, "reorder -- %s %d\n", ((weighted_item_t *)(lli->data))->symbol, w->num_weights); */#warning hack to get around hu_HU reorder-after problem/* if (!w->num_weights) { *//* } else { *//* cur_num_weights = w->num_weights; *//* memcpy(cur_rule, w->rule, MAX_COLLATION_WEIGHTS); *//* } *//* fprintf(stderr, "reorder_after succeeded for %s\n", t); */}static void do_reorder_end(void){ if (!(order_state & IN_REORDER)) { error_msg("reorder_end with no matching reorder_after"); } order_state &= ~IN_REORDER;}static void do_reorder_sections_after(void){ const char *t; ll_item_t *lli; if (order_state & ~IN_REORDER_SECTIONS) { error_msg("reorder_sections_after following order_start or reorder_after"); } order_state |= IN_REORDER_SECTIONS; if (superset) { error_msg("currently reorder_sections_after is not supported in supersets"); } if (!(t = next_token())) { error_msg("missing arg for reorder_sections_after"); } t = xsymdup(t); if (next_token() != NULL) { error_msg("trailing text reorder_sections_after: %s", pos); } if (cur_col == cur_base) { error_msg("sorry.. reorder_sections_after in base locale is not currently supported"); } lli = cur_base->section_list; do {/* fprintf(stderr, "hmm -- |%s|%d|\n", ((section_t *)(lli->data))->name, lli->data_type); */ if (lli->data_type & DT_SECTION) {/* fprintf(stderr, "checking |%s|%s|\n", ((section_t *)(lli->data))->name, t); */ if (!strcmp(((section_t *)(lli->data))->name, t)) { reorder_section_ptr = lli; return; } } lli = lli->next; } while (lli); error_msg("reorder_sections_after for non-base item currently not supported: %s", t);}static void do_reorder_sections_end(void){ if (!(order_state & IN_REORDER_SECTIONS)) { error_msg("reorder_sections_end with no matching reorder_sections_after"); } order_state &= ~IN_REORDER_SECTIONS; reorder_section_ptr = NULL;}static ll_item_t *new_ll_item(int data_type, void *data){ ll_item_t *p; p = xmalloc(sizeof(ll_item_t)); p->next = p->prev = NULL; p->data_type = data_type; p->data = data; p->idx = INT_MIN; return p;}static int sym_cmp(const void *n1, const void *n2){/* fprintf(stderr, "sym_cmp: |%s| |%s|\n", (const char *)n1, (const char *)n2); */ return strcmp((const char *) n1, (const char *) n2);}static char *xsymdup(const char *s){ void *p; if (!(p = tfind(s, &root_sym, sym_cmp))) { /* not a currently known symbol */ if (!(s = strdup(s)) || !(p = tsearch(s, &root_sym, sym_cmp))) { error_msg("OUT OF MEMORY!"); } ++num_sym; mem_sym += strlen(s) + 1;/* fprintf(stderr, "xsymdup: alloc |%s| %p |%s| %p\n", *(char **)p, p, s, s); *//* } else { *//* fprintf(stderr, "xsymdup: found |%s| %p\n", *(char **)p, p); */ } return *(char **) p;}static int weight_cmp(const void *n1, const void *n2){ const weight_t *w1 = (const weight_t *) n1; const weight_t *w2 = (const weight_t *) n2; int i, r; if (w1->num_weights != w2->num_weights) { return w1->num_weights - w2->num_weights; } for (i=0 ; i < w1->num_weights ; i++) { if (w1->rule[i] != w2->rule[i]) { return w1->rule[i] - w2->rule[i]; } if ((r = strcmp(w1->colitem[i], w2->colitem[i])) != 0) { return r; } } return 0;}static weight_t *register_weight(weight_t *w){ void *p; if (!(p = tfind(w, &root_weight, weight_cmp))) { /* new weight */ p = xmalloc(sizeof(weight_t)); memcpy(p, w, sizeof(weight_t)); if (!(p = tsearch(p, &root_weight, weight_cmp))) { error_msg("OUT OF MEMORY!"); } ++unique_weights;/* } else { *//* fprintf(stderr, "rw: found\n"); */ } return *(weight_t **)p;}static size_t ll_len(ll_item_t *l){ size_t n = 0; ll_item_t *p = l; while (p) { ++n; p = p->next; if (p == l) { /* work for circular too */ break; } } return n;}static size_t ll_count(ll_item_t *l, int mask){ size_t n = 0; ll_item_t *p = l; while (p) { if (p->data_type & mask) { ++n; } p = p->next; if (p == l) { /* work for circular too */ break; } } return n;}static int wi_index_cmp(const void *n1, const void *n2){ const char *s1 = ((weighted_item_t *)(((ll_item_t *) n1)->data))->symbol; const char *s2 = ((weighted_item_t *)(((ll_item_t *) n2)->data))->symbol; return strcmp(s1, s2);}static void add_wi_index(ll_item_t *l){ assert(l->data_type == DT_WEIGHTED); if (!strcmp(((weighted_item_t *)(l->data))->symbol, "UNDEFINED")) { cur_col->undefined_idx = l; } if (!tfind(l, &cur_col->root_wi_index, wi_index_cmp)) { /* new wi_index */ if (!tsearch(l, &cur_col->root_wi_index, wi_index_cmp)) { error_msg("OUT OF MEMORY!"); } } if (cur_base != cur_col) { if (!tfind(l, &cur_base->root_wi_index, wi_index_cmp)) {/* not a base val *//* printf("derived: %s\n", ((weighted_item_t *)(l->data))->symbol); */ if (!tfind(l, &cur_base->root_derived_wi, wi_index_cmp)) { /* new derived */ if (!tsearch(l, &cur_base->root_derived_wi, wi_index_cmp)) { error_msg("OUT OF MEMORY!"); } } } }}static int final_index;static int is_ucode(const char *s){ if ((s[0] == '<') && (s[1] == 'U') && isxdigit(s[2]) && isxdigit(s[3]) && isxdigit(s[4]) && isxdigit(s[5]) && (s[6] == '>') ) { return 7; } else { return 0; }}static void add_final_col_index(const char *s){ ENTRY e; e.key = (char *) s; e.data = (void *)(final_index); if (!hsearch(e, FIND)) { /* not in the table */ if (!hsearch(e, ENTER)) { error_msg("OUT OF MEMORY! (hsearch)"); }#if 0 { int n; void *v; colitem_t ci; colitem_t *p; const char *t; if (!strcmp(s, "UNDEFINED")) { printf("%6d: %s\n", final_index, s); } else { assert(*s == '<'); if ((n = is_ucode(s)) != 0) { assert(!s[n]); printf("%6d: %s\n", final_index, s); } else { ci.string = (char *) s; ci.element = NULL; /* don't care */ v = tfind(&ci, &cur_base->root_colitem, colitem_cmp); if (!v) { fprintf(stderr, "%s NOT DEFINED!!!\n", s); } else { p = *((colitem_t **) v); if (p->element != NULL) { t = p->element; assert(*t == '"'); ++t; n = is_ucode(t); assert(n); printf("%6d: %.*s | ", final_index, n, t); do { t += n; assert(*t);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -