📄 ucdomap.c
字号:
if (!strcmp(UC_MIMEcharset, "cn-big5")) { return UCGetLYhndl_byMIME("big5"); } if (!strcmp(UC_MIMEcharset, "x-mac-roman") || !strcmp(UC_MIMEcharset, "mac-roman")) { return UCGetLYhndl_byMIME("macintosh"); } if (!strcmp(UC_MIMEcharset, "x-next") || !strcmp(UC_MIMEcharset, "nextstep") || !strcmp(UC_MIMEcharset, "x-nextstep")) { return UCGetLYhndl_byMIME("next"); } if (!strcmp(UC_MIMEcharset, "iso-8859-1-windows-3.1-latin-1") || !strcmp(UC_MIMEcharset, "cp1252") || !strcmp(UC_MIMEcharset, "cp-1252") || !strcmp(UC_MIMEcharset, "ibm1252") || !strcmp(UC_MIMEcharset, "iso-8859-1-windows-3.0-latin-1")) { /* * Treat these as synonyms for windows-1252, which is more * commonly used than the IANA registered name. - FM */ return UCGetLYhndl_byMIME("windows-1252"); } if (!strcmp(UC_MIMEcharset, "iso-8859-2-windows-latin-2") || !strcmp(UC_MIMEcharset, "cp1250") || !strcmp(UC_MIMEcharset, "cp-1250") || !strcmp(UC_MIMEcharset, "ibm1250")) { /* * Treat these as synonyms for windows-1250. - FM */ return UCGetLYhndl_byMIME("windows-1250"); } if ((!strncmp(UC_MIMEcharset, "ibm", 3) || !strncmp(UC_MIMEcharset, "cp-", 3)) && isdigit((unsigned char)UC_MIMEcharset[3]) && isdigit((unsigned char)UC_MIMEcharset[4]) && isdigit((unsigned char)UC_MIMEcharset[5])) { /* * For "ibmNNN<...>" or "cp-NNN", try "cpNNN<...>" * if not yet found. - KW & FM */ char * cptmp = NULL; StrAllocCopy(cptmp, (UC_MIMEcharset + 1)); cptmp[0] = 'c'; cptmp[1] = 'p'; if ((LYhndl = UCGetLYhndl_byMIME(cptmp)) >= 0) { FREE(cptmp); return LYhndl; } /* * Try windows-NNN<...> if not yet found. - FM */ StrAllocCopy(cptmp, "windows-"); StrAllocCat(cptmp, (UC_MIMEcharset + 3)); LYhndl = UCGetLYhndl_byMIME(cptmp); FREE(cptmp); return LYhndl; } if (!strncmp(UC_MIMEcharset, "windows-", 8) && isdigit((unsigned char)UC_MIMEcharset[8]) && isdigit((unsigned char)UC_MIMEcharset[9]) && isdigit((unsigned char)UC_MIMEcharset[10])) { /* * For "windows-NNN<...>", try "cpNNN<...>" - FM */ char * cptmp = NULL; StrAllocCopy(cptmp, (UC_MIMEcharset + 6)); cptmp[0] = 'c'; cptmp[1] = 'p'; LYhndl = UCGetLYhndl_byMIME(cptmp); FREE(cptmp); return LYhndl; } if (!strcmp(UC_MIMEcharset, "koi-8")) { /* accentsoft bugosity */ return UCGetLYhndl_byMIME("koi8-r"); } } return LYhndl; /* returns -1 if no charset found by that MIME name */}/* * Function UC_setup_LYCharSets_repl() tries to set up a subtable in * LYCharSets[] appropriate for this new charset, for compatibility * with the "old method". Maybe not nice (maybe not even necessary * any more), but it works (as far as it goes..). * * We try to be conservative and only allocate new memory for this * if needed. If not needed, just point to SevenBitApproximations[i]. * [Could do the same for ISO_Latin1[] if it's identical to that, but * would make it even *more* messy than it already is...] * This the only function in this file that knows, or cares, about the * HTMLDTD or details of LYCharSets[] subtables (and therefore somewhat * violates the idea that this file should be independent of those). * As in other places, we rely on ISO_Latin1 being the *first* table * in LYCharSets. - KW *//* * We need to remember which ones were allocated and which are static. */PRIVATE char ** remember_allocated_LYCharSets[MAXCHARSETS];PRIVATE void UCreset_allocated_LYCharSets NOARGS{ int i = 0; for (; i < MAXCHARSETS; i++) { remember_allocated_LYCharSets[i] = NULL; }}PRIVATE void UCfree_allocated_LYCharSets NOARGS{ int i = 0; for (; i < MAXCHARSETS; i++) { if (remember_allocated_LYCharSets[i] != NULL) { FREE(remember_allocated_LYCharSets[i]); } }}PRIVATE char ** UC_setup_LYCharSets_repl ARGS2( int, UC_charset_in_hndl, unsigned, lowest8){ char **ISO_Latin1 = LYCharSets[0]; char **p; char **prepl; u16 *pp; char **tp; char *s7; char *s8; size_t i; int j, changed; u16 k; u8 *ti; /* * Create a temporary table for reverse lookup of latin1 codes: */ tp = (char **)malloc(96 * sizeof(char *)); if (!tp) return NULL; for (i = 0; i < 96; i++) tp[i] = NULL; ti = (u8 *)malloc(96 * sizeof(u8)); if (!ti) { FREE(tp); return NULL; } for (i = 0; i < 96; i++) ti[i] = 0; pp = UCInfo[UC_charset_in_hndl].unitable; /* * Determine if we have any mapping of a Unicode in the range 160-255 * to an allowed code point > 0x80 in our new charset... * Store any mappings found in ti[]. */ if (UCInfo[UC_charset_in_hndl].num_uni > 0) { for (i = 0; i < 256; i++) { if ((j = UCInfo[UC_charset_in_hndl].unicount[i])) { if ((k = *pp) >= 160 && k < 256 && i >= lowest8) { ti[k-160] = i; } for (; j; j--) { pp++; } } } } { u16 ct; struct unipair_str *list; /* * Determine if we have any mapping of a Unicode in the range * 160-255 to a replacement string for our new charset... * Store any mappings found in tp[]. */ ct = UCInfo[UC_charset_in_hndl].replacedesc.entry_ct; list = UCInfo[UC_charset_in_hndl].replacedesc.entries; while (ct--) { if ((k = list->unicode) >= 160 && k < 256) { tp[k-160] = list->replace_str; } list++; } } /* * Now allocate a new table compatible with LYCharSets[] * and with the HTMLDTD for entities. * We don't know yet whether we'll keep it around. */ p = prepl = (char **)malloc(HTML_dtd.number_of_entities * sizeof(char *)); if (!p) { FREE(tp); FREE(ti); return NULL; } changed = 0; for (i = 0; i < HTML_dtd.number_of_entities; i++, p++) { /* * For each of those entities, we check what the "old method" * ISO_Latin1[] mapping does with them. If it is nothing we * want to use, just point to the SevenBitApproximations[] string. */ s7 = SevenBitApproximations[i]; s8 = ISO_Latin1[i]; *p = s7; if (s8 && (unsigned char)(*s8) >= 160 && strlen(s8) == 1) { /* * We have an entity that is mapped to * one valid eightbit latin1 char. */ if (ti[(unsigned char)(*s8) - 160] >= lowest8 && !(s7[0] == ti[(unsigned char)(*s8) - 160] && s7[1] == '\0')) { /* * ...which in turn is mapped, by our "new method", * to another valid eightbit char for this new * charset: either to itself... */ if (ti[(unsigned char)(*s8) - 160] == (unsigned char)(*s8)) { *p = s8; } else { /* * ...or another byte... */#ifdef NOTDEFINED *p = (char *)malloc(2*sizeof(char)); if (!*p) { FREE(tp); FREE(ti); FREE(prepl); return NULL; } (*p)[0] = ti[(unsigned char)(*s8) - 160]; (*p)[1] = '\0';#else /* * Use this instead... make those 1-char strings * into HTAtoms, so they will be cleaned up * at exit... all for the sake of preventing * memory leaks, sigh. */ static char dummy[2]; /* one char dummy string */ dummy[0] = ti[(unsigned char)(*s8) - 160]; *p = HTAtom_name(HTAtom_for(dummy));#endif /* NOTDEFINED */ } changed = 1; } else if (tp[(unsigned char)(*s8) - 160] && strcmp(s7, tp[(unsigned char)(*s8) - 160])) { /* * ...or which is mapped, by our "new method", * to a replacement string for this new charset. */ *p = tp[(unsigned char)(*s8) - 160]; changed = 1; } } } FREE(tp); FREE(ti); if (!changed) { FREE(prepl); return NULL; } return prepl;}/* * "New method" meets "Old method" ... */PRIVATE int UC_Register_with_LYCharSets ARGS4( int, s, CONST char *, UC_MIMEcharset, CONST char *, UC_LYNXcharset, int, lowest_eightbit){ int i, LYhndl, found; char **repl; LYhndl = -1; if (LYNumCharsets == 0) { /* * Initialize here; so whoever changes * LYCharSets.c doesn't have to count... */ for (i = 0; (i < MAXCHARSETS) && LYchar_set_names[i]; i++) { LYNumCharsets = i+1; } } /* * Do different kinds of searches... * Normally the first should find the match if there is one! */ for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) { if (!strcmp(UC_LYNXcharset, LYchar_set_names[i])) { LYhndl = i; } } for (i = 0; i < MAXCHARSETS && LYchar_set_names[i] && LYhndl < 0; i++) { if (LYCharSet_UC[i].MIMEname && !strcmp(UC_MIMEcharset, LYCharSet_UC[i].MIMEname)) { LYhndl = i; } } if (LYhndl < 0) { /* not found */ found = 0; if (LYNumCharsets >= MAXCHARSETS) { if (TRACE) { fprintf(stderr, "UC_Register_with_LYCharSets: Too many. Ignoring %s/%s.", UC_MIMEcharset, UC_LYNXcharset); } return -1; } /* * Add to LYCharSets.c lists. */ LYhndl = LYNumCharsets; LYNumCharsets ++; LYlowest_eightbit[LYhndl] = 999; LYCharSets[LYhndl] = SevenBitApproximations; /* * Hmm, try to be conservative here. */ LYchar_set_names[LYhndl] = UC_LYNXcharset; LYchar_set_names[LYhndl+1] = NULL; /* * Terminating NULL may be looked for by Lynx code. */ } else { found = 1; } LYCharSet_UC[LYhndl].UChndl = s; /* * Can we just copy the pointer? Hope so... */ LYCharSet_UC[LYhndl].MIMEname = UC_MIMEcharset; LYCharSet_UC[LYhndl].enc = UCInfo[s].enc; /* * @@@ We really SHOULD get more info from the table files, * and set relevant flags in the LYCharSet_UC[] entry with * that info... For now, let's try it without. - KW */ if (lowest_eightbit < LYlowest_eightbit[LYhndl]) { LYlowest_eightbit[LYhndl] = lowest_eightbit; } else if (lowest_eightbit > LYlowest_eightbit[LYhndl]) { UCInfo[s].lowest_eight = LYlowest_eightbit[LYhndl]; } if (!found && LYhndl > 0) { repl = UC_setup_LYCharSets_repl(s,UCInfo[s].lowest_eight); if (repl) { LYCharSets[LYhndl] = repl; /* * Remember to FREE at exit. */ remember_allocated_LYCharSets[LYhndl]=repl; } } return LYhndl;}/* * This only sets up the structure - no initialization of the tables * is done here yet. */PUBLIC void UC_Charset_Setup ARGS8( CONST char *, UC_MIMEcharset, CONST char *, UC_LYNXcharset, u8 *, unicount, u16 *, unitable, int, nnuni, struct unimapdesc_str, replacedesc, int, lowest_eight, int, UC_rawuni){ int s, Gn; int i, status = 0, found; /* * Get (new?) slot. */ found = -1; for (i = 0; i < UCNumCharsets && found < 0; i++) { if (!strcmp(UCInfo[i].MIMEname, UC_MIMEcharset)) { found = i; } } if (found >= 0) { s = found; } else { if (UCNumCharsets >= MAXCHARSETS) { if (TRACE) { fprintf(stderr, "UC_Charset_Setup: Too many. Ignoring %s/%s.", UC_MIMEcharset, UC_LYNXcharset); } return; } s = UCNumCharsets; UCInfo[s].MIMEname = UC_MIMEcharset; } UCInfo[s].LYNXname = UC_LYNXcharset; UCInfo[s].unicount = unicount; UCInfo[s].unitable = unitable; UCInfo[s].num_uni = nnuni; UCInfo[s].replacedesc = replacedesc; if (replacedesc.isdefault) { default_UChndl = s; } Gn = UC_FindGN_byMIME(UC_MIMEcharset); if (Gn >= 0) UC_GNhandles[Gn] = s; UCInfo[s].GN = Gn; if (UC_rawuni == UCT_ENC_UTF8) lowest_eight = 128; /* cheat here */ UCInfo[s].lowest_eight = lowest_eight; UCInfo[s].enc = UC_rawuni; UCInfo[s].LYhndl = UC_Register_with_LYCharSets(s, UC_MIMEcharset, UC_LYNXcharset, lowest_eight); UCInfo[s].uc_status = status; if (found < 0) UCNumCharsets++; return;}PRIVATE void UCcleanup_mem NOARGS{ int i; UCfree_allocated_LYCharSets(); con_clear_unimap_str(0); con_clear_unimap_str(1); con_clear_unimap(0); con_clear_unimap(1); for (i = 1; i < 4; i++) { /* first one is static! */ FREE(inverse_translations[i]); }}PUBLIC void UCInit NOARGS{ UCreset_allocated_LYCharSets(); atexit(UCcleanup_mem); UCconsole_map_init(); UC_CHARSET_SETUP; /* us-ascii */ /* 7 bit approximations *//* * The order of charset names visible in Lynx Options menu * correspond to the order of lines below, * except for CJK and others described in LYCharSet.c */ UC_CHARSET_SETUP_iso_8859_1; /* ISO Latin 1 */ UC_CHARSET_SETUP_cp850; /* DosLatin1 (cp850) */ UC_CHARSET_SETUP_windows_1252; /* WinLatin1 (cp1252) */ UC_CHARSET_SETUP_cp437; /* DosLatinUS (cp437) */ UC_CHARSET_SETUP_dec_mcs; /* DEC Multinational */ UC_CHARSET_SETUP_macintosh; /* Macintosh (8 bit) */ UC_CHARSET_SETUP_next; /* NeXT character set */ UC_CHARSET_SETUP_viscii; /* Vietnamese (VISCII) */ UC_CHARSET_SETUP_iso_8859_2; /* ISO Latin 2 */ UC_CHARSET_SETUP_cp852; /* DosLatin2 (cp852) */ UC_CHARSET_SETUP_windows_1250; /* WinLatin2 (cp1250) */ UC_CHARSET_SETUP_iso_8859_3; /* ISO Latin 3 */ UC_CHARSET_SETUP_iso_8859_4; /* ISO Latin 4 */ UC_CHARSET_SETUP_cp775; /* DosBaltRim (cp775) */ UC_CHARSET_SETUP_windows_1257; /* WinBaltRim (cp1257) */ UC_CHARSET_SETUP_iso_8859_5; /* ISO 8859-5 Cyrillic */ UC_CHARSET_SETUP_cp866; /* DosCyrillic (cp866) */ UC_CHARSET_SETUP_windows_1251; /* WinCyrillic (cp1251) */ UC_CHARSET_SETUP_koi8_r; /* KOI8-R Cyrillic */ UC_CHARSET_SETUP_iso_8859_6; /* ISO 8869-6 Arabic */ UC_CHARSET_SETUP_cp864; /* DosArabic (cp864) */ UC_CHARSET_SETUP_windows_1256; /* WinArabic (cp1256) */ UC_CHARSET_SETUP_iso_8859_7; /* ISO 8859-7 Greek */ UC_CHARSET_SETUP_cp737; /* DosGreek (cp737) */ UC_CHARSET_SETUP_cp869; /* DosGreek2 (cp869) */ UC_CHARSET_SETUP_windows_1253; /* WinGreek (cp1253) */ UC_CHARSET_SETUP_iso_8859_8; /* ISO 8859-8 Hebrew */ UC_CHARSET_SETUP_cp862; /* DosHebrew (cp862) */ UC_CHARSET_SETUP_windows_1255; /* WinHebrew (cp1255) */ UC_CHARSET_SETUP_iso_8859_9; /* ISO 8859-9 (Latin 5) */ UC_CHARSET_SETUP_iso_8859_10; /* ISO 8859-10 */ UC_CHARSET_SETUP_utf_8; /* UNICODE UTF-8 */ UC_CHARSET_SETUP_mnemonic_ascii_0; /* RFC 1345 w/o Intro */ UC_CHARSET_SETUP_mnemonic; /* RFC 1345 Mnemonic */#ifdef NOTDEFINED UC_CHARSET_SETUP_mnem;#endif /* NOTDEFINED *//* * To add synonyms for any charset name * check function UCGetLYhndl_byMIME in this file. */}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -