📄 charset.c
字号:
/*SMS Server Tools 3Copyright (C) Keijo Kasvihttp://smstools3.kekekasvi.com/Based on SMS Server Tools 2 from Stefan Fringshttp://www.meinemullemaus.de/This program is free software unless you got it under another license directlyfrom the author. You can redistribute it and/or modify it under the terms ofthe GNU General Public License as published by the Free Software Foundation.Either version 2 of the License, or (at your option) any later version.*/#include <stdlib.h>#include <stdio.h>#include <string.h>#include <stdarg.h>#include <syslog.h>#include "charset.h"#include "logging.h"#include "smsd_cfg.h"// For incoming character 0x24 conversion:// Change this if other than Euro character is wanted, like '?' or '$'.#define GSM_CURRENCY_SYMBOL_TO_ISO 0xA4// iso = ISO8859-15 (you might change the table to any other 8-bit character set)// sms = sms character set used by mobile phones// iso smschar charset[] = { '@' , 0x00, // COMMERCIAL AT 0xA3, 0x01, // POUND SIGN '$' , 0x02, // DOLLAR SIGN 0xA5, 0x03, // YEN SIGN 0xE8, 0x04, // LATIN SMALL LETTER E WITH GRAVE 0xE9, 0x05, // LATIN SMALL LETTER E WITH ACUTE 0xF9, 0x06, // LATIN SMALL LETTER U WITH GRAVE 0xEC, 0x07, // LATIN SMALL LETTER I WITH GRAVE 0xF2, 0x08, // LATIN SMALL LETTER O WITH GRAVE 0xC7, 0x09, // LATIN CAPITAL LETTER C WITH CEDILLA 0x0A, 0x0A, // LF 0xD8, 0x0B, // LATIN CAPITAL LETTER O WITH STROKE 0xF8, 0x0C, // LATIN SMALL LETTER O WITH STROKE 0x0D, 0x0D, // CR 0xC5, 0x0E, // LATIN CAPITAL LETTER A WITH RING ABOVE 0xE5, 0x0F, // LATIN SMALL LETTER A WITH RING ABOVE// ISO8859-7, Capital greek characters// 0xC4, 0x10,// 0x5F, 0x11,// 0xD6, 0x12,// 0xC3, 0x13,// 0xCB, 0x14,// 0xD9, 0x15,// 0xD0, 0x16,// 0xD8, 0x17,// 0xD3, 0x18,// 0xC8, 0x19,// 0xCE, 0x1A,// ISO8859-1, ISO8859-15 0x81, 0x10, // GREEK CAPITAL LETTER DELTA 0x5F, 0x11, // LOW LINE 0x82, 0x12, // GREEK CAPITAL LETTER PHI 0x83, 0x13, // GREEK CAPITAL LETTER GAMMA 0x84, 0x14, // GREEK CAPITAL LETTER LAMDA 0x85, 0x15, // GREEK CAPITAL LETTER OMEGA 0x86, 0x16, // GREEK CAPITAL LETTER PI 0x87, 0x17, // GREEK CAPITAL LETTER PSI 0x88, 0x18, // GREEK CAPITAL LETTER SIGMA 0x89, 0x19, // GREEK CAPITAL LETTER THETA 0x8A, 0x1A, // GREEK CAPITAL LETTER XI 0x1B, 0x1B, // ESC 0xC6, 0x1C, // LATIN CAPITAL LETTER AE 0xE6, 0x1D, // LATIN SMALL LETTER AE 0xDF, 0x1E, // LATIN SMALL LETTER SHARP S 0xC9, 0x1F, // LATIN CAPITAL LETTER E WITH ACUTE ' ' , 0x20, // SPACE '!' , 0x21, // EXCLAMATION MARK 0x22, 0x22, // QUOTATION MARK '#' , 0x23, // NUMBER SIGN // GSM character 0x24 is a "currency symbol". // This character is never sent. Incoming character is converted without conversion tables. '%' , 0x25, // PERSENT SIGN '&' , 0x26, // AMPERSAND 0x27, 0x27, // APOSTROPHE '(' , 0x28, // LEFT PARENTHESIS ')' , 0x29, // RIGHT PARENTHESIS '*' , 0x2A, // ASTERISK '+' , 0x2B, // PLUS SIGN ',' , 0x2C, // COMMA '-' , 0x2D, // HYPHEN-MINUS '.' , 0x2E, // FULL STOP '/' , 0x2F, // SOLIDUS '0' , 0x30, // DIGIT 0...9 '1' , 0x31, '2' , 0x32, '3' , 0x33, '4' , 0x34, '5' , 0x35, '6' , 0x36, '7' , 0x37, '8' , 0x38, '9' , 0x39, ':' , 0x3A, // COLON ';' , 0x3B, // SEMICOLON '<' , 0x3C, // LESS-THAN SIGN '=' , 0x3D, // EQUALS SIGN '>' , 0x3E, // GREATER-THAN SIGN '?' , 0x3F, // QUESTION MARK 0xA1, 0x40, // INVERTED EXCLAMATION MARK 'A' , 0x41, // LATIN CAPITAL LETTER A...Z 'B' , 0x42, 'C' , 0x43, 'D' , 0x44, 'E' , 0x45, 'F' , 0x46, 'G' , 0x47, 'H' , 0x48, 'I' , 0x49, 'J' , 0x4A, 'K' , 0x4B, 'L' , 0x4C, 'M' , 0x4D, 'N' , 0x4E, 'O' , 0x4F, 'P' , 0x50, 'Q' , 0x51, 'R' , 0x52, 'S' , 0x53, 'T' , 0x54, 'U' , 0x55, 'V' , 0x56, 'W' , 0x57, 'X' , 0x58, 'Y' , 0x59, 'Z' , 0x5A, 0xC4, 0x5B, // LATIN CAPITAL LETTER A WITH DIAERESIS 0xD6, 0x5C, // LATIN CAPITAL LETTER O WITH DIAERESIS 0xD1, 0x5D, // LATIN CAPITAL LETTER N WITH TILDE 0xDC, 0x5E, // LATIN CAPITAL LETTER U WITH DIAERESIS 0xA7, 0x5F, // SECTION SIGN 0xBF, 0x60, // INVERTED QUESTION MARK 'a' , 0x61, // LATIN SMALL LETTER A...Z 'b' , 0x62, 'c' , 0x63, 'd' , 0x64, 'e' , 0x65, 'f' , 0x66, 'g' , 0x67, 'h' , 0x68, 'i' , 0x69, 'j' , 0x6A, 'k' , 0x6B, 'l' , 0x6C, 'm' , 0x6D, 'n' , 0x6E, 'o' , 0x6F, 'p' , 0x70, 'q' , 0x71, 'r' , 0x72, 's' , 0x73, 't' , 0x74, 'u' , 0x75, 'v' , 0x76, 'w' , 0x77, 'x' , 0x78, 'y' , 0x79, 'z' , 0x7A, 0xE4, 0x7B, // LATIN SMALL LETTER A WITH DIAERESIS 0xF6, 0x7C, // LATIN SMALL LETTER O WITH DIAERESIS 0xF1, 0x7D, // LATIN SMALL LETTER N WITH TILDE 0xFC, 0x7E, // LATIN SMALL LETTER U WITH DIAERESIS 0xE0, 0x7F, // LATIN SMALL LETTER A WITH GRAVE// Moved to the special char handling:// 0x60, 0x27, // GRAVE ACCENT// 0xE1, 0x61, // replacement for accented a// 0xED, 0x69, // replacement for accented i// 0xF3, 0x6F, // replacement for accented o// 0xFA, 0x75, // replacement for accented u 0 , 0 // End marker };// Extended characters. In GSM they are preceeded by 0x1B.char ext_charset[] = { 0x0C, 0x0A, // <FF> '^' , 0x14, // CIRCUMFLEX ACCENT '{' , 0x28, // LEFT CURLY BRACKET '}' , 0x29, // RIGHT CURLY BRACKET '\\', 0x2F, // REVERSE SOLIDUS '[' , 0x3C, // LEFT SQUARE BRACKET '~' , 0x3D, // TILDE ']' , 0x3E, // RIGHT SQUARE BRACKET 0x7C, 0x40, // VERTICAL LINE 0xA4, 0x65, // EURO SIGN 0 , 0 // End marker };char iso_8859_15_chars[] ={ 0x60, 0x27, // GRAVE ACCENT --> APOSTROPHE 0xA0, 0x20, // NO-BREAK SPACE --> SPACE 0xA2, 0x63, // CENT SIGN --> c 0xA6, 0x53, // LATIN CAPITAL LETTER S WITH CARON --> S 0xA8, 0x73, // LATIN SMALL LETTER S WITH CARON --> s 0xA9, 0x43, // COPYRIGHT SIGN --> C 0xAA, 0x61, // FEMININE ORDINAL INDICATOR --> a 0xAB, 0x3C, // LEFT-POINTING DOUBLE ANGLE QUOTATION MARK --> < 0xAC, 0x2D, // NOT SIGN --> - 0xAD, 0x2D, // SOFT HYPHEN --> - 0xAE, 0x52, // REGISTERED SIGN --> R 0xAF, 0x2D, // MACRON --> - 0xB0, 0x6F, // DEGREE SIGN --> o 0xB1, 0x2B, // PLUS-MINUS SIGN --> + 0xB2, 0x32, // SUPERSCRIPT TWO --> 2 0xB3, 0x33, // SUPERSCRIPT THREE --> 3 0xB4, 0x5A, // LATIN CAPITAL LETTER Z WITH CARON --> Z 0xB5, 0x75, // MICRO SIGN --> u 0xB6, 0x49, // PILCROW SIGN --> I 0xB7, 0x2E, // MIDDLE DOT --> . 0xB8, 0x7A, // LATIN SMALL LETTER Z WITH CARON --> z 0xB9, 0x31, // SUPERSCRIPT ONE --> 1 0xBA, 0x6F, // MASCULINE ORDINAL INDICATOR --> o 0xBB, 0x3E, // RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK --> > 0xBC, 0x4F, // LATIN CAPITAL LIGATURE OE --> O 0xBD, 0x6F, // LATIN SMALL LIGATURE OE --> o 0xBE, 0x59, // LATIN CAPITAL LETTER Y WITH DIAERESIS --> Y 0xC0, 0x41, // LATIN CAPITAL LETTER A WITH GRAVE --> A 0xC1, 0x41, // LATIN CAPITAL LETTER A WITH ACUTE --> A 0xC2, 0x41, // LATIN CAPITAL LETTER A WITH CIRCUMFLEX --> A 0xC3, 0x41, // LATIN CAPITAL LETTER A WITH TILDE --> A 0xC8, 0x45, // LATIN CAPITAL LETTER E WITH GRAVE --> E 0xCA, 0x45, // LATIN CAPITAL LETTER E WITH CIRCUMFLEX --> E 0xCB, 0x45, // LATIN CAPITAL LETTER E WITH DIAERESIS --> E 0xCC, 0x49, // LATIN CAPITAL LETTER I WITH GRAVE --> I 0xCD, 0x49, // LATIN CAPITAL LETTER I WITH ACUTE --> I 0xCE, 0x49, // LATIN CAPITAL LETTER I WITH CIRCUMFLEX --> I 0xCF, 0x49, // LATIN CAPITAL LETTER I WITH DIAERESIS --> I 0xD0, 0x44, // LATIN CAPITAL LETTER ETH --> D 0xD2, 0x4F, // LATIN CAPITAL LETTER O WITH GRAVE --> O 0xD3, 0x4F, // LATIN CAPITAL LETTER O WITH ACUTE --> O 0xD4, 0x4F, // LATIN CAPITAL LETTER O WITH CIRCUMFLEX --> O 0xD5, 0x4F, // LATIN CAPITAL LETTER O WITH TILDE --> O 0xD7, 0x78, // MULTIPLICATION SIGN --> x 0xD9, 0x55, // LATIN CAPITAL LETTER U WITH GRAVE --> U 0xDA, 0x55, // LATIN CAPITAL LETTER U WITH ACUTE --> U 0xDB, 0x55, // LATIN CAPITAL LETTER U WITH CIRCUMFLEX --> U 0xDD, 0x59, // LATIN CAPITAL LETTER Y WITH ACUTE --> Y 0xDE, 0x62, // LATIN CAPITAL LETTER THORN --> b 0xE1, 0x61, // LATIN SMALL LETTER A WITH ACUTE --> a 0xE2, 0x61, // LATIN SMALL LETTER A WITH CIRCUMFLEX --> a 0xE3, 0x61, // LATIN SMALL LETTER A WITH TILDE --> a 0xE7, 0x09, // LATIN SMALL LETTER C WITH CEDILLA --> LATIN CAPITAL LETTER C WITH CEDILLA 0xEA, 0x65, // LATIN SMALL LETTER E WITH CIRCUMFLEX --> e 0xEB, 0x65, // LATIN SMALL LETTER E WITH DIAERESIS --> e 0xED, 0x69, // LATIN SMALL LETTER I WITH ACUTE --> i 0xEE, 0x69, // LATIN SMALL LETTER I WITH CIRCUMFLEX --> i 0xEF, 0x69, // LATIN SMALL LETTER I WITH DIAERESIS --> i 0xF0, 0x6F, // LATIN SMALL LETTER ETH --> o 0xF3, 0x6F, // LATIN SMALL LETTER O WITH ACUTE --> o 0xF4, 0x6F, // LATIN SMALL LETTER O WITH CIRCUMFLEX --> o 0xF5, 0x6F, // LATIN SMALL LETTER O WITH TILDE --> o 0xF7, 0x2F, // DIVISION SIGN --> / (SOLIDUS) 0xFA, 0x75, // LATIN SMALL LETTER U WITH ACUTE --> u 0xFB, 0x75, // LATIN SMALL LETTER U WITH CIRCUMFLEX --> u 0xFD, 0x79, // LATIN SMALL LETTER Y WITH ACUTE --> y 0xFE, 0x62, // LATIN SMALL LETTER THORN --> b 0xFF, 0x79, // LATIN SMALL LETTER Y WITH DIAERESIS --> y 0 , 0};int special_char2gsm(char ch, char *newch){ int table_row = 0; char *table = iso_8859_15_chars; while (table[table_row *2]) { if (table[table_row *2] == ch) { if (newch) *newch = table[table_row *2 +1]; return 1; } table_row++; } return 0;}// Return value:// 0 = ch not found.// 1 = ch found from normal table// 2 = ch found from extended tableint char2gsm(char ch, char *newch){ int result = 0; int table_row; // search in normal translation table table_row=0; while (charset[table_row*2]) { if (charset[table_row*2] == ch) { if (newch) *newch = charset[table_row*2+1]; result = 1; break; } table_row++; } // if not found in normal table, then search in the extended table if (result == 0) { table_row=0; while (ext_charset[table_row*2]) { if (ext_charset[table_row*2] == ch) { if (newch) *newch = ext_charset[table_row*2+1]; result = 2; break; } table_row++; } } return result;}int gsm2char(char ch, char *newch, int which_table){ int table_row = 0; char *table; if (which_table == 1) table = charset; else if (which_table == 2) table = ext_charset; else return 0; while (table[table_row *2]) { if (table[table_row *2 +1] == ch) { *newch = table[table_row *2]; return 1; } table_row++; } return 0;}int iso_utf8_2gsm(char* source, int size, char* destination, int max){ int source_count=0; int dest_count=0; int found=0; char newch; char logtmp[51]; char tmpch; destination[dest_count]=0; if (source==0 || size <= 0) return 0;#ifdef DEBUGMSG log_charconv = 1;#endif if (log_charconv) { *logch_buffer = 0; logch("!! iso_utf8_2gsm(source=%.*s, size=%i)", size, source, size); logch(NULL); } // Convert each character until end of string while (source_count<size && dest_count<max) { found = char2gsm(source[source_count], &newch); if (found == 2) { if (dest_count >= max -2) break; destination[dest_count++] = 0x1B; } if (found >= 1) { destination[dest_count++] = newch; if (log_charconv) { sprintf(logtmp, "%02X[%c]", (unsigned char)source[source_count], prch(source[source_count])); if (found > 1 || source[source_count] != newch) { sprintf(strchr(logtmp, 0), "->%s%02X", (found == 2)? "Esc-" : "", (unsigned char)newch); if (gsm2char(newch, &tmpch, found)) sprintf(strchr(logtmp, 0), "[%c]", tmpch); } logch("%s ", logtmp); } } if (found == 0 && outgoing_utf8) { // ASCII and UTF-8 table: http://members.dslextreme.com/users/kkj/webtools/ascii_utf8_table.html // Good converter: http://www.macchiato.com/unicode/convert.html unsigned int c; int iterations = 0; // 3.1beta7: If UTF-8 decoded character is not found from tables, decoding is ignored: int saved_source_count = source_count; char sourcechars[51]; c = source[source_count]; if (log_charconv) sprintf(sourcechars, "%02X", (unsigned char)source[source_count]); // 3.1beta7: Check if there is enough characters left. // Following bytes in UTF-8 should begin with 10xx xxxx // which means 0x80 ... 0xBF if (((c & 0xFF) >= 0xC2 && (c & 0xFF) <= 0xC7) || ((c & 0xFF) >= 0xD0 && (c & 0xFF) <= 0xD7)) { if (source_count < size -1 && (source[source_count +1] & 0xC0) == 0x80) { // 110xxxxx c &= 0x1F; iterations = 1; } }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -