html.c
来自「The Kannel Open Source WAP and SMS gatew」· C语言 代码 · 共 308 行
C
308 行
/* ==================================================================== * The Kannel Software License, Version 1.0 * * Copyright (c) 2001-2004 Kannel Group * Copyright (c) 1998-2001 WapIT Ltd. * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the * distribution. * * 3. The end-user documentation included with the redistribution, * if any, must include the following acknowledgment: * "This product includes software developed by the * Kannel Group (http://www.kannel.org/)." * Alternately, this acknowledgment may appear in the software itself, * if and wherever such third-party acknowledgments normally appear. * * 4. The names "Kannel" and "Kannel Group" must not be used to * endorse or promote products derived from this software without * prior written permission. For written permission, please * contact org@kannel.org. * * 5. Products derived from this software may not be called "Kannel", * nor may "Kannel" appear in their name, without prior written * permission of the Kannel Group. * * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE * DISCLAIMED. IN NO EVENT SHALL THE KANNEL GROUP OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * ==================================================================== * * This software consists of voluntary contributions made by many * individuals on behalf of the Kannel Group. For more information on * the Kannel Group, please see <http://www.kannel.org/>. * * Portions of this software are based upon software originally written at * WapIT Ltd., Helsinki, Finland for the Kannel project. */ /* * html.c - routines for manipulating HTML. * * Lars Wirzenius */#include <ctype.h>#include <stdio.h>#include <string.h>#include "html.h"#include "gwlib/gwlib.h"#define SMS_MAX 161/* Is there a comment beginning at offset `pos'? */static int html_comment_begins(Octstr *html, long pos){ char buf[10]; octstr_get_many_chars(buf, html, pos, 4); buf[5] = '\0'; return strcmp(buf, "<!--") == 0;}/* Skip a comment in HTML. */static void skip_html_comment(Octstr *html, long *pos){ long i; *pos += 4; /* Skip "<!--" at beginning of comment. */ i = octstr_search(html, octstr_imm("-->"), *pos); if (i == -1) *pos = octstr_len(html); else *pos = i;}/* Skip a beginning or ending tag in HTML, including any attributes. */static void skip_html_tag(Octstr *html, long *pos){ long i, len; int c; /* Skip leading '<'. */ ++(*pos); /* Skip name of tag and attributes with values. */ len = octstr_len(html); while (*pos < len && (c = octstr_get_char(html, *pos)) != '>') { if (c == '"' || c == '\'') { i = octstr_search_char(html, c, *pos + 1); if (i == -1) *pos = len; else *pos = i + 1; } else ++(*pos); } /* Skip trailing '>' if it is there. */ if (octstr_get_char(html, *pos) == '>') ++(*pos);}/* Convert an HTML entity into a single character and advance `*html' past the entity. */static void convert_html_entity(Octstr *sms, Octstr *html, long *pos){ static struct { char *entity; int latin1; } tab[] = { { "&", '&' }, { "<", '<' }, { ">", '>' }, /* The following is copied from http://www.hut.fi/~jkorpela/HTML3.2/latin1.html by Jukka Korpela. Hand and script edited to form this table. */ { " ", ' ' }, { "¡", 161 }, { "¢", 162 }, { "£", 163 }, { "¤", 164 }, { "¥", 165 }, { "¦", 166 }, { "§", 167 }, { "¨", 168 }, { "©", 169 }, { "ª", 170 }, { "«", 171 }, { "¬", 172 }, { "­", 173 }, { "®", 174 }, { "¯", 175 }, { "°", 176 }, { "±", 177 }, { "²", 178 }, { "³", 179 }, { "´", 180 }, { "µ", 181 }, { "¶", 182 }, { "·", 183 }, { "¸", 184 }, { "¹", 185 }, { "º", 186 }, { "»", 187 }, { "¼", 188 }, { "½", 189 }, { "¾", 190 }, { "¿", 191 }, { "À", 192 }, { "Á", 193 }, { "Â", 194 }, { "Ã", 195 }, { "Ä", 196 }, { "Å", 197 }, { "Æ", 198 }, { "Ç", 199 }, { "È", 200 }, { "É", 201 }, { "Ê", 202 }, { "Ë", 203 }, { "Ì", 204 }, { "Í", 205 }, { "Î", 206 }, { "Ï", 207 }, { "Ð", 208 }, { "Ñ", 209 }, { "Ò", 210 }, { "Ó", 211 }, { "Ô", 212 }, { "Õ", 213 }, { "Ö", 214 }, { "×", 215 }, { "Ø", 216 }, { "Ù", 217 }, { "Ú", 218 }, { "Û", 219 }, { "Ü", 220 }, { "Ý", 221 }, { "Þ", 222 }, { "ß", 223 }, { "à", 224 }, { "á", 225 }, { "â", 226 }, { "ã", 227 }, { "ä", 228 }, { "å", 229 }, { "æ", 230 }, { "ç", 231 }, { "è", 232 }, { "é", 233 }, { "ê", 234 }, { "ë", 235 }, { "ì", 236 }, { "í", 237 }, { "î", 238 }, { "ï", 239 }, { "ð", 240 }, { "ñ", 241 }, { "ò", 242 }, { "ó", 243 }, { "ô", 244 }, { "õ", 245 }, { "ö", 246 }, { "÷", 247 }, { "ø", 248 }, { "ù", 249 }, { "ú", 250 }, { "û", 251 }, { "ü", 252 }, { "ý", 253 }, { "þ", 254 }, { "ÿ", 255 }, }; int num_tab = sizeof(tab) / sizeof(tab[0]); long i, code; size_t len; char buf[1024]; if (octstr_get_char(html, (*pos) + 1) == '#') { i = octstr_parse_long(&code, html, (*pos) + 2, 10); if (i > 0) { if (code < 256) octstr_append_char(sms, code); *pos = i + 1; if (octstr_get_char(html, *pos) == ';') ++(*pos); } } else { for (i = 0; i < num_tab; ++i) { len = strlen(tab[i].entity); octstr_get_many_chars(buf, html, *pos, len); buf[len] = '\0'; if (strcmp(buf, tab[i].entity) == 0) { *pos += len; octstr_append_char(sms, tab[i].latin1); break; } } if (i == num_tab) { ++(*pos); octstr_append_char(sms, '&'); } }}Octstr *html_to_sms(Octstr *html){ long i, len; int c; Octstr *sms; sms = octstr_create(""); len = octstr_len(html); i = 0; while (i < len) { c = octstr_get_char(html, i); switch (c) { case '<': if (html_comment_begins(html, i)) skip_html_comment(html, &i); else skip_html_tag(html, &i); break; case '&': convert_html_entity(sms, html, &i); break; default: octstr_append_char(sms, c); ++i; break; } } octstr_shrink_blanks(sms); octstr_strip_blanks(sms); return sms;}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?