📄 htmlparse.c
字号:
type = M_TITLE; } else if (caseless_equal(str, MT_FIXED)) { type = M_FIXED; } else if (caseless_equal(str, MT_BOLD)) { type = M_BOLD; } else if (caseless_equal(str, MT_ITALIC)) { type = M_ITALIC; } else if (caseless_equal(str, MT_EMPHASIZED)) { type = M_EMPHASIZED; } else if (caseless_equal(str, MT_STRONG)) { type = M_STRONG; } else if (caseless_equal(str, MT_CODE)) { type = M_CODE; } else if (caseless_equal(str, MT_SAMPLE)) { type = M_SAMPLE; } else if (caseless_equal(str, MT_KEYBOARD)) { type = M_KEYBOARD; } else if (caseless_equal(str, MT_VARIABLE)) { type = M_VARIABLE; } else if (caseless_equal(str, MT_CITATION)) { type = M_CITATION; } else if (caseless_equal(str, MT_STRIKEOUT)) { type = M_STRIKEOUT; } else if (caseless_equal(str, MT_HEADER_1)) { type = M_HEADER_1; } else if (caseless_equal(str, MT_HEADER_2)) { type = M_HEADER_2; } else if (caseless_equal(str, MT_HEADER_3)) { type = M_HEADER_3; } else if (caseless_equal(str, MT_HEADER_4)) { type = M_HEADER_4; } else if (caseless_equal(str, MT_HEADER_5)) { type = M_HEADER_5; } else if (caseless_equal(str, MT_HEADER_6)) { type = M_HEADER_6; } else if (caseless_equal(str, MT_ADDRESS)) { type = M_ADDRESS; } else if (caseless_equal(str, MT_PLAIN_TEXT)) { type = M_PLAIN_TEXT; } else if (caseless_equal(str, MT_LISTING_TEXT)) { type = M_LISTING_TEXT; } else if (caseless_equal(str, MT_PLAIN_FILE)) { type = M_PLAIN_FILE; } else if (caseless_equal(str, MT_PARAGRAPH)) { type = M_PARAGRAPH; } else if (caseless_equal(str, MT_UNUM_LIST)) { type = M_UNUM_LIST; } else if (caseless_equal(str, MT_NUM_LIST)) { type = M_NUM_LIST; } else if (caseless_equal(str, MT_MENU)) { type = M_MENU; } else if (caseless_equal(str, MT_DIRECTORY)) { type = M_DIRECTORY; } else if (caseless_equal(str, MT_LIST_ITEM)) { type = M_LIST_ITEM; } else if (caseless_equal(str, MT_DESC_LIST)) { type = M_DESC_LIST; } else if (caseless_equal(str, MT_DESC_TITLE)) { type = M_DESC_TITLE; } else if (caseless_equal(str, MT_DESC_TEXT)) { type = M_DESC_TEXT; } else if (caseless_equal(str, MT_PREFORMAT)) { type = M_PREFORMAT; } else if (caseless_equal(str, MT_BLOCKQUOTE)) { type = M_BLOCKQUOTE; } else if (caseless_equal(str, MT_INDEX)) { type = M_INDEX; } else if (caseless_equal(str, MT_HRULE)) { type = M_HRULE; } else if (caseless_equal(str, MT_BASE)) { type = M_BASE; } else if (caseless_equal(str, MT_LINEBREAK)) { type = M_LINEBREAK; } else if (caseless_equal(str, MT_IMAGE)) { type = M_IMAGE; } else if (caseless_equal(str, MT_FIGURE)) { type = M_FIGURE; } else if (caseless_equal(str, MT_SELECT)) { type = M_SELECT; } else if (caseless_equal(str, MT_OPTION)) { type = M_OPTION; } else if (caseless_equal(str, MT_INPUT)) { type = M_INPUT; } else if (caseless_equal(str, MT_TEXTAREA)) { type = M_TEXTAREA; } else if (caseless_equal(str, MT_FORM)) { type = M_FORM; }/*amb*/ else if (caseless_equal(str, MT_SUP)) { type = M_SUP; } else if (caseless_equal(str, MT_SUB)) { type = M_SUB; } else if (caseless_equal(str, MT_DOC_HEAD)) { type = M_DOC_HEAD; } else if (caseless_equal(str, MT_UNDERLINED)) { type = M_UNDERLINED; } else if (caseless_equal(str, MT_DOC_BODY)) { type = M_DOC_BODY; } else if (caseless_equal(str, MT_TABLE)) { type = M_TABLE; } else if (caseless_equal(str, MT_CAPTION)) { type = M_CAPTION; } else if (caseless_equal(str, MT_TABLE_ROW)) { type = M_TABLE_ROW; } else if (caseless_equal(str, MT_TABLE_HEADER)) { type = M_TABLE_HEADER; } else if (caseless_equal(str, MT_TABLE_DATA)) { type = M_TABLE_DATA; } else if (caseless_equal(str, MT_MAP)) { type=M_MAP; } else if (caseless_equal(str, MT_META)) { type=M_META; } else {#ifdef VERBOSE errorlog("warning: unknown mark (%s)\n", str);#endif type = M_UNKNOWN; } *tptr = tchar; return(type);}/* * Parse a single anchor tag. ptrp is a pointer to a pointer to the * string to be parsed. On return, the ptr should be changed to * point to after the text we have parsed. * On return start and end should point to the beginning, and just * after the end of the tag's name in the original anchor string. * Finally the function returns the tag value in a malloced buffer. */char *AnchorTag(ptrp, startp, endp) char **ptrp; char **startp; char **endp;{ char *tag_val; char *ptr; char *start; char tchar; int quoted; int has_value; quoted = 0; /* * remove leading spaces, and set start */ ptr = *ptrp; while (isspace((int)*ptr)) { ptr++; } *startp = ptr; /* * Find and set the end of the tag */ while ((!isspace((int)*ptr))&&(*ptr != '=')&&(*ptr != '\0')) { ptr++; } *endp = ptr; has_value=0; if (*ptr == '\0') { *ptrp = ptr;/* return(NULL);*/ /* try to handle <A NAME=blah></A> correctly -bjs*/ } else { /* * Move to the start of the tag value, if there is one. */ while ((isspace((int)*ptr))||(*ptr == '=')) { if (*ptr == '=') { has_value = 1; } ptr++; } } /* * For a tag with no value, this is a boolean flag. * Return the string "1" so we know the tag is there. */ if (!has_value) { *ptrp = *endp; /* * set a tag value of 1. */ tag_val = (char *)malloc(strlen("1") + 1); if (tag_val == NULL) { errorlog(stderr, "can't malloc space for tag value\n"); return(NULL); } strcpy(tag_val, "1"); return(tag_val); } if (*ptr == '\"') { quoted = 1; ptr++; } start = ptr; /* * Get tag value. Either a quoted string or a single word */ if (quoted) { while ((*ptr != '\"')&&(*ptr != '\0')) { ptr++; } } else { while ((!isspace((int)*ptr))&&(*ptr != '\0')) { ptr++; } }/* amb - everyone forgets the end quotes on anchor attributes, so we'll let it slide *//* if ((quoted)&&(*ptr == '\0')) { *ptrp = ptr; return(NULL); }*/ /* * Copy the tag value out into a malloced string */ tchar = *ptr; *ptr = '\0'; tag_val = (char *)malloc(strlen(start) + 1); if (tag_val == NULL) { errorlog("can't malloc space for tag value\n"); *ptr = tchar; *ptrp = ptr; return(NULL); } strcpy(tag_val, start); *ptr = tchar; /* If you forgot the end quote, you need to make sure you aren't indexing ptr past the end of its own array -- SWP */ if (quoted && *ptr!='\0') { ptr++; } *ptrp = ptr; return(tag_val);}/* * Parse mark text for the value associated with the * passed mark tag. * If the passed tag is not found, return NULL. * If the passed tag is found but has no value, return "". */char* ParseMarkTag(text, mtext, mtag) char *text; char *mtext; char *mtag;{ char *ptr; char *start; char *end; char *tag_val; char tchar; if ((text == NULL)||(mtext == NULL)||(mtag == NULL)) { return(NULL); } ptr = (char *)(text + strlen(mtext)); while (*ptr != '\0') { tag_val = AnchorTag(&ptr, &start, &end); tchar = *end; *end = '\0'; if (caseless_equal(start, mtag)) { *end = tchar; if (tag_val == NULL) { tag_val = (char *)malloc(1); *tag_val = '\0'; return(tag_val); } else { return(tag_val); } } *end = tchar; if (tag_val != NULL) { free(tag_val); } } return(NULL);}/* HTMLlists.c *//* * Code to manage a linked list of parsed HTML objects generated * from a raw text file. * Also code to manage a linked list of formatted elements that * make up a page of a formatted document. *//* * Free up the passed linked list of parsed elements, freeing * all memory associates with each element. */static void FreeObjList(List) struct mark_up *List;{ struct mark_up *current; struct mark_up *mptr; current = List; while (current != NULL) { mptr = current; current = current->next; mptr->next = NULL; if (mptr->start != NULL) { free((char *) mptr->start); } if (mptr->text != NULL) { free((char *) mptr->text); } if (mptr->end != NULL) { free((char *) mptr->end); } free((char *) mptr); }}/* * Add an object to the parsed object list. * return a pointer to the current (end) position in the list. * If the object is a normal text object containing nothing but * white space, throw it out, unless we have been told to keep * white space. */static struct mark_up * AddObj(listp, current, mark, keep_wsp) struct mark_up **listp; struct mark_up *current; struct mark_up *mark; int keep_wsp;{ if (mark == NULL) { return (current); } /* * Throw out normal text blocks that are only white space, * unless keep_wsp is set. */ if ((mark->type == M_NONE) && (!keep_wsp)) { char *ptr; ptr = mark->text; if (ptr == NULL) { free((char *) mark); return (current); } /* * No longer throw out whitespace, it is important to keep * white space between tags. while ((*ptr == ' ')||(*ptr == '\t')||(*ptr == '\n')) { ptr++; } * */ if (*ptr == '\0') { free(mark->text); free((char *) mark); return (current); } } /* * Add object to either the head of the list for a new list, * or at the end after the current pointer. */ if (*listp == NULL) { *listp = mark; current = *listp; } else { current->next = mark; current = current->next; } current->next = NULL; return (current);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -