⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 lex.uri.c

📁 在linux下的crawler程序,来自北大天网tiny search engine spider
💻 C
📖 第 1 页 / 共 5 页
字号:
      284,  282,    0,    0,    0,    0,  284,  282,  282,  284,      282,  282,  283,  283,  296,  283,  283,    0,    0,    0,      296,  283,  283,  296,  283,  283,  292,    0,    0,    0,        0,    0,  292,  292,    0,  292,  292,  294,    0,    0,        0,    0,    0,  294,  294,    0,  294,  294,  298,  298,      298,  298,  298,  298,  298,  298,  298,  298,  298,  298,      298,  298,  298,  298,  298,  298,  298,  298,  298,  298,      298,  298,  298,  298,  299,    0,  302,    0,    0,    0,      299,    0,  302,  299,  300,  302,    0,    0,    0,    0,      300,  300,    0,  300,  300,  301,    0,    0,    0,    0,        0,  301,  301,    0,  301,  301,  303,  303,  303,  303,      303,  303,  303,  303,  303,  303,  303,  303,  303,  303,      303,  303,  303,  303,  303,  303,  303,  303,  303,  303,      303,  303,  304,  304,  310,  304,    0,    0,    0,    0,      310,  304,  304,  310,  304,  304,  305,  305,  305,  305,      305,  305,  305,  305,  305,  305,  305,  305,  305,  305,      305,  305,  305,  305,  305,  305,  305,  305,  305,  305,      305,  305,  306,  306,  306,  306,  306,  306,  306,  306,      306,  306,  306,  306,  306,  306,  306,  306,  306,  306,      306,  306,  306,  306,  306,  306,  306,  306,  307,  307,      313,  307,  307,    0,    0,    0,  313,  307,  307,  313,      307,  307,  316,    0,  316,  316,  316,  316,  316,  316,      316,  316,  316,  316,  316,  316,  316,  316,  316,  316,        0,  316,  316,  316,  316,  316,  316,  316,  317,  320,      321,    0,    0,    0,  317,  320,  321,  317,  320,  321,      323,    0,  323,  323,  323,  323,  323,  323,  323,  323,      323,  323,  323,  323,  323,  323,  323,  323,    0,  323,      323,  323,  323,  323,  323,  323,  324,    0,  327,    0,        0,    0,  324,    0,  327,  324,  325,  327,  325,  325,      325,  325,  325,  325,  325,  325,  325,  325,  325,  325,      325,  325,  325,  325,    0,  325,  325,  325,  325,  325,      325,  
325,  326,  326,  326,  326,  326,  326,  326,  326,      326,  326,  326,  326,  326,  326,  326,  326,  326,  326,      326,  326,  326,  326,  326,  326,  326,  326,  328,  328,      328,  328,  328,  328,  328,  328,  328,  328,  328,  328,      328,  328,  328,  328,  328,  328,  328,  328,  328,  328,      328,  328,  328,  328,  330,  332,  335,    0,    0,    0,      330,  332,  335,  330,  332,  335,  336,  340,    0,    0,        0,    0,  336,  340,    0,  336,  340,  348,    0,    0,        0,    0,    0,  348,  348,    0,  348,  348,  349,    0,        0,    0,    0,    0,  349,  349,    0,  349,  349,  353,        0,  357,    0,    0,    0,  353,  353,  357,  353,  353,      357,  358,  358,  358,  358,  358,  358,  358,  358,  358,      358,  358,  358,  358,  358,  358,  358,  358,  358,  358,      358,  358,  358,  358,  358,  358,  358,  360,  363,    0,        0,    0,    0,  360,  363,    0,  360,  363,  364,  364,      364,  364,  364,  364,  364,  364,  364,  364,  364,  364,      364,  364,  364,  364,  364,  364,  364,  364,  364,  364,      364,  364,  364,  364,  365,  365,  365,  365,  365,  365,      365,  365,  365,  365,  365,  365,  365,  365,  365,  365,      365,  365,  365,  365,  365,  365,  365,  365,  365,  365,      372,    0,    0,    0,    0,    0,  372,    0,    0,  372,      373,  373,  373,  373,  373,  373,  373,  373,  373,  373,      373,  373,  373,  373,  373,  373,  373,  373,  373,  373,      373,  373,  373,  373,  373,  373,  374,    0,    0,    0,        0,    0,  374,  374,    0,  374,  374,  375,    0,  376,        0,    0,    0,  375,  375,  376,  375,  375,  376,  377,      377,  384,  377,    0,    0,    0,    0,  384,  377,  377,      384,  377,  377,  378,    0,    0,    0,    0,    0,  378,      378,    0,  378,  378,  379,  379,  385,  379,  379,    0,        0,    0,  385,  379,  379,  385,  379,  379,  383,    0,        0,    0,    0,    0,  383,  383,    0,  383,  383,  386,        0,  386,  386,  
386,  386,  386,  386,  386,  386,  386,      386,  386,  386,  386,  386,  386,  386,    0,  386,  386,      386,  386,  386,  386,  386,  387,    0,  395,    0,    0,        0,  387,    0,  395,  387,  388,  395,  388,  388,  388,      388,  388,  388,  388,  388,  388,  388,  388,  388,  388,      388,  388,  388,    0,  388,  388,  388,  388,  388,  388,      388,  391,    0,  391,  391,  391,  391,  391,  391,  391,      391,  391,  391,  391,  391,  391,  391,  391,  391,    0,      391,  391,  391,  391,  391,  391,  391,  398,    0,    0,        0,    0,    0,  398,  398,    0,  398,  398,  402,    0,      405,    0,    0,    0,  402,  402,  405,  402,  402,  405,      408,    0,  410,    0,    0,    0,  408,    0,  410,  408,      409,  410,  413,    0,    0,    0,  409,  409,  413,  409,      409,  413,  414,  414,  414,  414,  414,  414,  414,  414,      414,  414,  414,  414,  414,  414,  414,  414,  414,  414,      414,  414,  414,  414,  414,  414,  414,  414,  417,  420,        0,    0,    0,    0,  417,  420,    0,  417,  420,  421,        0,    0,    0,    0,    0,  421,  421,    0,  421,  421,      422,    0,  423,    0,    0,    0,  422,  422,  423,  422,      422,  423,  424,  424,  426,  424,    0,    0,    0,    0,      426,  424,  424,  426,  424,  424,  425,  425,  448,  425,      425,    0,    0,    0,  448,  425,  425,  448,  425,  425,      434,    0,    0,    0,    0,    0,  434,  434,    0,  434,      434,  436,    0,  450,    0,    0,    0,  436,  436,  450,      436,  436,  450,  453,  454,  458,    0,    0,    0,  453,      454,  458,  453,  454,  458,  466,    0,    0,    0,    0,        0,  466,  466,    0,  466,  466,  467,    0,    0,    0,        0,    0,  467,  467,    0,  467,  467,  471,    0,  479,        0,    0,    0,  471,  471,  479,  471,  471,  479,  482,        0,    0,    0,    0,    0,  482,  482,    0,  482,  482,      486,    0,    0,    0,    0,    0,  486,  486,    0,  486,      486,  514,  514,  515,  515,  
516,  516,  517,  517,  518,      518,  519,  519,  520,  520,  521,  521,  513,  513,  513,      513,  513,  513,  513,  513,  513,  513,  513,  513,  513,      513,  513,  513,  513,  513,  513,  513,  513,  513,  513,      513,  513,  513,  513,  513,  513    } ;static yy_state_type yy_state_buf[YY_BUF_SIZE + 2], *yy_state_ptr;static char *yy_full_match;static int yy_lp;static int yy_looking_for_trail_begin = 0;static int yy_full_lp;static int *yy_full_state;#define YY_TRAILING_MASK 0x2000#define YY_TRAILING_HEAD_MASK 0x4000#define REJECT \{ \*yy_cp = yy_hold_char; /* undo effects of setting up yytext */ \yy_cp = yy_full_match; /* restore poss. backed-over text */ \yy_lp = yy_full_lp; /* restore orig. accepting pos. */ \yy_state_ptr = yy_full_state; /* restore orig. state */ \yy_current_state = *yy_state_ptr; /* restore curr. state */ \++yy_lp; \goto find_rule; \}#define yymore() yymore_used_but_not_detected#define YY_MORE_ADJ 0#define YY_RESTORE_YY_MORE_OFFSETchar *yytext;#line 1 "uri.l"#define INITIAL 0/* * uri.l -- Routines dealing with URI, mainly parsing and merging. * Created: Xie Han, OS lab of Peking University. <me@pku.edu> * * The grammar of URI is quite simply so yacc is not needed. * * Created: Sep 25 04:15am 2003. version 0.1.1 *		# Can read URI from standard input, parse it and print every *		  part of the URI. * * Updated: Sep 25 05:10pm 2003. version 0.1.2 * * Updated: Sep 25 10:30pm 2003. version 0.1.3 *		# A lot of changes in the program structure, much more clear. *		# Now the result is written into a uri structure. *		# A little bug. String "?" is accepted as a valid URI. * * Updated: Sep 26 02:16am 2003. version 0.1.4 *		# 2 states are added to solve the bug in version 0.1.3. *		# The uri structure was redesigned. * * Updated: Sep 27 05:21am 2003. version 1.0.0!!!! *		# All states are changed from exclusive to shared to make *		  the lex program more compatible because some lexer do not *		  support exclusive start condition. 
*		# "unput" is substituted by "yyless" when putting back excess *		  text, because the latter costs less. *		# 4 parsing functions are completed, each of which has different *		  characteristics. *		# Function "uri_rally_parse" is added, which cooperates with *		  other lex programs. * * Updated: Sep 28 1:00pm 2003. version 1.1.0 *		# The function "uri_rel_to_abs" is under construction. *		# The URI structure is changed again: "path_type" field is removed, *		  and "abs_path", "rel_path", "opaque_path" are substituted by *		  "path" because the original design is too sophisticated. * * Updated: National Day 5:06am 2003. version 1.1.1 *		# The function "__uri_path_merge" which merges the base path and *		  the relative path is finished and tested and proved to be a *		  nice job! This function is the core of merging a base URI and *		  a relative URI into an absolute URI. *		# The previous function "uri_rel_to_abs" is commented out. It's *		  terrible. * * Updated: National Day 5:00pm 2003. version 1.1.2 *		# Now we distinguish between "no" and "empty". Related fields *		  include "userinfo", "port", "query", and "fragment". The previous *		  version treats "no" and "empty" equally, which is not right. * * Updated: Oct 2 2:42am 2003. version 1.1.3 *		# Solve the conflict of server and reg_name by adding a start *		  condition and some ugly code that is normally not reached. *		# The "uri_rel_to_abs" is rewritten but has not been tested. * * Updated: Oct 4 6:33pm 2003. version 1.2.0 *		# Function "uri_path_merge" now returns the length of the result *		  path while the result path is returned by a pointer. *		# Function "uri_rel_to_abs" is renamed "uri_merge", and it will *		  return the string length of the result URI. *		# Function "uri_recombine" is coded, which recombines a URI *		  structure into a URI string, and the string length is returned. 
*		# The URI merging algorithm in RFC 2396 is found not to be flawless, and *		  the problem is fixed in "uri_merge" by not very graceful means. * * Updated: Oct 5 2:09am 2003. version 1.2.1 *		# From this version function "uri_parse_string" is implemented by *		  "uri_parse_bytes" which from now on is dependent on *		  "uri_parse_buffer". *		# "uri_validate_string" and "uri_validate_bytes" were added. Their *		  names explain what they do. *		# Keyword "static" was taken off from the front of function *		  "__uri_path_merge" while it is not declared in the .h file. I *		  mean to make it a secret interface ^_^ * * Updated: Oct 5 4:43am 2003. version 1.2.2 *		# Cross out the first change log entry of version 1.2.1! *		# "uri_validate_string" is no longer implemented by *		  "uri_validate_bytes". * * Updated: Oct 9 4:49am 2003. version 1.2.3 *		# Function "uri_rally_parse" no longer exists, because it cannot *		  work. The original desire that it can cooperate with other lex *		  programs is indeed out of the question. *		# Macro URI_INIT changed slightly. * * Updated: Oct 10 4:28am 2003. version 1.2.4 *		# The argument order of "rel_xxx" and "base_xxx" of functions *		  "__uri_path_merge" and "uri_merge" is exchanged. "rel_xxx" from *		  now on precedes "base_xxx". * * Updated: Oct 11 3:31am 2003. version 1.2.5 *		# URI merging algorithm is changed again. We check if the path of *		  the relative URI is defined before merging path, and if it's not *		  defined, we take the base URI's path directly rather than merging *		  the paths. I think that the URI merging algorithm is now on the *		  verge of perfect. (The recommended merging algorithm of RFC 2396 *		  has a lot of problems.) *		# The rule of "<REG_NAME>{reg_name}" is optimized, because "strcat" *		  is slow. * * Updated: Oct 12 4:49am 2003. version 1.2.6 *		# "goto" statements are substituted by "do ... while (0)" and *		  "break" in function "uri_merge" and "uri_recombine". 
No real *		  improvement is made but showing off continues. * * Updated: Oct 12 4:55pm 2003. version 1.2.7 *		# URIs with Chinese characters are supported in an obviously *		  not perfect way: the "unreserved" BNF allows non-US-ASCII *		  characters, i.e., characters with value that ranges from *		  128 to 255. * * Updated: Oct 13 1:06am 2003. version 1.2.8 *		# Some code of "__uri_path_merge" was rewritten because the *		  interface of stack module changed. *		# NOTE!!! NULL can no longer be passed to "__uri_path_merge" as *		  a relative path or a base path, and the function from now on *		  won't set NULL as the result path. If the result path is *		  empty, the "abs_path" will point to a pointer that points to *		  an empty string ("\0"). So you can always "free(*abs_path)" *		  if "__uri_path_merge" succeeded. * * Updated: Oct 15 1:13am 2003. version 1.4.0 *		# The update of 1.x.x version continues because version 2.0.x *		  runs slower than 1.x.x (half speed), though 2.0.x is written *		  in much shorter and very nice code. *		# The new uri structure is derived from 1.3.x. 1.3.x is a test *		  version which has been updated into 2.0.0. Indeed, 1.4.x is *		  compatible with 2.0.x completely, but 2.0.x is NOT compatible *		  with 1.4.x because 2.0.x has only one p

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -