parse.c
来自「elinks下lynx是最重要的二个文本浏览器, 在linux下非常实用, el」· C语言 代码 · 共 630 行
C
630 行
/* Parsing of FTP `ls' directory output. *//* Parts of this file was part of GNU Wget * Copyright (C) 1995, 1996, 1997, 2000, 2001 Free Software Foundation, Inc. */#ifdef HAVE_CONFIG_H#include "config.h"#endif#include <errno.h>#include <stdio.h>#include <stdlib.h>#ifdef HAVE_STRING_H# include <string.h>#else# include <strings.h>#endif#ifdef HAVE_UNISTD_H# include <unistd.h>#endif#include <sys/stat.h>#include <sys/types.h>#include "elinks.h"#include "osdep/ascii.h"#include "osdep/stat.h"#include "protocol/date.h"#include "protocol/ftp/parse.h"#include "util/conv.h"#include "util/string.h"#include "util/time.h"/* Examples of what the FTP parser is supposed to handle (and not handle) can * be found in the test-ftp-parser file. */#define skip_space_end(src, end) \ do { while ((src) < (end) && *(src) == ' ') (src)++; } while (0)#define skip_nonspace_end(src, end) \ do { while ((src) < (end) && *(src) != ' ') (src)++; } while (0)static longparse_ftp_number(unsigned char **src, unsigned char *end, long from, long to){ long number = 0; unsigned char *pos = *src; for (; pos < end && isdigit(*pos); pos++) number = (*pos - '0') + 10 * number; *src = pos; if (number < from || to < number) return -1; return number;}/* Parser for the EPLF format (see http://pobox.com/~djb/proto/eplf.txt). * * Lines end with \r\n (CR-LF), but that is handled elsewhere. */enum ftp_eplf { FTP_EPLF_FILENAME = ASCII_TAB, /* Filename follows */ FTP_EPLF_PLAINFILE = 'r', /* RETR is possible */ FTP_EPLF_DIRECTORY = '/', /* CWD is possible */ FTP_EPLF_SIZE = 's', /* File size follows */ FTP_EPLF_MTIME = 'm', /* Modification time follows */ FTP_EPLF_ID = 'i', /* Unique file id follows */};static struct ftp_file_info *parse_ftp_eplf_response(struct ftp_file_info *info, unsigned char *src, int len){ /* Skip the '+'-char which starts the line. */ unsigned char *end = src + len; unsigned char *pos = src++; /* Handle the series of facts about the file. */ for (; src < end && pos; src = pos + 1) { /* Find the end of the current fact. */ pos = memchr(src, ',', end - src); if (!pos) pos = end; switch (*src++) { case FTP_EPLF_FILENAME: if (src >= end) break; info->name.source = src; info->name.length = end - src; return info; case FTP_EPLF_DIRECTORY: info->type = FTP_FILE_DIRECTORY; break; case FTP_EPLF_PLAINFILE: info->type = FTP_FILE_PLAINFILE; break; case FTP_EPLF_SIZE: if (src >= pos) break; info->size = parse_ftp_number(&src, pos, 0, LONG_MAX); break; case FTP_EPLF_MTIME: if (src >= pos) break; info->mtime = (time_t) parse_ftp_number(&src, pos, 0, LONG_MAX); break; case FTP_EPLF_ID: /* Not used */ break; } } return NULL;}/* Parser for UNIX-style listing: */enum ftp_unix { FTP_UNIX_PERMISSIONS, FTP_UNIX_SIZE, FTP_UNIX_DAY, FTP_UNIX_TIME, FTP_UNIX_NAME};/* Converts Un*x-style symbolic permissions to number-style ones, e.g. string * rwxr-xr-x to 755. * Borrowed from lftp source code by Alexander V. Lukyanov. * On parse error, it returns 0. */static intparse_ftp_unix_permissions(const unsigned char *src, int len){ mode_t perms = 0; if (len != 9 && !(len == 10 && src[9] == '+')) /* ACL tag */ return 0; /* User permissions */ switch (src[0]) { case('r'): perms |= S_IRUSR; break; case('-'): break; default: return 0; } switch (src[1]) { case('w'): perms |= S_IWUSR; break; case('-'): break; default: return 0; } switch (src[2]) { case('S'): perms |= S_ISUID; break; case('s'): perms |= S_ISUID; /* fall-through */ case('x'): perms |= S_IXUSR; break; case('-'): break; default: return 0; } /* Group permissions */ switch (src[3]) { case('r'): perms |= S_IRGRP; break; case('-'): break; default: return 0; } switch (src[4]) { case('w'): perms |= S_IWGRP; break; case('-'): break; default: return 0; } switch (src[5]) { case('S'): perms |= S_ISGID; break; case('s'): perms |= S_ISGID; /* fall-through */ case('x'): perms |= S_IXGRP; break; case('-'): break; default: return 0; } /* Others permissions */ switch (src[6]) { case('r'): perms |= S_IROTH; break; case('-'): break; default: return 0; } switch (src[7]) { case('w'): perms |= S_IWOTH; break; case('-'): break; default: return 0; } switch (src[8]) { case('T'): perms |= S_ISVTX; break; case('t'): perms |= S_ISVTX; /* fall-through */ case('x'): perms |= S_IXOTH; break; case('l'): case('L'): perms |= S_ISGID; perms &= ~S_IXGRP; break; case('-'): break; default: return 0; } return perms;}static struct ftp_file_info *parse_ftp_unix_response(struct ftp_file_info *info, unsigned char *src, int len){ unsigned char *end = src + len; unsigned char *pos; struct tm mtime; enum ftp_unix fact; /* Decide the file type. */ { enum ftp_file_type type = *src++; switch (type) { case FTP_FILE_PLAINFILE: case FTP_FILE_DIRECTORY: case FTP_FILE_SYMLINK: info->type = type; break; default: info->type = FTP_FILE_UNKNOWN; } } memset(&mtime, 0, sizeof(mtime)); mtime.tm_isdst = -1; /* Following is only needed to handle NetWare listings which are not * (yet) handled. So disabled for now. --Zas */ /* skip_space_end(src, end); */ fact = FTP_UNIX_PERMISSIONS; for (pos = src; src < end; src = pos) { skip_nonspace_end(pos, end); switch (fact) { case FTP_UNIX_PERMISSIONS: /* We wanna know permissions as well! And I decided to * completely ignore the NetWare perms, they are very * rare and of some nonstandart format. If you want * them, though, I'll accept patch enabling them. * --pasky */ if (pos - src == 9) /* 9 is length of "rwxrwxrwx". */ info->permissions = parse_ftp_unix_permissions(src, 9); fact = FTP_UNIX_SIZE; break; case FTP_UNIX_SIZE: /* Search for the size and month name combo: */ if (info->size != FTP_SIZE_UNKNOWN && pos - src == 3) { int month = parse_month((const unsigned char **) &src, pos); if (month != -1) { fact = FTP_UNIX_DAY; mtime.tm_mon = month; break; } } if (!isdigit(*src)) { info->size = FTP_SIZE_UNKNOWN; break; } info->size = parse_ftp_number(&src, pos, 0, LONG_MAX); break; case FTP_UNIX_DAY: mtime.tm_mday = parse_day((const unsigned char **) &src, pos); fact = FTP_UNIX_TIME; break; case FTP_UNIX_TIME: /* This ought to be either the time, or the * year. Let's be flexible! */ fact = FTP_UNIX_NAME; /* We must deal with digits. */ if (!isdigit (*src)) break; /* If we have a number x, it's a year. If we have x:y, * it's hours and minutes. */ if (!memchr(src, ':', pos - src)) { mtime.tm_year = parse_year((const unsigned char **) &src, pos); break; } if (!parse_time((const unsigned char **) &src, &mtime, pos)) { mtime.tm_hour = mtime.tm_min = mtime.tm_sec = 0; } break; case FTP_UNIX_NAME: /* Since the file name may contain spaces use @end as the * token ending and not @pos. */ info->name.source = src; info->name.length = end - src; /* Some FTP sites choose to have ls -F as their default * LIST output, which marks the symlinks with a trailing * `@', directory names with a trailing `/' and * executables with a trailing `*'. This is no problem * unless encountering a symbolic link ending with `@', * or an executable ending with `*' on a server without * default -F output. I believe these cases are very * rare. */#define check_trailing_char(string, trailchar) \ ((string)->length > 0 \ && (string)->source[(string)->length - 1] == (trailchar)) switch (info->type) { case FTP_FILE_DIRECTORY: /* Check for trailing `/' */ if (check_trailing_char(&info->name, '/')) info->name.length--; break; case FTP_FILE_SYMLINK: /* If the file is a symbolic link, it should * have a ` -> ' somewhere. */ while (pos && pos + 3 < end) { if (!memcmp(pos, " -> ", 4)) { info->symlink.source = pos + 4; info->symlink.length = end - pos - 4; info->name.length = pos - src; break; } pos = memchr(pos, ' ', end - pos); } if (!info->symlink.source) return NULL; /* Check for trailing `@' on link and trailing * `/' on the link target if it's a directory */ if (check_trailing_char(&info->name, '@')) info->name.length--; if (check_trailing_char(&info->symlink, '/')) info->symlink.length--; break; case FTP_FILE_PLAINFILE: /* Check for trailing `*' on files which are * executable. */ if ((info->permissions & 0111) && check_trailing_char(&info->name, '*')) info->name.length--; default: break; } if (mtime.tm_year == 0) { /* Get the current time. */ time_t timenow = time(NULL); struct tm *now = localtime(&timenow); mtime.tm_year = now->tm_year; /* Some listings will not specify the year if it * is "obvious" that the file was from the * previous year. E.g. if today is 97-01-12, and * you see a file of Dec 15th, its year is 1996, * not 1997. Thanks to Vladimir Volovich for * mentioning this! */ if (mtime.tm_mon > now->tm_mon) mtime.tm_year--; } info->mtime = mktime(&mtime); /* store the time-stamp */ info->local_time_zone = 1; return info; } skip_space_end(pos, end); } return NULL;}/* Parser for VMS-style MultiNet (some spaces removed from examples): *//* Converts VMS symbolic permissions to number-style ones, e.g. string * RWED,RWE,RE to 755. "D" (delete) is taken to be equal to "W" (write). * Inspired by a patch of Stoyan Lekov <lekov@eda.bg>. */static intparse_ftp_vms_permissions(const unsigned char *src, int len){ int perms = 0; int pos; for (pos = 0; pos < len; pos++) { switch (src[pos]) { case ',': perms <<= 3; break; case 'R': perms |= 4; break; case 'W': case 'D': perms |= 2; break; case 'E': perms |= 1; break; default: /* Wrong VMS permissons! */ return 0; } } return perms;}static struct ftp_file_info *parse_ftp_vms_response(struct ftp_file_info *info, unsigned char *src, int len){ unsigned char *end = src + len; unsigned char *pos; /* First column: Name. A bit of black magic again. The name maybe either * ABCD.EXT or ABCD.EXT;NUM and it might be on a separate line. * Therefore we will first try to get the complete name until the first * space character; if it fails, we assume that the name occupies the * whole line. After that we search for the version separator ";", we * remove it and check the extension of the file; extension .DIR denotes * directory. */ pos = memchr(src, ';', end - src); if (!pos) return NULL; info->name.source = src; info->name.length = pos - src; /* If the name ends on .DIR or .DIR;#, it's a directory. We also * set the file size to zero as the listing does tell us only * the size in filesystem blocks - for an integrity check (when * mirroring, for example) we would need the size in bytes. */ if (info->name.length > 4 && !memcmp(&pos[-4], ".DIR", 4)) { info->type = FTP_FILE_DIRECTORY; info->name.length -= 4; } else { info->type = FTP_FILE_PLAINFILE; } skip_nonspace_end(pos, end); skip_space_end(pos, end); src = pos; /* Second column, if exists, or the first column of the next line * contain file size in blocks. We will skip it. */ if (src >= end) { /* FIXME: Handle multi-lined views. */ return NULL; } skip_nonspace_end(src, end); skip_space_end(src, end); if (src >= end) return NULL; /* Third/Second column: Date DD-MMM-YYYY and * Fourth/Third column: Time hh:mm[:ss] */ /* If the server produces garbage like * 'EA95_0PS.GZ;1 No privilege for attempted operation' * parse_date() will fail. */ info->mtime = parse_date(&src, end, 1, 0); if (info->mtime == 0) return NULL; /* Be more tolerant from here on ... */ /* Skip the fifth column */ skip_space_end(src, end); skip_nonspace_end(src, end); skip_space_end(src, end); if (src >= end) return info; /* Sixth column: Permissions */ src = memchr(src, '(', end - src); if (!src || src >= end) return info; src++; pos = memchr(src, ')', end - src); if (!pos) return info; /* Permissons have the format "RWED,RWED,RE" */ info->permissions = parse_ftp_vms_permissions(src, pos - src); return info;}/* Parser for the MSDOS-style format: */struct ftp_file_info *parse_ftp_winnt_response(struct ftp_file_info *info, unsigned char *src, int len){ struct tm mtime; unsigned char *end = src + len; /* Extracting name is a bit of black magic and we have to do it * before `strtok' inserted extra \0 characters in the line * string. For the moment let us just suppose that the name starts at * column 39 of the listing. This way we could also recognize * filenames that begin with a series of space characters (but who * really wants to use such filenames anyway?). */ if (len <= 39) return NULL; info->name.source = src + 39; info->name.length = end - src - 39; /* First column: mm-dd-yy. Should number parsing of the month fail, * january will be assumed. */ memset(&mtime, 0, sizeof(mtime)); mtime.tm_isdst = -1; mtime.tm_mon = parse_ftp_number(&src, end, 1, 12); if (src + 2 >= end || *src != '-') return NULL; src++; mtime.tm_mday = parse_day((const unsigned char **) &src, end); if (src + 2 >= end || *src != '-') return NULL; src++; mtime.tm_year = parse_year((const unsigned char **) &src, end); if (src >= end || mtime.tm_year == -1) return NULL; skip_space_end(src, end); if (src >= end) return NULL; /* Second column: hh:mm[AP]M, listing does not contain value for * seconds */ if (!parse_time((const unsigned char **) &src, &mtime, end)) return NULL; /* Store the time-stamp. */ info->mtime = mktime(&mtime); skip_nonspace_end(src, end); skip_space_end(src, end); if (src >= end) return NULL; /* Third column: Either file length, or <DIR>. We also set the * permissions (guessed as 0644 for plain files and 0755 for directories * as the listing does not give us a clue) and filetype here. */ if (*src == '<') { info->type = FTP_FILE_DIRECTORY; info->permissions = S_IRWXU | S_IRGRP | S_IXGRP | S_IROTH | S_IXOTH; } else if (isdigit(*src)) { info->type = FTP_FILE_PLAINFILE; info->size = parse_ftp_number(&src, end, 0, LONG_MAX); info->permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IROTH; } else { info->type = FTP_FILE_UNKNOWN; } return info;}struct ftp_file_info *parse_ftp_file_info(struct ftp_file_info *info, unsigned char *src, int len){ assert(info && src && len > 0); if_assert_failed return NULL; switch (*src) { case '+': return parse_ftp_eplf_response(info, src, len); case 'b': case 'c': case 'd': case 'l': case 'p': case 's': case '-': break; default: if (memchr(src, ';', len)) return parse_ftp_vms_response(info, src, len); if (isdigit(*src)) return parse_ftp_winnt_response(info, src, len); } return parse_ftp_unix_response(info, src, len);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?