📄 mod_speling.c
字号:
/* Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */#define WANT_BASENAME_MATCH#include "httpd.h"#include "http_core.h"#include "http_config.h"#include "http_log.h"/* mod_speling.c - by Alexei Kosut <akosut@organic.com> June, 1996 * * This module is transparent, and simple. It attempts to correct * misspellings of URLs that users might have entered, namely by checking * capitalizations. If it finds a match, it sends a redirect. * * 08-Aug-1997 <Martin.Kraemer@Mch.SNI.De> * o Upgraded module interface to apache_1.3a2-dev API (more NULL's in * speling_module). * o Integrated tcsh's "spelling correction" routine which allows one * misspelling (character insertion/omission/typo/transposition). * Rewrote it to ignore case as well. This ought to catch the majority * of misspelled requests. * o Commented out the second pass where files' suffixes are stripped. * Given the better hit rate of the first pass, this rather ugly * (request index.html, receive index.db ?!?!) solution can be * omitted. 
* o wrote a "kind of" html page for mod_speling * * Activate it with "CheckSpelling On" */MODULE_VAR_EXPORT module speling_module;typedef struct { int enabled;} spconfig;/* * Create a configuration specific to this module for a server or directory * location, and fill it with the default settings. * * The API says that in the absence of a merge function, the record for the * closest ancestor is used exclusively. That's what we want, so we don't * bother to have such a function. */static void *mkconfig(pool *p){ spconfig *cfg = ap_pcalloc(p, sizeof(spconfig)); cfg->enabled = 0; return cfg;}/* * Respond to a callback to create configuration record for a server or * vhost environment. */static void *create_mconfig_for_server(pool *p, server_rec *s){ return mkconfig(p);}/* * Respond to a callback to create a config record for a specific directory. */static void *create_mconfig_for_directory(pool *p, char *dir){ return mkconfig(p);}/* * Handler for the CheckSpelling directive, which is FLAG. */static const char *set_speling(cmd_parms *cmd, void *mconfig, int arg){ spconfig *cfg = (spconfig *) mconfig; cfg->enabled = arg; return NULL;}/* * Define the directives specific to this module. This structure is referenced * later by the 'module' structure. 
*/
static const command_rec speling_cmds[] =
{
    /* CheckSpelling: FLAG directive handled by set_speling() */
    {
        "CheckSpelling",
        set_speling,
        NULL,
        OR_OPTIONS,
        FLAG,
        "whether or not to fix miscapitalized/misspelled requests"
    },
    { NULL }                    /* end-of-table marker */
};

/*
 * How a candidate name relates to the requested name.  The values are
 * consecutive, starting at 0, and are listed in the same order as the
 * strings in sp_reason_str[].  sort_by_quality() orders candidates by
 * ascending value.
 */
typedef enum {
    SP_IDENTICAL = 0,           /* 0: "identical" */
    SP_MISCAPITALIZED,          /* 1: "miscapitalized" */
    SP_TRANSPOSITION,           /* 2: "transposed characters" */
    SP_MISSINGCHAR,             /* 3: "character missing" */
    SP_EXTRACHAR,               /* 4: "extra character" */
    SP_SIMPLETYPO,              /* 5: "mistyped character" */
    SP_VERYDIFFERENT            /* 6: "common basename" */
} sp_reason;

/*
 * Human-readable text for each sp_reason, in enum order (presumably indexed
 * by the sp_reason value; the use site is not visible in this part of the
 * file).
 */
static const char *sp_reason_str[] =
{
    "identical",
    "miscapitalized",
    "transposed characters",
    "character missing",
    "extra character",
    "mistyped character",
    "common basename",
};

/* One candidate entry: a name and the sp_reason it was classified with. */
typedef struct {
    const char *name;
    sp_reason quality;
} misspelled_file;

/*
 * spdist() is taken from Kernighan & Pike,
 *  _The_UNIX_Programming_Environment_
 * and adapted somewhat to correspond better to psychological reality.
 * (Note the changes to the return values)
 *
 * According to Pollock and Zamora, CACM April 1984 (V. 27, No. 4),
 * page 363, the correct order for this is:
 * OMISSION = TRANSPOSITION > INSERTION > SUBSTITUTION
 * thus, it was exactly backwards in the old version. -- PWP
 *
 * This routine was taken out of tcsh's spelling correction code
 * (tcsh-6.07.04) and re-converted to apache data types ("char" type
 * instead of tcsh's NLS'ed "Char"). Plus it now ignores the case
 * during comparisons, so is an "approximate strcasecmp()".
 * NOTE that it still allows only _one_ real "typo",
 * it does NOT try to correct multiple errors.
*/static sp_reason spdist(const char *s, const char *t){ for (; ap_tolower(*s) == ap_tolower(*t); t++, s++) { if (*t == '\0') { return SP_MISCAPITALIZED; /* exact match (sans case) */ } } if (*s) { if (*t) { if (s[1] && t[1] && ap_tolower(*s) == ap_tolower(t[1]) && ap_tolower(*t) == ap_tolower(s[1]) && strcasecmp(s + 2, t + 2) == 0) { return SP_TRANSPOSITION; /* transposition */ } if (strcasecmp(s + 1, t + 1) == 0) { return SP_SIMPLETYPO; /* 1 char mismatch */ } } if (strcasecmp(s + 1, t) == 0) { return SP_EXTRACHAR; /* extra character */ } } if (*t && strcasecmp(s, t + 1) == 0) { return SP_MISSINGCHAR; /* missing character */ } return SP_VERYDIFFERENT; /* distance too large to fix. */}static int sort_by_quality(const void *left, const void *rite){ return (int) (((misspelled_file *) left)->quality) - (int) (((misspelled_file *) rite)->quality);}static int check_speling(request_rec *r){ spconfig *cfg; char *good, *bad, *postgood, *url; int filoc, dotloc, urlen, pglen; DIR *dirp; struct DIR_TYPE *dir_entry; array_header *candidates = NULL; cfg = ap_get_module_config(r->per_dir_config, &speling_module); if (!cfg->enabled) { return DECLINED; } /* We only want to worry about GETs */ if (r->method_number != M_GET) { return DECLINED; } /* We've already got a file of some kind or another */ if (r->proxyreq != NOT_PROXY || (r->finfo.st_mode != 0)) { return DECLINED; } /* This is a sub request - don't mess with it */ if (r->main) { return DECLINED; } /* * The request should end up looking like this: * r->uri: /correct-url/mispelling/more * r->filename: /correct-file/mispelling r->path_info: /more * * So we do this in steps. 
First break r->filename into two pieces */ filoc = ap_rind(r->filename, '/'); /* * Don't do anything if the request doesn't contain a slash, or * requests "/" */ if (filoc == -1 || strcmp(r->uri, "/") == 0) { return DECLINED; } /* good = /correct-file */ good = ap_pstrndup(r->pool, r->filename, filoc); /* bad = mispelling */ bad = ap_pstrdup(r->pool, r->filename + filoc + 1); /* postgood = mispelling/more */ postgood = ap_pstrcat(r->pool, bad, r->path_info, NULL); urlen = strlen(r->uri); pglen = strlen(postgood); /* Check to see if the URL pieces add up */ if (strcmp(postgood, r->uri + (urlen - pglen))) { return DECLINED; } /* url = /correct-url */ url = ap_pstrndup(r->pool, r->uri, (urlen - pglen)); /* Now open the directory and do ourselves a check... */ dirp = ap_popendir(r->pool, good); if (dirp == NULL) { /* Oops, not a directory... */ return DECLINED; } candidates = ap_make_array(r->pool, 2, sizeof(misspelled_file)); dotloc = ap_ind(bad, '.'); if (dotloc == -1) { dotloc = strlen(bad);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -