📄 fstrcmp.c
字号:
int k; for (k = 1; xv[x - k] == yv[y - k]; k++) { if (k == SNAKE_LIMIT) { best = v; part->xmid = x; part->ymid = y; break; } } } } } if (best > 0) { part->lo_minimal = 1; part->hi_minimal = 0; return 2 * c - 1; } best = 0; for (d = bmax; d >= bmin; d -= 2) { int dd; int x; int y; int v; dd = d - bmid; x = bd[d]; y = x - d; v = (xlim - x) * 2 + dd; if (v > 12 * (c + (dd < 0 ? -dd : dd))) { if (v > best && xoff < x && x <= xlim - SNAKE_LIMIT && yoff < y && y <= ylim - SNAKE_LIMIT) { /* We have a good enough best diagonal; now insist that it end with a significant snake. */ int k; for (k = 0; xv[x + k] == yv[y + k]; k++) { if (k == SNAKE_LIMIT - 1) { best = v; part->xmid = x; part->ymid = y; break; } } } } } if (best > 0) { part->lo_minimal = 0; part->hi_minimal = 1; return 2 * c - 1; } }#endif /* MINUS_H_FLAG */ /* Heuristic: if we've gone well beyond the call of duty, give up and report halfway between our best results so far. */ if (c >= too_expensive) { int fxybest; int fxbest; int bxybest; int bxbest; /* Pacify `gcc -Wall'. */ fxbest = 0; bxbest = 0; /* Find forward diagonal that maximizes X + Y. */ fxybest = -1; for (d = fmax; d >= fmin; d -= 2) { int x; int y; x = fd[d] < xlim ? fd[d] : xlim; y = x - d; if (ylim < y) { x = ylim + d; y = ylim; } if (fxybest < x + y) { fxybest = x + y; fxbest = x; } } /* Find backward diagonal that minimizes X + Y. */ bxybest = INT_MAX; for (d = bmax; d >= bmin; d -= 2) { int x; int y; x = xoff > bd[d] ? xoff : bd[d]; y = x - d; if (y < yoff) { x = yoff + d; y = yoff; } if (x + y < bxybest) { bxybest = x + y; bxbest = x; } } /* Use the better of the two diagonals. */ if ((xlim + ylim) - bxybest < fxybest - (xoff + yoff)) { part->xmid = fxbest; part->ymid = fxybest - fxbest; part->lo_minimal = 1; part->hi_minimal = 0; } else { part->xmid = bxbest; part->ymid = bxybest - bxbest; part->lo_minimal = 0; part->hi_minimal = 1; } return 2 * c - 1; } }}/* NAME compareseq - find edit sequence SYNOPSIS void compareseq(int xoff, int xlim, int yoff, int ylim, int minimal); DESCRIPTION Compare in detail contiguous subsequences of the two strings which are known, as a whole, to match each other. The subsequence of string 0 is [XOFF, XLIM) and likewise for string 1. Note that XLIM, YLIM are exclusive bounds. All character numbers are origin-0. If MINIMAL is nonzero, find a minimal difference no matter how expensive it is. */static void compareseq PARAMS ((int, int, int, int, int));static voidcompareseq (xoff, xlim, yoff, ylim, minimal) int xoff; int xlim; int yoff; int ylim; int minimal;{ const char *const xv = string[0].data; /* Help the compiler. */ const char *const yv = string[1].data; if (string[1].edit_count + string[0].edit_count > max_edits) return; /* Slide down the bottom initial diagonal. */ while (xoff < xlim && yoff < ylim && xv[xoff] == yv[yoff]) { ++xoff; ++yoff; } /* Slide up the top initial diagonal. */ while (xlim > xoff && ylim > yoff && xv[xlim - 1] == yv[ylim - 1]) { --xlim; --ylim; } /* Handle simple cases. */ if (xoff == xlim) { while (yoff < ylim) { ++string[1].edit_count; ++yoff; } } else if (yoff == ylim) { while (xoff < xlim) { ++string[0].edit_count; ++xoff; } } else { int c; struct partition part; /* Find a point of correspondence in the middle of the strings. */ c = diag (xoff, xlim, yoff, ylim, minimal, &part); if (c == 1) {#if 0 /* This should be impossible, because it implies that one of the two subsequences is empty, and that case was handled above without calling `diag'. Let's verify that this is true. */ abort ();#else /* The two subsequences differ by a single insert or delete; record it and we are done. */ if (part.xmid - part.ymid < xoff - yoff) ++string[1].edit_count; else ++string[0].edit_count;#endif } else { /* Use the partitions to split this problem into subproblems. */ compareseq (xoff, part.xmid, yoff, part.ymid, part.lo_minimal); compareseq (part.xmid, xlim, part.ymid, ylim, part.hi_minimal); } }}/* NAME fstrcmp - fuzzy string compare SYNOPSIS double fstrcmp(const char *, const char *, double); DESCRIPTION The fstrcmp function may be used to compare two string for similarity. It is very useful in reducing "cascade" or "secondary" errors in compilers or other situations where symbol tables occur. RETURNS double; 0 if the strings are entirly dissimilar, 1 if the strings are identical, and a number in between if they are similar. */doublefstrcmp (const char *string1, const char *string2, double minimum){ int i; size_t fdiag_len; static int *fdiag_buf; static size_t fdiag_max; /* set the info for each string. */ string[0].data = string1; string[0].data_length = strlen (string1); string[1].data = string2; string[1].data_length = strlen (string2); /* short-circuit obvious comparisons */ if (string[0].data_length == 0 && string[1].data_length == 0) return 1.0; if (string[0].data_length == 0 || string[1].data_length == 0) return 0.0; /* Set TOO_EXPENSIVE to be approximate square root of input size, bounded below by 256. */ too_expensive = 1; for (i = string[0].data_length + string[1].data_length; i != 0; i >>= 2) too_expensive <<= 1; if (too_expensive < 256) too_expensive = 256; /* Because fstrcmp is typically called multiple times, while scanning symbol tables, etc, attempt to minimize the number of memory allocations performed. Thus, we use a static buffer for the diagonal vectors, and never free them. */ fdiag_len = string[0].data_length + string[1].data_length + 3; if (fdiag_len > fdiag_max) { fdiag_max = fdiag_len; fdiag_buf = realloc (fdiag_buf, fdiag_max * (2 * sizeof (int))); } fdiag = fdiag_buf + string[1].data_length + 1; bdiag = fdiag + fdiag_len; max_edits = 1 + (string[0].data_length + string[1].data_length) * (1. - minimum); /* Now do the main comparison algorithm */ string[0].edit_count = 0; string[1].edit_count = 0; compareseq (0, string[0].data_length, 0, string[1].data_length, 0); /* The result is ((number of chars in common) / (average length of the strings)). This is admittedly biased towards finding that the strings are similar, however it does produce meaningful results. */ return ((double) (string[0].data_length + string[1].data_length - string[1].edit_count - string[0].edit_count) / (string[0].data_length + string[1].data_length));}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -