📄 diff_file.c
字号:
/*
 * diff_file.c :  routines for doing diffs on files
 *
 * ====================================================================
 * Copyright (c) 2000-2006 CollabNet.  All rights reserved.
 *
 * This software is licensed as described in the file COPYING, which
 * you should have received as part of this distribution.  The terms
 * are also available at http://subversion.tigris.org/license-1.html.
 * If newer versions of this license are posted there, you may use a
 * newer version instead, at your option.
 *
 * This software consists of voluntary contributions made by many
 * individuals.  For exact contribution history, see the revision
 * history and logs, available at http://subversion.tigris.org/.
 * ====================================================================
 */

#include <apr.h>
#include <apr_pools.h>
#include <apr_general.h>
#include <apr_file_io.h>
#include <apr_file_info.h>
#include <apr_time.h>
#include <apr_mmap.h>
#include <apr_getopt.h>

#include "svn_error.h"
#include "svn_diff.h"
#include "svn_types.h"
#include "svn_string.h"
#include "svn_io.h"
#include "svn_utf.h"
#include "svn_pools.h"
#include "svn_ctype.h"

#include "diff.h"

#include "svn_private_config.h"


/* A token, i.e. a line read from a file. */
typedef struct svn_diff__file_token_t
{
  /* Next token in free list. */
  struct svn_diff__file_token_t *next;
  /* The datasource this token was read from. */
  svn_diff_datasource_e datasource;
  /* Offset in the datasource. */
  apr_off_t offset;
  /* Total length - before normalization. */
  apr_off_t raw_length;
  /* Total length - after normalization. */
  apr_off_t length;
} svn_diff__file_token_t;


/* State used when normalizing whitespace and EOL styles. */
typedef enum normalize_state_t
{
  /* Initial state; not in a sequence of whitespace. */
  state_normal,
  /* We're in a sequence of whitespace characters.  Only entered if
     we ignore whitespace. */
  state_whitespace,
  /* The previous character was CR. */
  state_cr
} normalize_state_t;


/* Per-diff state shared by the datasource callbacks in this file.
 * The four-element arrays hold one slot per datasource;
 * svn_diff__file_datasource_open() indexes PATH, FILE, SIZE, BUFFER,
 * CURP and ENDP with the value returned by
 * svn_diff__file_datasource_to_index().
 * NOTE(review): CHUNK and NORMALIZE_STATE are presumably indexed the
 * same way; their users are not all visible here -- confirm. */
typedef struct svn_diff__file_baton_t
{
  const svn_diff_file_options_t *options;
  const char *path[4];

  apr_file_t *file[4];
  apr_off_t size[4];

  int chunk[4];
  /* Start of the chunk buffer, current read position within it, and
     one-past-the-end of the data in it. */
  char *buffer[4];
  char *curp[4];
  char *endp[4];

  /* List of free tokens that may be reused. */
  svn_diff__file_token_t *tokens;

  normalize_state_t normalize_state[4];

  apr_pool_t *pool;
} svn_diff__file_baton_t;


/* Look for the start of an end-of-line sequence (i.e. CR or LF)
 * in the array pointed to by BUF, of length LEN.
 * If such a byte is found, return the pointer to it, else return NULL.
 */
static char *
find_eol_start(char *buf, apr_size_t len)
{
  for (; len > 0; ++buf, --len)
    {
      if (*buf == '\n' || *buf == '\r')
        return buf;
    }
  return NULL;
}


/* Map DATASOURCE to its slot (0..3) in the per-datasource arrays of
 * svn_diff__file_baton_t.  Return -1 for a value that is not one of
 * the four known datasources. */
static int
svn_diff__file_datasource_to_index(svn_diff_datasource_e datasource)
{
  switch (datasource)
    {
    case svn_diff_datasource_original:
      return 0;

    case svn_diff_datasource_modified:
      return 1;

    case svn_diff_datasource_latest:
      return 2;

    case svn_diff_datasource_ancestor:
      return 3;
    }

  return -1;
}


/* Files are read in chunks of 128k.  There is no support for this number
 * whatsoever.  If there is a number someone comes up with that has some
 * argumentation, let's use that.
 */
#define CHUNK_SHIFT 17
#define CHUNK_SIZE (1 << CHUNK_SHIFT)

/* CHUNK is widened to apr_off_t before shifting.  Chunk numbers are kept
 * as int in the baton, and shifting an int left by CHUNK_SHIFT overflows
 * (undefined behaviour) as soon as the resulting offset reaches 2^31. */
#define chunk_to_offset(chunk) ((apr_off_t)(chunk) << CHUNK_SHIFT)
#define offset_to_chunk(offset) ((offset) >> CHUNK_SHIFT)
#define offset_in_chunk(offset) ((offset) & (CHUNK_SIZE - 1))


/* Seek FILE to OFFSET and read LENGTH bytes into BUFFER with
 * svn_io_file_read_full().  LENGTH is passed by value and NULL is passed
 * for the bytes-read output, so no byte count is reported back to the
 * caller.  PATH is currently unused.  POOL is handed to the svn_io calls.
 */
static APR_INLINE svn_error_t *
read_chunk(apr_file_t *file, const char *path,
           char *buffer, apr_size_t length,
           apr_off_t offset, apr_pool_t *pool)
{
  /* XXX: The final offset may not be the one we asked for.
   * XXX: Check.
   */
  SVN_ERR(svn_io_file_seek(file, APR_SET, &offset, pool));
  SVN_ERR(svn_io_file_read_full(file, buffer, length, NULL, pool));

  return SVN_NO_ERROR;
}


/* Map or read a file at PATH.  *BUFFER will point to the file
 * contents, or be NULL if the file is empty.  If the file was mapped,
 * *MM holds the mapping and *FILE the still-open file backing it;
 * otherwise *MM is NULL.  When the contents were read into memory,
 * *FILE is closed and set to NULL.  For an empty file nothing is read
 * and *FILE is left open.  *SIZE will contain the file size.
 * Allocate from POOL.
 */
#if APR_HAS_MMAP
#define MMAP_T_PARAM(NAME) apr_mmap_t **NAME,
#define MMAP_T_ARG(NAME)   &(NAME),
#else
#define MMAP_T_PARAM(NAME)
#define MMAP_T_ARG(NAME)
#endif

static svn_error_t *
map_or_read_file(apr_file_t **file,
                 MMAP_T_PARAM(mm)
                 char **buffer, apr_off_t *size,
                 const char *path, apr_pool_t *pool)
{
  apr_finfo_t finfo;
#if APR_HAS_MMAP
  apr_status_t rv;

  /* Honour the documented contract even when no mapping is attempted
     or the mapping fails. */
  *mm = NULL;
#endif

  *buffer = NULL;

  SVN_ERR(svn_io_file_open(file, path, APR_READ, APR_OS_DEFAULT, pool));
  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE, *file, pool));

#if APR_HAS_MMAP
  if (finfo.size > APR_MMAP_THRESHOLD)
    {
      rv = apr_mmap_create(mm, *file, 0, finfo.size, APR_MMAP_READ, pool);
      if (rv == APR_SUCCESS)
        {
          *buffer = (*mm)->mm;
        }

      /* On failure we just fall through and try reading the file into
       * memory instead.
       */
    }
#endif /* APR_HAS_MMAP */

  if (*buffer == NULL && finfo.size > 0)
    {
      /* NOTE(review): finfo.size is an apr_off_t; the narrowing to
         apr_size_t below is not range-checked. */
      *buffer = apr_palloc(pool, (apr_size_t) finfo.size);

      SVN_ERR(svn_io_file_read_full(*file, *buffer,
                                    (apr_size_t) finfo.size, NULL, pool));

      /* Since we have the entire contents of the file we can
       * close it now.
       */
      SVN_ERR(svn_io_file_close(*file, pool));

      *file = NULL;
    }

  *size = finfo.size;

  return SVN_NO_ERROR;
}


/* Implements svn_diff_fns_t::datasource_open
 *
 * Open the file for DATASOURCE, record its size in the baton and, unless
 * the file is empty, allocate a buffer of at most CHUNK_SIZE bytes from
 * the baton's pool and read the first chunk into it.  For an empty file
 * the buffer, curp and endp slots are not touched.
 */
static svn_error_t *
svn_diff__file_datasource_open(void *baton,
                               svn_diff_datasource_e datasource)
{
  svn_diff__file_baton_t *file_baton = baton;
  int idx;
  apr_finfo_t finfo;
  apr_size_t length;
  char *curp;
  char *endp;

  /* NOTE(review): a -1 result (unknown datasource) is not checked before
     being used as an array index below -- confirm callers only pass the
     four known datasources. */
  idx = svn_diff__file_datasource_to_index(datasource);

  SVN_ERR(svn_io_file_open(&file_baton->file[idx], file_baton->path[idx],
                           APR_READ, APR_OS_DEFAULT, file_baton->pool));

  SVN_ERR(svn_io_file_info_get(&finfo, APR_FINFO_SIZE,
                               file_baton->file[idx], file_baton->pool));

  file_baton->size[idx] = finfo.size;

  /* Read at most one chunk up front. */
  length = finfo.size > CHUNK_SIZE ? CHUNK_SIZE : finfo.size;

  if (length == 0)
    return SVN_NO_ERROR;

  endp = curp = apr_palloc(file_baton->pool, length);
  endp += length;

  file_baton->buffer[idx] = file_baton->curp[idx] = curp;
  file_baton->endp[idx] = endp;

  SVN_ERR(read_chunk(file_baton->file[idx], file_baton->path[idx],
                     curp, length, 0, file_baton->pool));

  return SVN_NO_ERROR;
}


/* Implements svn_diff_fns_t::datasource_close */
static svn_error_t *
svn_diff__file_datasource_close(void *baton,
                                svn_diff_datasource_e datasource)
{
  /* Do nothing.  The compare_token function needs previous datasources
   * to stay available until all datasources are processed.
   */

  return SVN_NO_ERROR;
}


/* Normalize the characters pointed to by BUF of length *LENGTHP, starting
 * in state *STATEP according to the OPTS.
 * Adjust *LENGTHP and *STATEP to be the length of the normalized buffer and
 * the final state, respectively.
 * The normalization is done in-place, so the new length will be <= the old.
 * If OPTS requests neither whitespace nor EOL-style normalization, return
 * immediately and leave *LENGTHP and *STATEP untouched.
 */
static void
normalize(char *buf, apr_off_t *lengthp, normalize_state_t *statep,
          const svn_diff_file_options_t *opts)
{
  char *curp, *endp;
  /* Start of next chunk to copy. */
  char *start = buf;
  /* The current end of the normalized buffer. */
  char *newend = buf;
  normalize_state_t state = *statep;

  /* If this is a noop, then just get out of here. */
  if (! opts->ignore_space && ! opts->ignore_eol_style)
    return;

  for (curp = buf, endp = buf + *lengthp; curp != endp; ++curp)
    {
      switch (state)
        {
        case state_cr:
          state = state_normal;
          if (*curp == '\n' && opts->ignore_eol_style)
            {
              /* The LF of a CRLF pair: an LF was already emitted for
                 the CR, so just skip this byte. */
              start = curp + 1;
              break;
            }
          /* Else, fall through. */
        case state_normal:
          if (svn_ctype_isspace(*curp))
            {
              /* Flush non-ws characters. */
              if (newend != start)
                memmove(newend, start, curp - start);
              newend += curp - start;
              start = curp;
              switch (*curp)
                {
                case '\r':
                  state = state_cr;
                  if (opts->ignore_eol_style)
                    {
                      /* Replace this CR with an LF; if we're followed by an
                         LF, that will be ignored. */
                      *newend++ = '\n';
                      ++start;
                    }
                  break;
                case '\n':
                  break;
                default:
                  /* Some other whitespace character. */
                  if (opts->ignore_space)
                    {
                      state = state_whitespace;
                      /* When only changes in the amount of whitespace are
                         ignored, a run collapses to a single space. */
                      if (opts->ignore_space
                          == svn_diff_file_ignore_space_change)
                        *newend++ = ' ';
                    }
                  break;
                }
            }
          break;
        case state_whitespace:
          /* This is only entered if we're ignoring whitespace. */
          if (svn_ctype_isspace(*curp))
            {
              switch (*curp)
                {
                case '\r':
                  state = state_cr;
                  if (opts->ignore_eol_style)
                    {
                      *newend++ = '\n';
                      start = curp + 1;
                    }
                  else
                    start = curp;
                  break;
                case '\n':
                  state = state_normal;
                  start = curp;
                  break;
                default:
                  break;
                }
            }
          else
            {
              /* Non-whitespace character. */
              start = curp;
              state = state_normal;
            }
          break;
        }
    }

  /* If we're not in whitespace, flush the last chunk of data.
   * Note that this will work correctly when this is the last chunk of the
   * file:
   * * If there is an eol, it will either have been output when we entered
   *   the state_cr, or it will be output now.
   * * If there is no eol and we're not in whitespace, then we just output
   *   everything below.
   * * If there's no eol and we are in whitespace, we want to ignore
   *   whitespace unconditionally. */
  if (state != state_whitespace)
    {
      if (start != newend)
        memmove(newend, start, curp - start);
      newend += curp - start;
    }
  *lengthp = newend - buf;
  *statep = state;
}


/* Implements svn_diff_fns_t::datasource_get_next_token */
static svn_error_t *
svn_diff__file_datasource_get_next_token(apr_uint32_t *hash, void **token,
                                         void *baton,
                                         svn_diff_datasource_e datasource)
{
  svn_diff__file_baton_t *file_baton = baton;
  svn_diff__file_token_t *file_token;
  int idx;
  char *endp;
  char *curp;
  char *eol;
  int last_chunk;
  apr_off_t length;
  apr_uint32_t h = 0;
  /* Did the last chunk end in a CR character? */
  svn_boolean_t had_cr = FALSE;

  *token = NULL;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -