📄 reps-strings.c
字号:
Return -1 if offset is beyond the end of the represented data.
### The basic assumption is that all delta windows are the same size
and aligned at the same offset, so this number is the same in all
dependent deltas. Oh, and the chunks in REP must be ordered. */
/* Find the delta chunk of REP that covers byte REP_OFFSET of the
   represented data.

   REP's chunk array is scanned front to back, stopping at the first
   chunk whose end (offset + size) lies past REP_OFFSET.  On a hit,
   store REP_OFFSET's position relative to that chunk's start in
   *CHUNK_OFFSET and return the chunk's index.  Return -1, leaving
   *CHUNK_OFFSET untouched, when no chunk ends past REP_OFFSET.

   REP's chunk array must not be empty (asserted). */
static int
get_chunk_offset (representation_t *rep,
                  svn_filesize_t rep_offset,
                  apr_size_t *chunk_offset)
{
  const apr_array_header_t *chunk_list = rep->contents.delta.chunks;
  int idx;

  assert (chunk_list->nelts);

  /* ### Deliberately a linear scan; switch to bisection once it shows
     up as a bottleneck. */
  for (idx = 0; idx < chunk_list->nelts; ++idx)
    {
      const rep_delta_chunk_t *const candidate
        = APR_ARRAY_IDX (chunk_list, idx, rep_delta_chunk_t*);

      /* This chunk ends at or before REP_OFFSET: keep looking. */
      if ((candidate->offset + candidate->size) <= rep_offset)
        continue;

      /* The chunk must actually start at or before REP_OFFSET, and the
         distance into it must fit the narrower apr_size_t result. */
      assert (candidate->offset <= rep_offset);
      assert (rep_offset - candidate->offset < SVN_MAX_OBJECT_SIZE);

      *chunk_offset = (apr_size_t) (rep_offset - candidate->offset);
      return idx;
    }

  return -1;
}
/* Read up to *LEN bytes, beginning at OFFSET, of the data represented
   by REP_KEY in FS, and place them in BUF.  All database access is
   done as part of TRAIL.

   On return, *LEN holds the number of bytes actually copied.  It is
   set to zero when REP_KEY is a delta and get_chunk_offset() finds no
   chunk covering OFFSET. */
static svn_error_t *
rep_read_range (svn_fs_t *fs,
                const char *rep_key,
                svn_filesize_t offset,
                char *buf,
                apr_size_t *len,
                trail_t *trail)
{
  representation_t *rep;
  apr_array_header_t *reps;
  apr_size_t chunk_offset;
  int cur_chunk;

  /* Fetch the representation we were asked about. */
  SVN_ERR (svn_fs_bdb__read_rep (&rep, fs, rep_key, trail));

  /* A fulltext is served directly from its backing string. */
  if (rep->kind == rep_kind_fulltext)
    {
      SVN_ERR (svn_fs_bdb__string_read (fs, rep->contents.fulltext.string_key,
                                        buf, offset, len, trail));
      return SVN_NO_ERROR;
    }

  if (rep->kind != rep_kind_delta)
    abort (); /* unknown kind */

  cur_chunk = get_chunk_offset (rep, offset, &chunk_offset);
  if (cur_chunk < 0)
    {
      /* No chunk covers OFFSET, so there is nothing to copy. */
      *len = 0;
      return SVN_NO_ERROR;
    }

  /* Gather, once and up front, every rep needed to undeltify this
     range; they all have to be read within this trail anyway.
     ### The initial capacity below is an arbitrary placeholder. */
  reps = apr_array_make (trail->pool, 666, sizeof (rep));

  for (;;)
    {
      const apr_array_header_t *chunk_list = rep->contents.delta.chunks;
      const rep_delta_chunk_t *const head
        = APR_ARRAY_IDX (chunk_list, 0, rep_delta_chunk_t*);
      const rep_delta_chunk_t *const wanted
        = APR_ARRAY_IDX (chunk_list, cur_chunk, rep_delta_chunk_t*);

      /* Every chunk of a rep must share the first chunk's version. */
      if (head->version != wanted->version)
        return svn_error_createf
          (SVN_ERR_FS_CORRUPT, NULL,
           "Diff version inconsistencies in representation '%s'",
           rep_key);

      /* Record the current rep, then step to the rep this chunk is a
         delta against. */
      rep_key = wanted->rep_key;
      *(representation_t**) apr_array_push (reps) = rep;
      SVN_ERR (svn_fs_bdb__read_rep (&rep, fs, rep_key, trail));

      /* Stop once we reach something that is not a delta, or a delta
         that has no chunk at index CUR_CHUNK. */
      if (rep->kind != rep_kind_delta
          || rep->contents.delta.chunks->nelts <= cur_chunk)
        break;
    }

  /* The last rep read is either a fulltext, which serves as source
     data, or a delta that is too short, in which case we undeltify
     without any source data. */
  if (rep->kind == rep_kind_delta)
    rep = NULL; /* Don't use source data */
  else if (rep->kind != rep_kind_fulltext)
    abort (); /* unknown kind */

  SVN_ERR (rep_undeltify_range (fs, reps, rep, cur_chunk,
                                buf, chunk_offset, len, trail));

  return SVN_NO_ERROR;
}
/* Set *NEW_REP_KEY to the key of a representation in FS that is
   mutable with respect to TXN_ID, working as part of TRAIL.

   When REP_KEY is non-null and non-empty, and rep_is_mutable() accepts
   the representation it names for TXN_ID, REP_KEY itself is returned.
   In every other case a new fulltext representation is written and
   its key returned.  That representation is built from a string key
   obtained by appending zero bytes through a NULL key, and from the
   digest returned by svn_md5_empty_string_digest(). */
svn_error_t *
svn_fs_base__get_mutable_rep (const char **new_rep_key,
                              const char *rep_key,
                              svn_fs_t *fs,
                              const char *txn_id,
                              trail_t *trail)
{
  const char *fresh_string_key = NULL;
  representation_t *fresh_rep;

  /* A base key was supplied: reuse it if it is already mutable. */
  if (rep_key != NULL && *rep_key != '\0')
    {
      representation_t *base_rep = NULL;

      SVN_ERR (svn_fs_bdb__read_rep (&base_rep, fs, rep_key, trail));
      if (rep_is_mutable (base_rep, txn_id))
        {
          *new_rep_key = rep_key;
          return SVN_NO_ERROR;
        }
    }

  /* No base key, or the base is not mutable: build a brand-new
     representation and hand back its key. */
  SVN_ERR (svn_fs_bdb__string_append (fs, &fresh_string_key, 0, NULL,
                                      trail));
  fresh_rep = make_fulltext_rep (fresh_string_key, txn_id,
                                 svn_md5_empty_string_digest (),
                                 trail->pool);
  SVN_ERR (svn_fs_bdb__write_new_rep (new_rep_key, fs, fresh_rep, trail));

  return SVN_NO_ERROR;
}
/* Delete the representation REP_KEY from FS, together with the
   string(s) it refers to, provided rep_is_mutable() reports it as
   mutable for TXN_ID.  Do nothing otherwise.  All work is done as
   part of TRAIL. */
svn_error_t *
svn_fs_base__delete_rep_if_mutable (svn_fs_t *fs,
                                    const char *rep_key,
                                    const char *txn_id,
                                    trail_t *trail)
{
  representation_t *victim;

  SVN_ERR (svn_fs_bdb__read_rep (&victim, fs, rep_key, trail));

  if (rep_is_mutable (victim, txn_id))
    {
      /* First remove the string data backing the representation... */
      if (victim->kind == rep_kind_delta)
        {
          apr_array_header_t *string_keys;

          SVN_ERR (delta_string_keys (&string_keys, victim, trail->pool));
          SVN_ERR (delete_strings (string_keys, fs, trail));
        }
      else if (victim->kind == rep_kind_fulltext)
        {
          SVN_ERR (svn_fs_bdb__string_delete
                   (fs, victim->contents.fulltext.string_key, trail));
        }
      else
        abort (); /* unknown kind */

      /* ...then the representation record itself. */
      SVN_ERR (svn_fs_bdb__delete_rep (fs, rep_key, trail));
    }

  return SVN_NO_ERROR;
}
/*** Reading and writing data via representations. ***/
/** Reading. **/
/* State carried across successive reads of one representation's
   contents.  Instances are allocated and filled in by
   rep_read_get_baton(). */
struct rep_read_baton
{
/* The FS from which we're reading. */
svn_fs_t *fs;
/* Key of the representation whose contents we want to read.  If this
   is NULL, the rep has never had any contents, so all reads fetch 0
   bytes.

   Formerly, we cached the entire rep skel here, not just the key.
   That way we didn't have to fetch the rep from the db every time
   we wanted to read a little bit more of the file.  Unfortunately,
   this has a problem: if, say, a file's representation changes
   while we're reading (changes from fulltext to delta, for
   example), we'll never know it.  So for correctness, we now
   refetch the representation skel every time we want to read
   another chunk. */
const char *rep_key;
/* How many bytes have been read already.  Starts at 0. */
svn_filesize_t offset;
/* If present, the read will be done as part of this trail, and the
   trail's pool will be used.  Otherwise, see `pool' below.
   rep_read_get_baton() leaves this NULL when its caller asks not to
   use the trail for reads. */
trail_t *trail;
/* MD5 checksum.  Initialized when the baton is created, updated as
   we read data, and finalized when the stream is closed. */
struct apr_md5_ctx_t md5_context;
/* The length of the rep's contents (as fulltext, that is,
   independent of how the rep actually stores the data.)  This is
   retrieved when the baton is created (0 when `rep_key' is NULL),
   and used to determine when we have read the last byte, at which
   point we compare checksums.

   Getting this at baton creation time makes interleaved reads and
   writes on the same rep in the same trail impossible.  But we're
   not doing that, and probably no one ever should.  And anyway if
   they do, they should see problems immediately. */
svn_filesize_t size;
/* Set to FALSE when the baton is created, TRUE when the md5_context
   is digestified. */
svn_boolean_t checksum_finalized;
/* Used for temporary allocations, iff `trail' (above) is null.
   rep_read_get_baton() stores here the same pool the baton itself
   was allocated from. */
apr_pool_t *pool;
};
/* Allocate a zero-filled read baton in POOL, prepare it for reading
   the contents of REP_KEY in FS from offset 0, and return it in
   *RB_P.

   The baton's size field is looked up through TRAIL when REP_KEY is
   non-null, and is 0 otherwise.  TRAIL is stored in the baton only if
   USE_TRAIL_FOR_READS is true; otherwise the baton's trail is NULL.
   If the size lookup fails, its error is returned and *RB_P is left
   unset. */
static svn_error_t *
rep_read_get_baton (struct rep_read_baton **rb_p,
                    svn_fs_t *fs,
                    const char *rep_key,
                    svn_boolean_t use_trail_for_reads,
                    trail_t *trail,
                    apr_pool_t *pool)
{
  struct rep_read_baton *baton = apr_pcalloc (pool, sizeof (*baton));

  /* Where to read from, and with which resources. */
  baton->fs = fs;
  baton->rep_key = rep_key;
  baton->pool = pool;
  baton->trail = use_trail_for_reads ? trail : NULL;

  /* Nothing has been read or checksummed yet. */
  baton->offset = 0;
  baton->checksum_finalized = FALSE;
  apr_md5_init (&(baton->md5_context));

  /* A missing rep key means there are no contents at all.  Note that
     the size lookup always goes through TRAIL, regardless of
     USE_TRAIL_FOR_READS. */
  baton->size = 0;
  if (rep_key)
    {
      SVN_ERR (svn_fs_base__rep_contents_size (&(baton->size), fs,
                                               rep_key, trail));
    }

  *rb_p = baton;
  return SVN_NO_ERROR;
}
/*** Retrieving data. ***/
/* Set *SIZE_P to the length of the contents represented by REP_KEY in
   FS, as part of TRAIL.

   For a fulltext this is the size of the backing string.  For a delta
   it is the offset plus the size of the last chunk.  A delta with no
   chunks trips an assertion, and an unknown representation kind
   aborts. */
svn_error_t *
svn_fs_base__rep_contents_size (svn_filesize_t *size_p,
                                svn_fs_t *fs,
                                const char *rep_key,
                                trail_t *trail)
{
  representation_t *rep;

  SVN_ERR (svn_fs_bdb__read_rep (&rep, fs, rep_key, trail));

  if (rep->kind == rep_kind_delta)
    {
      /* The end of the last window package is the total size.  Taking
         offset + size of the final chunk stays correct even if
         windows overlap, as long as the packages are kept in order. */
      const apr_array_header_t *windows = rep->contents.delta.chunks;
      const rep_delta_chunk_t *tail;

      assert (windows->nelts);
      tail = APR_ARRAY_IDX (windows, windows->nelts - 1,
                            rep_delta_chunk_t *);
      *size_p = tail->offset + tail->size;

      return SVN_NO_ERROR;
    }

  if (rep->kind != rep_kind_fulltext)
    abort (); /* unknown kind */

  /* Fulltext: ask Berkeley for the length of the string. */
  SVN_ERR (svn_fs_bdb__string_size (size_p, fs,
                                    rep->contents.fulltext.string_key,
                                    trail));

  return SVN_NO_ERROR;
}
/* Copy the checksum recorded in the representation REP_KEY of FS into
   DIGEST, as part of TRAIL.  Exactly APR_MD5_DIGESTSIZE bytes are
   written, so DIGEST must have room for at least that many. */
svn_error_t *
svn_fs_base__rep_contents_checksum (unsigned char digest[],
                                    svn_fs_t *fs,
                                    const char *rep_key,
                                    trail_t *trail)
{
  representation_t *stored;

  /* The checksum is part of the rep record; no content is read. */
  SVN_ERR (svn_fs_bdb__read_rep (&stored, fs, rep_key, trail));
  memcpy (digest, stored->checksum, APR_MD5_DIGESTSIZE);

  return SVN_NO_ERROR;
}
svn_error_t *
svn_fs_base__rep_contents (svn_string_t *str,
svn_fs_t *fs,
const char *rep_key,
trail_t *trail)
{
svn_filesize_t contents_size;
apr_size_t len;
char *data;
SVN_ERR (svn_fs_base__rep_contents_size (&contents_size, fs, rep_key,
trail));
/* What if the contents are larger than we can handle? */
if (contents_size > SVN_MAX_OBJECT_SIZE)
return svn_error_createf
(SVN_ERR_FS_GENERAL, NULL,
"Rep contents are too large "
"(got %" SVN_FILESIZE_T_FMT ", limit is %" APR_SIZE_T_FMT ")",
contents_size, SVN_MAX_OBJECT_SIZE);
else
str->len = (apr_size_t) contents_size;
data = apr_palloc (trail->pool, str->len);
str->data = data;
len = str->len;
SVN_ERR (rep_read_range (fs, rep_key, 0, data, &len, trail));
/* Paranoia. */
if (len != str->len)
return svn_error_createf
(SVN_ERR_FS_CORRUPT, NULL,
"Failure reading rep '%s'", rep_key);
/* Just the standard paranoia. */
{
representation_t *rep;
apr_md5_ctx_t md5_context;
unsigned char checksum[APR_MD5_DIGESTSIZE];
apr_md5_init (&md5_context);
apr_md5_update (&md5_context, str->data, str->len);
apr_md5_final (checksum, &md5_context);
SVN_ERR (svn_fs_bdb__read_rep (&rep, fs, rep_key, trail));
if (! svn_md5_digests_match (checksum, rep->checksum))
return svn_error_createf
(SVN_ERR_FS_CORRUPT, NULL,
"Checksum mismatch on rep '%s':\n"
" expected: %s\n"
" actual: %s\n", rep_key,
svn_md5_digest_to_cstring (rep->checksum, trail->pool),
svn_md5_digest_to_cstring (checksum, trail->pool));
}
return SVN_NO_ERROR;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -