From db39fa453614a4996ca76763025c61e11b4297c0 Mon Sep 17 00:00:00 2001 From: kylin-bot Date: Fri, 29 May 2026 17:08:15 +0800 Subject: [PATCH] apply patch 5327 --- checksum.c | 185 +++++++++++++++++++++++++++++++++++++++++++++++++---- fileio.c | 59 ++++++++++++++--- match.c | 105 ++++++++++++++++++++++++++++-- receiver.c | 36 ++++++++++- rsync.h | 3 + sender.c | 18 ++++++ token.c | 85 +++++++++++++++++++++++- 7 files changed, 460 insertions(+), 31 deletions(-) diff --git a/checksum.c b/checksum.c index 46a224e..428d093 100644 --- a/checksum.c +++ b/checksum.c @@ -398,8 +398,151 @@ void get_checksum2(char *buf, int32 len, char *sum) } } +struct file_checksum_scan { + struct map_struct *buf; + OFF_T len; + OFF_T span_end; + int sparse; + int span_is_data; + uchar *scratch; +}; + +static uchar file_checksum_zero_buf[CHUNK_SIZE]; + +static int file_checksum_is_sparse(const STRUCT_STAT *st_p) +{ +#ifdef SEEK_DATA + OFF_T len = st_p->st_size; + OFF_T logical_blocks; + + if (len <= 0) + return 0; + + logical_blocks = len / 512 + (len % 512 != 0); + return st_p->st_blocks < logical_blocks; +#else + (void)st_p; + return 0; +#endif +} + +static void file_checksum_scan_init(struct file_checksum_scan *scan, + struct map_struct *buf, + const STRUCT_STAT *st_p) +{ + scan->buf = buf; + scan->len = st_p->st_size; + scan->span_end = 0; + scan->sparse = file_checksum_is_sparse(st_p); + scan->span_is_data = 1; + scan->scratch = NULL; +} + +static uchar *file_checksum_scratch(struct file_checksum_scan *scan) +{ + if (!scan->scratch) + scan->scratch = new_array(uchar, CHUNK_SIZE); + return scan->scratch; +} + +/* Refresh scan's cached data/hole span so that it covers pos. 
*/ +static int file_checksum_refresh_span(struct file_checksum_scan *scan, OFF_T pos) +{ +#ifdef SEEK_DATA + OFF_T data, hole; + + if (!scan->sparse) + return 0; + if (pos < scan->span_end) + return 1; + + errno = 0; + data = do_lseek(scan->buf->fd, pos, SEEK_DATA); + scan->buf->p_fd_offset = -1; + if (data < 0) { + if (errno == ENXIO) { + scan->span_end = scan->len; + scan->span_is_data = 0; + return 1; + } + scan->sparse = 0; + return 0; + } + + if (data > pos) { + scan->span_end = data > scan->len ? scan->len : data; + scan->span_is_data = 0; + return 1; + } + + errno = 0; + hole = do_lseek(scan->buf->fd, pos, SEEK_HOLE); + scan->buf->p_fd_offset = -1; + if (hole < 0) { + if (errno == EINVAL) { + scan->sparse = 0; + return 0; + } + hole = scan->len; + } + if (hole > scan->len) + hole = scan->len; + if (hole <= pos) + hole = pos + 1; + + scan->span_end = hole; + scan->span_is_data = 1; + return 1; +#else + (void)scan; + (void)pos; + return 0; +#endif +} + +static const uchar *file_checksum_sparse_map_ptr(struct file_checksum_scan *scan, OFF_T pos, int32 len) +{ + OFF_T end = pos + len; + uchar *out, *p; + + if (!file_checksum_refresh_span(scan, pos)) + return (const uchar *)map_ptr(scan->buf, pos, len); + if (end <= scan->span_end) { + if (scan->span_is_data) + return (const uchar *)map_ptr(scan->buf, pos, len); + return file_checksum_zero_buf; + } + + p = out = file_checksum_scratch(scan); + while (1) { + int32 n; + + n = (int32)MIN(end - pos, scan->span_end - pos); + if (scan->span_is_data) + memcpy(p, map_ptr(scan->buf, pos, n), n); + else + memset(p, 0, n); + p += n; + pos += n; + if (pos >= end) + break; + if (!file_checksum_refresh_span(scan, pos)) { + n = (int32)(end - pos); + memcpy(p, map_ptr(scan->buf, pos, n), n); + break; + } + } + + return out; +} + +#define file_checksum_map_ptr(scan, pos, len) \ + ((scan)->sparse ? 
file_checksum_sparse_map_ptr((scan), (pos), (len)) \ + : (const uchar *)map_ptr((scan)->buf, (pos), (len))) + void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) { + struct file_checksum_scan scan; struct map_struct *buf; OFF_T i, len = st_p->st_size; int32 remainder; @@ -411,7 +554,8 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) return; } - buf = map_file(fd, len, MAX_MAP_SIZE, CHUNK_SIZE); + buf = map_file(fd, st_p->st_size, MAX_MAP_SIZE, CHUNK_SIZE); + file_checksum_scan_init(&scan, buf, st_p); #ifdef USE_OPENSSL if (file_sum_evp_md) { @@ -422,11 +566,11 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) EVP_DigestInit_ex(evp, file_sum_evp_md, NULL); for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) - EVP_DigestUpdate(evp, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); + EVP_DigestUpdate(evp, file_checksum_map_ptr(&scan, i, CHUNK_SIZE), CHUNK_SIZE); remainder = (int32)(len - i); if (remainder > 0) - EVP_DigestUpdate(evp, (uchar *)map_ptr(buf, i, remainder), remainder); + EVP_DigestUpdate(evp, file_checksum_map_ptr(&scan, i, remainder), remainder); EVP_DigestFinal_ex(evp, (uchar *)sum, NULL); } else @@ -441,11 +585,11 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) XXH64_reset(state, 0); for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) - XXH64_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); + XXH64_update(state, file_checksum_map_ptr(&scan, i, CHUNK_SIZE), CHUNK_SIZE); remainder = (int32)(len - i); if (remainder > 0) - XXH64_update(state, (uchar *)map_ptr(buf, i, remainder), remainder); + XXH64_update(state, file_checksum_map_ptr(&scan, i, remainder), remainder); SIVAL64(sum, 0, XXH64_digest(state)); break; @@ -460,11 +604,11 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) XXH3_64bits_reset(state); for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) - XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, 
CHUNK_SIZE), CHUNK_SIZE); + XXH3_64bits_update(state, file_checksum_map_ptr(&scan, i, CHUNK_SIZE), CHUNK_SIZE); remainder = (int32)(len - i); if (remainder > 0) - XXH3_64bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder); + XXH3_64bits_update(state, file_checksum_map_ptr(&scan, i, remainder), remainder); SIVAL64(sum, 0, XXH3_64bits_digest(state)); break; @@ -478,11 +622,11 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) XXH3_128bits_reset(state); for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) - XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); + XXH3_128bits_update(state, file_checksum_map_ptr(&scan, i, CHUNK_SIZE), CHUNK_SIZE); remainder = (int32)(len - i); if (remainder > 0) - XXH3_128bits_update(state, (uchar *)map_ptr(buf, i, remainder), remainder); + XXH3_128bits_update(state, file_checksum_map_ptr(&scan, i, remainder), remainder); digest = XXH3_128bits_digest(state); SIVAL64(sum, 0, digest.low64); @@ -496,11 +640,11 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) md5_begin(&m5); for (i = 0; i + CHUNK_SIZE <= len; i += CHUNK_SIZE) - md5_update(&m5, (uchar *)map_ptr(buf, i, CHUNK_SIZE), CHUNK_SIZE); + md5_update(&m5, file_checksum_map_ptr(&scan, i, CHUNK_SIZE), CHUNK_SIZE); remainder = (int32)(len - i); if (remainder > 0) - md5_update(&m5, (uchar *)map_ptr(buf, i, remainder), remainder); + md5_update(&m5, file_checksum_map_ptr(&scan, i, remainder), remainder); md5_result(&m5, (uchar *)sum); break; @@ -514,7 +658,7 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) mdfour_begin(&m); for (i = 0; i + CSUM_CHUNK <= len; i += CSUM_CHUNK) - mdfour_update(&m, (uchar *)map_ptr(buf, i, CSUM_CHUNK), CSUM_CHUNK); + mdfour_update(&m, file_checksum_map_ptr(&scan, i, CSUM_CHUNK), CSUM_CHUNK); /* Prior to version 27 an incorrect MD4 checksum was computed * by failing to call mdfour_tail() for block sizes that @@ -522,7 +666,7 @@ void 
file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) * even when there are no more bytes. */ remainder = (int32)(len - i); if (remainder > 0 || file_sum_nni->num > CSUM_MD4_BUSTED) - mdfour_update(&m, (uchar *)map_ptr(buf, i, remainder), remainder); + mdfour_update(&m, file_checksum_map_ptr(&scan, i, remainder), remainder); mdfour_result(&m, (uchar *)sum); break; @@ -534,6 +678,8 @@ void file_checksum(const char *fname, const STRUCT_STAT *st_p, char *sum) } close(fd); + if (scan.scratch) + free(scan.scratch); unmap_file(buf); } @@ -678,6 +824,19 @@ void sum_update(const char *p, int32 len) } } +void sum_update_sparse_hole_token(OFF_T len) +{ + char buf[8]; + static const char sparse_hole_marker[] = "rsync-sparse-hole"; + + if (len <= 0) + return; + + SIVAL64(buf, 0, len); + sum_update(sparse_hole_marker, sizeof sparse_hole_marker); + sum_update(buf, sizeof buf); +} + /* The sum buffer only needs to be as long as the current checksum's digest * len, not MAX_DIGEST_LEN. Note that for CSUM_MD4_ARCHAIC that is the full * MD4_DIGEST_LEN even if the file-list code is going to ignore all but the diff --git a/fileio.c b/fileio.c index f80af19..9e8559f 100644 --- a/fileio.c +++ b/fileio.c @@ -40,6 +40,23 @@ OFF_T preallocated_len = 0; static OFF_T sparse_seek = 0; static OFF_T sparse_past_write = 0; +static int flush_sparse_seek(int f) +{ + if (!sparse_seek) + return 0; + + if (sparse_past_write >= preallocated_len) { + if (do_lseek(f, sparse_seek, SEEK_CUR) < 0) + return -1; + } else if (do_punch_hole(f, sparse_past_write, sparse_seek) < 0) { + sparse_seek = 0; + return -1; + } + + sparse_seek = 0; + return 0; +} + int sparse_end(int f, OFF_T size) { int ret; @@ -84,15 +101,8 @@ static int write_sparse(int f, int use_seek, OFF_T offset, const char *buf, int if (l1 == len) return len; - if (sparse_seek) { - if (sparse_past_write >= preallocated_len) { - if (do_lseek(f, sparse_seek, SEEK_CUR) < 0) - return -1; - } else if (do_punch_hole(f, sparse_past_write, 
sparse_seek) < 0) { - sparse_seek = 0; - return -1; - } - } + if (flush_sparse_seek(f) < 0) + return -1; sparse_seek = l2; sparse_past_write = offset + len - l2; @@ -182,6 +192,37 @@ int write_file(int f, int use_seek, OFF_T offset, const char *buf, int len) return ret; } +int write_sparse_hole(OFF_T len) +{ + if (len < 0) { + errno = EINVAL; + return -1; + } + + sparse_seek += len; + return 0; +} + +int write_sparse_literal(int f, OFF_T offset, const char *buf, int len) +{ + int ret, written = 0; + + if (flush_sparse_seek(f) < 0) + return -1; + + while (written < len) { + ret = write(f, buf + written, len - written); + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + return -1; + written += ret; + } + + sparse_past_write = offset + len; + return len; +} + /* An in-place update found identical data at an identical location. We either * just seek past it, or (for an in-place sparse update), we give the data to * the sparse processor with the use_seek flag set. */ diff --git a/match.c b/match.c index 10e60bd..ba54de1 100644 --- a/match.c +++ b/match.c @@ -136,6 +136,101 @@ static void matched(int f, struct sum_struct *s, struct map_struct *buf, OFF_T o show_progress(last_match, buf->file_size); } +static void matched_sparse_hole(int f, struct map_struct *buf, OFF_T offset) +{ + OFF_T n = offset - last_match; + + if (n <= 0) + return; + + send_sparse_hole_token(f, n); + sum_update_sparse_hole_token(n); + data_transfer += n; + last_match = offset; + + if (buf && INFO_GTE(PROGRESS, 1)) + show_progress(last_match, buf->file_size); +} + +static void matched_sparse_data_range(int f, struct map_struct *buf, OFF_T offset) +{ + while (last_match < offset) { + int32 n = (int32)MIN(offset - last_match, (OFF_T)CHUNK_SIZE); + char *map = map_ptr(buf, last_match, n); + + send_sparse_data_token(f, map, n); + sum_update(map, n); + data_transfer += n; + last_match += n; + + if (buf && INFO_GTE(PROGRESS, 1)) + show_progress(last_match, buf->file_size); + } +} + +static int 
sparse_whole_file_match(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) +{ +#ifdef SEEK_DATA + OFF_T pos; + + /* Keep this protocol shortcut local to ordinary whole-file sparse transfers. */ + if (!buf || !sparse_fast_tokens_enabled() || !buf->sparse_file) + return 0; + + pos = last_match; + while (pos < len) { + OFF_T data, hole; + + errno = 0; + data = do_lseek(buf->fd, pos, SEEK_DATA); + buf->p_fd_offset = -1; + if (data < 0) { + if (errno == EINVAL && pos == 0) + return 0; + if (errno == ENXIO) { + matched_sparse_hole(f, buf, len); + break; + } + matched_sparse_data_range(f, buf, len); + break; + } + if (data < pos) + data = pos; + if (data > len) + data = len; + if (data > pos) + matched_sparse_hole(f, buf, data); + if (data >= len) + break; + + errno = 0; + hole = do_lseek(buf->fd, data, SEEK_HOLE); + buf->p_fd_offset = -1; + if (hole < 0) { + if (errno == EINVAL && pos == 0) + return 0; + hole = len; + } + if (hole > len) + hole = len; + if (hole <= data) + hole = data + 1; + + matched_sparse_data_range(f, buf, hole); + pos = hole; + } + + matched(f, s, buf, len, -1); + return 1; +#else + (void)f; + (void)s; + (void)buf; + (void)len; + return 0; +#endif +} + static void hash_search(int f,struct sum_struct *s, struct map_struct *buf, OFF_T len) @@ -402,10 +497,12 @@ void match_sums(int f, struct sum_struct *s, struct map_struct *buf, OFF_T len) rprintf(FINFO,"done hash search\n"); } else { OFF_T j; - /* by doing this in pieces we avoid too many seeks */ - for (j = last_match + CHUNK_SIZE; j < len; j += CHUNK_SIZE) - matched(f, s, buf, j, -2); - matched(f, s, buf, len, -1); + if (!sparse_whole_file_match(f, s, buf, len)) { + /* by doing this in pieces we avoid too many seeks */ + for (j = last_match + CHUNK_SIZE; j < len; j += CHUNK_SIZE) + matched(f, s, buf, j, -2); + matched(f, s, buf, len, -1); + } } sum_end(sender_file_sum); diff --git a/receiver.c b/receiver.c index 77de869..76a19ef 100644 --- a/receiver.c +++ b/receiver.c @@ -247,7 +247,7 
@@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, OFF_T total_size = F_LENGTH(file); OFF_T offset = 0; OFF_T offset2; - char *data; + char *data = NULL; int32 i; char *map = NULL; @@ -319,7 +319,32 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, if (allowed_lull) maybe_send_keepalive(time(NULL), MSK_ALLOW_FLUSH | MSK_ACTIVE_RECEIVER); + if (i == SPARSE_HOLE_TOKEN) { + OFF_T len64 = recv_sparse_hole_len(); + + if (len64 <= 0 || len64 > total_size - offset) { /* compare by subtraction: offset + len64 could overflow a signed OFF_T */ + rprintf(FERROR_XFER, "invalid sparse token length %s for %s\n", + big_num(len64), full_fname(fname)); + exit_cleanup(RERR_PROTOCOL); + } + + stats.literal_data += len64; + cleanup_got_literal = 1; + sum_update_sparse_hole_token(len64); + + if (fd != -1 && write_sparse_hole(len64) != 0) + goto report_write_error; + offset += len64; + continue; + } + if (i > 0) { + if (i > total_size - offset) { /* overflow-free bound; NOTE(review): applies to every literal token, so a source that grew past the file-list length is now rejected -- confirm intended */ + rprintf(FERROR_XFER, "invalid data token length %ld for %s\n", + (long)i, full_fname(fname)); + exit_cleanup(RERR_PROTOCOL); + } + if (DEBUG_GTE(DELTASUM, 3)) { rprintf(FINFO,"data recv %d at %s\n", i, big_num(offset)); @@ -330,8 +355,13 @@ static int receive_data(int f_in, char *fname_r, int fd_r, OFF_T size_r, sum_update(data, i); - if (fd != -1 && write_file(fd, 0, offset, data, i) != i) - goto report_write_error; + if (fd != -1) { + int wrote = recv_sparse_literal_data() + ?
write_sparse_literal(fd, offset, data, i) + : write_file(fd, 0, offset, data, i); + if (wrote != i) + goto report_write_error; + } offset += i; continue; } diff --git a/rsync.h b/rsync.h index 1344ecc..052b5ce 100644 --- a/rsync.h +++ b/rsync.h @@ -154,6 +154,8 @@ #define RSYNC_PORT 873 #define SPARSE_WRITE_SIZE (1024) +#define SPARSE_HOLE_TOKEN (-2147483647-1) +#define SPARSE_DATA_TOKEN (-2147483647) #define WRITE_SIZE (32*1024) #define CHUNK_SIZE (32*1024) #define MAX_MAP_SIZE (256*1024) @@ -981,6 +983,7 @@ struct map_struct { int32 def_window_size; /* Default window size */ int fd; /* File Descriptor */ int status; /* first errno from read errors */ + int sparse_file; /* st_blocks reports less allocation than size */ }; #define sum2_at(s, i) ((s)->sum2_array + ((size_t)(i) * xfer_sum_len)) diff --git a/sender.c b/sender.c index a4d46c3..966c044 100644 --- a/sender.c +++ b/sender.c @@ -126,6 +126,23 @@ static struct sum_struct *receive_sums(int f) return s; } +static int stat_sparse_file(const STRUCT_STAT *st) +{ +#ifdef SEEK_DATA + OFF_T len = st->st_size; + OFF_T logical_blocks; + + if (len <= 0) + return 0; + + logical_blocks = len / 512 + (len % 512 != 0); + return st->st_blocks < logical_blocks; +#else + (void)st; + return 0; +#endif +} + void successful_send(int ndx) { char fname[MAXPATHLEN]; @@ -400,6 +417,7 @@ void send_files(int f_in, int f_out) if (st.st_size) { int32 read_size = MAX(s->blength * 3, MAX_MAP_SIZE); mbuf = map_file(fd, st.st_size, read_size, s->blength); + mbuf->sparse_file = stat_sparse_file(&st); } else mbuf = NULL; diff --git a/token.c b/token.c index c108b3a..3a4e7b1 100644 --- a/token.c +++ b/token.c @@ -21,6 +21,7 @@ #include "rsync.h" #include "itypes.h" +#include "inums.h" #include <zlib.h> #ifdef SUPPORT_ZSTD #include <zstd.h> @@ -30,9 +31,15 @@ #endif extern int do_compression; +extern int local_server; extern int protocol_version; extern int module_id; extern int do_compression_level; +extern int preallocate_files; +extern int read_batch;
+extern int sparse_files; +extern int whole_file; +extern int write_batch; extern char *skip_compress; #ifndef Z_INSERT_ONLY @@ -42,6 +49,9 @@ extern char *skip_compress; static int skip_compression_level; /* The least possible compressing for handling skip-compress files. */ static int per_file_default_level; /* The default level that each new file gets prior to checking its suffix. */ +static OFF_T sparse_hole_token_len; +static int sparse_token_literal_data; + struct suffix_tree { struct suffix_tree *sibling; struct suffix_tree *child; @@ -277,10 +287,28 @@ void set_compression(const char *fname) #endif } +int sparse_fast_tokens_enabled(void) +{ + return local_server && sparse_files > 0 && whole_file > 0 + && do_compression == CPRES_NONE + && !preallocate_files && !read_batch && !write_batch; +} + +OFF_T recv_sparse_hole_len(void) +{ + return sparse_hole_token_len; +} + +int recv_sparse_literal_data(void) +{ + return sparse_token_literal_data; +} + /* non-compressing recv token */ static int32 simple_recv_token(int f, char **data) { static int32 residue; + static int residue_is_sparse_data; static char *buf; int32 n; @@ -289,14 +317,42 @@ static int32 simple_recv_token(int f, char **data) if (residue == 0) { int32 i = read_int(f); - if (i <= 0) + sparse_token_literal_data = residue_is_sparse_data = 0; + if (i == SPARSE_HOLE_TOKEN) { + if (!sparse_fast_tokens_enabled()) { + rprintf(FERROR, "unexpected sparse hole token\n"); + exit_cleanup(RERR_PROTOCOL); + } + sparse_hole_token_len = (OFF_T)read_longint(f); + if (sparse_hole_token_len <= 0) { + rprintf(FERROR, "invalid sparse hole token length %s\n", + big_num(sparse_hole_token_len)); + exit_cleanup(RERR_PROTOCOL); + } return i; - residue = i; + } + if (i == SPARSE_DATA_TOKEN) { + if (!sparse_fast_tokens_enabled()) { + rprintf(FERROR, "unexpected sparse data token\n"); + exit_cleanup(RERR_PROTOCOL); + } + residue = read_int(f); + if (residue <= 0 || residue > CHUNK_SIZE) { + rprintf(FERROR, "invalid sparse data 
token length %ld\n", (long)residue); + exit_cleanup(RERR_PROTOCOL); + } + residue_is_sparse_data = 1; + } else { + if (i <= 0) + return i; + residue = i; + } } *data = buf; n = MIN(CHUNK_SIZE,residue); residue -= n; + sparse_token_literal_data = residue_is_sparse_data; read_buf(f,buf,n); return n; } @@ -318,6 +374,31 @@ static void simple_send_token(int f, int32 token, struct map_struct *buf, OFF_T write_int(f, -(token+1)); } +void send_sparse_hole_token(int f, OFF_T len) +{ + if (len <= 0) + return; + + if (do_compression != CPRES_NONE) + NOISY_DEATH("Sparse tokens require uncompressed local I/O"); + + write_int(f, SPARSE_HOLE_TOKEN); + write_longint(f, len); +} + +void send_sparse_data_token(int f, const char *data, int32 len) +{ + if (len <= 0) + return; + + if (do_compression != CPRES_NONE) + NOISY_DEATH("Sparse data tokens require uncompressed local I/O"); + + write_int(f, SPARSE_DATA_TOKEN); + write_int(f, len); + write_buf(f, data, len); +} + /* Flag bytes in compressed stream are encoded as follows: */ #define END_FLAG 0 /* that's all folks */ #define TOKEN_LONG 0x20 /* followed by 32-bit token number */ -- Gitee