When matching a pattern check if bottom bits of hash are 0 - dedup - deduplicating backup program
(HTM) git clone git://git.z3bra.org/dedup.git
(DIR) Log
(DIR) Files
(DIR) Refs
(DIR) README
(DIR) LICENSE
---
(DIR) commit d60ace395a74a5efe067ee9cd5d85446c7facf43
(DIR) parent d8bfc3a69ce4c3c35dfa8c0d5cef3ce10e424300
(HTM) Author: sin <sin@2f30.org>
Date: Tue, 26 Feb 2019 09:48:57 +0000
When matching a pattern check if bottom bits of hash are 0
This approach is more efficient and easier to understand.
Diffstat:
M chunker.c | 10 +---------
M config.h | 1 +
2 files changed, 2 insertions(+), 9 deletions(-)
---
(DIR) diff --git a/chunker.c b/chunker.c
@@ -14,7 +14,6 @@ struct chunker {
size_t cap;
size_t rpos;
size_t wpos;
- size_t discr;
int fd;
};
@@ -88,7 +87,7 @@ match_pattern(struct chunker *chunker, size_t chunk_size, uint32_t fp)
return 1;
if (chunk_size < BLKSIZE_MIN)
return 0;
- return (fp % chunker->discr) == chunker->discr - 1;
+ return (fp & HASHMASK_BITS) == 0;
}
static size_t
@@ -123,12 +122,6 @@ get_chunk_size(struct chunker *chunker)
return chunk_size;
}
-static size_t
-calc_discr(size_t avg)
-{
- return avg / (-1.42888852e-7 * avg + 1.33237515);
-}
-
struct chunker *
alloc_chunker(size_t cap, int fd)
{
@@ -145,7 +138,6 @@ alloc_chunker(size_t cap, int fd)
chunker->rpos = 0;
chunker->wpos = 0;
chunker->fd = fd;
- chunker->discr = calc_discr(BLKSIZE_AVG);
return chunker;
}
(DIR) diff --git a/config.h b/config.h
@@ -1,4 +1,5 @@
#define BLKSIZE_AVG ((size_t)524288)
#define BLKSIZE_MIN ((BLKSIZE_AVG) / 4)
#define BLKSIZE_MAX ((BLKSIZE_AVG) * 4)
+#define HASHMASK_BITS (BLKSIZE_AVG - 1)
#define WINSIZE 32