From: Jeff Garzik <jeff@garzik.org>
To: hail-devel@vger.kernel.org
Subject: [PATCH 3/3] chunkd: on-disk format stores per-64k checksums
Date: Thu, 15 Jul 2010 14:21:28 -0400 [thread overview]
Message-ID: <20100715182128.GC16808@havoc.gtf.org> (raw)
In-Reply-To: <20100715181857.GA16632@havoc.gtf.org>
commit e6fcc02bea062af291148771a59ee2028ae98834
Author: Jeff Garzik <jeff@garzik.org>
Date: Thu Jul 15 13:57:17 2010 -0400
chunkd: Add checksum table to on-disk format, one sum per 64k of data
Signed-off-by: Jeff Garzik <jgarzik@redhat.com>
chunkd/be-fs.c | 145 +++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 127 insertions(+), 18 deletions(-)
diff --git a/chunkd/be-fs.c b/chunkd/be-fs.c
index 671c8fd..1bd85ea 100644
--- a/chunkd/be-fs.c
+++ b/chunkd/be-fs.c
@@ -40,6 +40,11 @@
#define BE_FS_OBJ_MAGIC "CHU1"
+enum { /* granularity of the on-disk checksum table: one checksum per block */
+ CHUNK_BLK_ORDER = 16, /* log2 of the block size; 2^16 = 64k blocks */
+ CHUNK_BLK_SZ = 1 << CHUNK_BLK_ORDER, /* block size in bytes */
+};
+
struct fs_obj {
struct backend_obj bo;
@@ -49,14 +54,23 @@ struct fs_obj {
int in_fd;
char *in_fn;
+
+ size_t checked_bytes;
+ SHA_CTX checksum;
+ unsigned int csum_idx;
+ void *csum_tbl;
+ size_t csum_tbl_sz;
+
+ unsigned int n_blk;
};
struct be_fs_obj_hdr {
char magic[4];
uint32_t key_len;
uint64_t value_len;
+ uint32_t n_blk;
- char reserved[16];
+ char reserved[12];
unsigned char hash[CHD_CSUM_SZ];
char owner[128];
@@ -204,6 +218,8 @@ static struct fs_obj *fs_obj_alloc(void)
obj->out_fd = -1;
obj->in_fd = -1;
+ SHA1_Init(&obj->checksum);
+
return obj;
}
@@ -314,6 +330,17 @@ static bool key_valid(const void *key, size_t key_len)
return true;
}
+/*
+ * Number of CHUNK_BLK_SZ-sized blocks needed to hold data_len bytes.
+ * A trailing partial block counts as one block; zero bytes need zero blocks.
+ *
+ * NOTE(review): the count is computed in 64 bits and then narrowed by the
+ * cast, so counts that do not fit in unsigned int are truncated.
+ */
+static unsigned int fs_blk_count(uint64_t data_len)
+{
+	uint64_t whole_blks = data_len >> CHUNK_BLK_ORDER;
+	uint64_t tail_bytes = data_len & (CHUNK_BLK_SZ - 1);
+
+	return (unsigned int) (tail_bytes ? whole_blks + 1 : whole_blks);
+}
+
struct backend_obj *fs_obj_new(uint32_t table_id,
const void *key, size_t key_len,
uint64_t data_len,
@@ -321,6 +348,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
{
struct fs_obj *obj;
char *fn = NULL;
+ size_t csum_bytes;
enum chunk_errcode erc = che_InternalError;
off_t skip_len;
@@ -335,6 +363,13 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
return NULL;
}
+ obj->n_blk = fs_blk_count(data_len);
+ csum_bytes = obj->n_blk * CHD_CSUM_SZ;
+ obj->csum_tbl = malloc(csum_bytes);
+ if (!obj->csum_tbl)
+ goto err_out;
+ obj->csum_tbl_sz = csum_bytes;
+
/* build local fs pathname */
fn = fs_obj_pathname(table_id, key, key_len);
if (!fn)
@@ -355,7 +390,7 @@ struct backend_obj *fs_obj_new(uint32_t table_id,
obj->out_fn = fn;
/* calculate size of front-of-file metadata area */
- skip_len = sizeof(struct be_fs_obj_hdr) + key_len;
+ skip_len = sizeof(struct be_fs_obj_hdr) + key_len + csum_bytes;
/* position file pointer where object data (as in, not metadata)
* will begin
@@ -391,8 +426,11 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const char *user,
struct stat st;
struct be_fs_obj_hdr hdr;
ssize_t rrc;
- uint64_t value_len;
+ uint64_t value_len, tmp64;
+ size_t csum_bytes;
enum chunk_errcode erc = che_InternalError;
+ struct iovec iov[2];
+ size_t total_rd_len;
if (!key_valid(key, key_len)) {
*err_code = che_InvalidKey;
@@ -447,25 +485,49 @@ struct backend_obj *fs_obj_open(uint32_t table_id, const char *user,
}
/* verify object key length matches input key length */
- if (GUINT32_FROM_LE(hdr.key_len) != key_len)
+ if (G_UNLIKELY(GUINT32_FROM_LE(hdr.key_len) != key_len))
goto err_out;
- /* verify file size large enough to contain value */
value_len = GUINT64_FROM_LE(hdr.value_len);
- if ((st.st_size - sizeof(hdr) - key_len) < value_len) {
+ obj->n_blk = GUINT32_FROM_LE(hdr.n_blk);
+ csum_bytes = obj->n_blk * CHD_CSUM_SZ;
+
+ /* verify file size large enough to contain value */
+ tmp64 = value_len + sizeof(hdr) + key_len + csum_bytes;
+ if (G_UNLIKELY(st.st_size < tmp64)) {
applog(LOG_ERR, "obj(%s) unexpected size change", obj->in_fn);
goto err_out;
}
+ /* verify expected size of checksum table */
+ if (G_UNLIKELY(fs_blk_count(value_len) != obj->n_blk)) {
+ applog(LOG_ERR, "obj(%s) unexpected blk count "
+ "(%u from val sz, %u from hdr)",
+ obj->in_fn, fs_blk_count(value_len), obj->n_blk);
+ goto err_out;
+ }
+
+ obj->csum_tbl = malloc(csum_bytes);
+ if (!obj->csum_tbl)
+ goto err_out;
+ obj->csum_tbl_sz = csum_bytes;
+
obj->bo.key = malloc(key_len);
obj->bo.key_len = key_len;
if (!obj->bo.key)
goto err_out;
- /* read object variable-length header */
- rrc = read(obj->in_fd, obj->bo.key, key_len);
- if ((rrc != key_len) || (memcmp(key, obj->bo.key, key_len))) {
- applog(LOG_ERR, "read hdr key obj(%s) failed: %s",
+ /* init additional header segment list */
+ iov[0].iov_base = obj->bo.key;
+ iov[0].iov_len = key_len;
+ iov[1].iov_base = obj->csum_tbl;
+ iov[1].iov_len = csum_bytes;
+ total_rd_len = iov[0].iov_len + iov[1].iov_len;
+
+ /* read additional header segments (key, checksum table) */
+ rrc = readv(obj->in_fd, iov, ARRAY_SIZE(iov));
+ if ((rrc != total_rd_len) || (memcmp(key, obj->bo.key, key_len))) {
+ applog(LOG_ERR, "read addnl hdrs(%s) failed: %s",
obj->in_fn,
(rrc < 0) ? strerror(errno) : "<unknown reasons>");
goto err_out;
@@ -508,6 +570,7 @@ void fs_obj_free(struct backend_obj *bo)
if (obj->in_fd >= 0)
close(obj->in_fd);
+ free(obj->csum_tbl);
free(obj);
}
@@ -524,17 +587,48 @@ ssize_t fs_obj_read(struct backend_obj *bo, void *ptr, size_t len)
return rc;
}
+/*
+ * Finalize the running SHA-1 for the current block, store the digest in the
+ * next slot of the object's checksum table, and restart the hash state for
+ * the following block.
+ *
+ * The table is allocated with room for exactly n_blk digests.  If more data
+ * arrives than that covers, the digest is dropped instead of being written
+ * past the end of the table; csum_idx still advances so that the
+ * csum_idx/n_blk comparison in fs_obj_write_commit() rejects the object.
+ */
+static void obj_flush_csum(struct backend_obj *bo)
+{
+	struct fs_obj *obj = bo->private;
+	unsigned char *tbl = obj->csum_tbl;	/* byte pointer: no void * math */
+	unsigned char md[CHD_CSUM_SZ];
+
+	SHA1_Final(md, &obj->checksum);
+
+	/* never store beyond the n_blk slots that were allocated */
+	if (obj->csum_idx < obj->n_blk)
+		memcpy(tbl + ((size_t) obj->csum_idx * CHD_CSUM_SZ),
+		       md, CHD_CSUM_SZ);
+	obj->csum_idx++;
+
+	/* begin a fresh block */
+	obj->checked_bytes = 0;
+	SHA1_Init(&obj->checksum);
+}
+
+/*
+ * Append len bytes from ptr to the object's output file, feeding the same
+ * bytes into the running per-block SHA-1.
+ *
+ * Returns the total number of bytes written (len on success), or the
+ * negative write(2) result if a write fails.  Bytes written before a
+ * failure remain accounted for in obj->written_bytes.
+ */
ssize_t fs_obj_write(struct backend_obj *bo, const void *ptr, size_t len)
{
	struct fs_obj *obj = bo->private;
-	ssize_t rc;
+	const unsigned char *buf = ptr;	/* byte pointer: no void * math */
+	ssize_t total = 0;
+
+	/*
+	 * Write in pieces that never cross a CHUNK_BLK_SZ boundary, so each
+	 * block's checksum can be finalized as soon as the block is full.
+	 */
+	while (len > 0) {
+		size_t unchecked;
+		ssize_t rc;
+
+		unchecked = CHUNK_BLK_SZ - obj->checked_bytes;
+
+		rc = write(obj->out_fd, buf, MIN(unchecked, len));
+		if (rc < 0) {
+			applog(LOG_ERR, "obj write(%s) failed: %s",
+			       obj->out_fn, strerror(errno));
+			return rc;
+		}
+
+		/* hash and count only the bytes that actually reached the file */
+		SHA1_Update(&obj->checksum, buf, rc);
-	rc = write(obj->out_fd, ptr, len);
-	if (rc < 0)
-		applog(LOG_ERR, "obj write(%s) failed: %s",
-		       obj->out_fn, strerror(errno));
-	else
-		obj->written_bytes += rc;
+		obj->written_bytes += rc;
+		obj->checked_bytes += rc;
+		total += rc;
+		buf += rc;
+		len -= rc;
+
+		/* if at end of 64k block, update csum table with new csum */
+		if (obj->checked_bytes == CHUNK_BLK_SZ)
+			obj_flush_csum(bo);
+	}
-	return rc;
+	return total;
}
@@ -546,7 +640,7 @@ bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
struct be_fs_obj_hdr hdr;
ssize_t wrc;
size_t total_wr_len;
- struct iovec iov[2];
+ struct iovec iov[3];
memset(&hdr, 0, sizeof(hdr));
memcpy(hdr.magic, BE_FS_OBJ_MAGIC, strlen(BE_FS_OBJ_MAGIC));
@@ -554,6 +648,19 @@ bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
strncpy(hdr.owner, user, sizeof(hdr.owner));
hdr.key_len = GUINT32_TO_LE(bo->key_len);
hdr.value_len = GUINT64_TO_LE(obj->written_bytes);
+ hdr.n_blk = GUINT32_TO_LE(obj->n_blk);
+
+ /* update checksum table with final csum, if necessary */
+ if (obj->checked_bytes > 0)
+ obj_flush_csum(bo);
+
+ if (G_UNLIKELY(obj->csum_idx != obj->n_blk)) {
+ applog(LOG_ERR, "BUG(%s): csum_idx/n_blk mismatch: %u/%u",
+ obj->out_fn, obj->csum_idx, obj->n_blk);
+ return false;
+ }
+
+ obj->csum_idx = 0;
/* go back to beginning of file */
if (lseek(obj->out_fd, 0, SEEK_SET) < 0) {
@@ -567,7 +674,9 @@ bool fs_obj_write_commit(struct backend_obj *bo, const char *user,
iov[0].iov_len = sizeof(hdr);
iov[1].iov_base = bo->key;
iov[1].iov_len = bo->key_len;
- total_wr_len = iov[0].iov_len + iov[1].iov_len;
+ iov[2].iov_base = obj->csum_tbl;
+ iov[2].iov_len = obj->csum_tbl_sz;
+ total_wr_len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
/* write object header segments */
wrc = writev(obj->out_fd, iov, ARRAY_SIZE(iov));
prev parent reply other threads:[~2010-07-15 18:21 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-07-15 18:18 [PATCH 0/3] update chunkd checksum verification scheme Jeff Garzik
2010-07-15 18:20 ` [PATCH 1/3] chunkd: remove sendfile(2) support Jeff Garzik
2010-07-15 18:20 ` [PATCH 2/3] chunkd: pass data-length to PUT backend init Jeff Garzik
2010-07-15 18:21 ` Jeff Garzik [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20100715182128.GC16808@havoc.gtf.org \
--to=jeff@garzik.org \
--cc=hail-devel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for read-only IMAP folder(s) and NNTP newsgroup(s).