INN commit: trunk (3 files)
INN Commit
Russ_Allbery at isc.org
Sat Nov 29 11:07:03 UTC 2008
Date: Saturday, November 29, 2008 @ 03:07:02
Author: iulius
Revision: 8195
Allow the use of buffers larger than 2 GB with buffindexed.
Patch from Kirill Berezin.
Idea of the patch:
if (mmapwrite(ovbuff->fd, &ovindexhead, sizeof(OVINDEXHEAD),
ovbuff->base + ov.blocknum * OV_BLOCKSIZE) != sizeof(OVINDEXHEAD)) {
This is part of the current ovsetcurindexblock function. The fourth argument
is the offset from the beginning of the buffer: ovbuff->base is an off_t and
ov.blocknum is an unsigned int. When INN is compiled without
--enable-largefiles this works fine, but otherwise the size of off_t
is 8 bytes while unsigned int still uses 4 bytes.
Now say we have a 5 GB buffer, which is roughly equal to 625000 blocks of
8000 bytes, and we want to access block #620000, which corresponds to an
offset of 4960000000 bytes, that is, near the very end of the buffer. But the
C standard does not require every operand of an expression to be converted to
the widest type involved before the result is calculated; instead, it
requires conversion of the operands of the current operation only.
As a result, because multiplication has higher precedence than addition,
blocknum is only 4 bytes wide, and the constant carries no size hint, the
product is computed in 32 bits and wraps around: we end up with an offset
somewhere toward the beginning of the buffer instead of at the very end of it.
To remove this limitation, a macro (OV_OFFSET) now calculates the
offset, casting the block size to off_t so that the multiplication by blocknum
is performed in off_t instead of unsigned int.
Modified:
trunk/doc/pod/buffindexed.conf.pod
trunk/doc/pod/news.pod
trunk/storage/buffindexed/buffindexed.c
-----------------------------------+
doc/pod/buffindexed.conf.pod | 6 ++----
doc/pod/news.pod | 7 +++++++
storage/buffindexed/buffindexed.c | 20 +++++++++++---------
3 files changed, 20 insertions(+), 13 deletions(-)
Modified: doc/pod/buffindexed.conf.pod
===================================================================
--- doc/pod/buffindexed.conf.pod 2008-11-29 10:39:08 UTC (rev 8194)
+++ doc/pod/buffindexed.conf.pod 2008-11-29 11:07:02 UTC (rev 8195)
@@ -49,9 +49,7 @@
<size> is the length of the buffer in kilobytes (S<1 KB = 1024 bytes>). If
<filename> does not specify a special device, the file size of the buffer
must be S<< <size> * 1024 bytes >>. If it does specify a special device, that
-device must have at least <size> space available. Buffers over S<2 GB> are
-not supported (regardless of whether INN was compiled with large file
-support); this limitation may be fixed in the future. For more
+device must have at least <size> space available. For more
information on setting up the buffers, see L<CREATING BUFFERS>.
An example of F<buffindexed.conf> file can be:
@@ -99,7 +97,7 @@
locking on block devices, and therefore this method should not be used on
Solaris.
-Partition the disk to make each partition equal to or smaller than S<2 GB>.
+Partition the disk.
If you're using Solaris, set up your partitions to avoid the first
cylinder of the disk (or otherwise the buffindexed header will overwrite
the disk partition table and render the buffers inaccessible). Then,
Modified: doc/pod/news.pod
===================================================================
--- doc/pod/news.pod 2008-11-29 10:39:08 UTC (rev 8194)
+++ doc/pod/news.pod 2008-11-29 11:07:02 UTC (rev 8195)
@@ -61,6 +61,13 @@
=item *
+Thanks to Kirill Berezin, the buffindexed overview method now supports buffers
+larger than S<2 GB>. It is not necessary to compile INN with large file support
+to use such large buffers with buffindexed. Buffindexed is also more robust
+with mmaped files.
+
+=item *
+
B<tinyleaf>, a miniature IHAVE-only leaf server, is now included. See the
tinyleaf(8) man page for more information.
Modified: storage/buffindexed/buffindexed.c
===================================================================
--- storage/buffindexed/buffindexed.c 2008-11-29 10:39:08 UTC (rev 8194)
+++ storage/buffindexed/buffindexed.c 2008-11-29 11:07:02 UTC (rev 8195)
@@ -69,6 +69,7 @@
#define OV_BEFOREBITF (1 * OV_BLOCKSIZE)
#define OV_BLOCKSIZE 8192
#define OV_FUDGE 1024
+#define OV_OFFSET(block) (block*(off_t) OV_BLOCKSIZE)
/* ovblock pointer */
typedef struct _OV {
@@ -96,7 +97,7 @@
/* ovbuff info */
typedef struct _OVBUFF {
- unsigned int index; /* ovbuff index */
+ unsigned int index; /* ovbuff (partition or file) */
char path[OVBUFFPASIZ]; /* Path to file */
int fd; /* file descriptor for this
ovbuff */
@@ -1330,7 +1331,7 @@
ovindexhead.next = ovnull;
ovindexhead.low = 0;
ovindexhead.high = 0;
- if (PWRITE(ovbuff->fd, &ovindexhead, sizeof(OVINDEXHEAD), ovbuff->base + ov.blocknum * OV_BLOCKSIZE) != sizeof(OVINDEXHEAD)) {
+ if (PWRITE(ovbuff->fd, &ovindexhead, sizeof(OVINDEXHEAD), ovbuff->base + OV_OFFSET(ov.blocknum)) != sizeof(OVINDEXHEAD)) {
syswarn("buffindexed: could not write index record index '%d', blocknum"
" '%d'", ge->curindex.index, ge->curindex.blocknum);
return true;
@@ -1352,7 +1353,7 @@
ovindexhead.next = ov;
ovindexhead.low = ge->curlow;
ovindexhead.high = ge->curhigh;
- if (PWRITE(ovbuff->fd, &ovindexhead, sizeof(OVINDEXHEAD), ovbuff->base + ge->curindex.blocknum * OV_BLOCKSIZE) != sizeof(OVINDEXHEAD)) {
+ if (PWRITE(ovbuff->fd, &ovindexhead, sizeof(OVINDEXHEAD), ovbuff->base + OV_OFFSET(ge->curindex.blocknum)) != sizeof(OVINDEXHEAD)) {
syswarn("buffindexed: could not write index record index '%d', blocknum"
" '%d'", ge->curindex.index, ge->curindex.blocknum);
return false;
@@ -1432,7 +1433,7 @@
#endif /* OV_DEBUG */
}
- if (PWRITE(ovbuff->fd, data, len, ovbuff->base + ge->curdata.blocknum * OV_BLOCKSIZE + ge->curoffset) != len) {
+ if (PWRITE(ovbuff->fd, data, len, ovbuff->base + OV_OFFSET(ge->curdata.blocknum) + ge->curoffset) != len) {
syswarn("buffindexed: could not append overview record index '%d',"
" blocknum '%d'", ge->curdata.index, ge->curdata.blocknum);
return false;
@@ -1469,7 +1470,7 @@
ovusedblock(ovbuff, ge->curindex.blocknum, true, true);
#endif /* OV_DEBUG */
}
- if (PWRITE(ovbuff->fd, &ie, sizeof(ie), ovbuff->base + ge->curindex.blocknum * OV_BLOCKSIZE + sizeof(OVINDEXHEAD) + sizeof(ie) * ge->curindexoffset) != sizeof(ie)) {
+ if (PWRITE(ovbuff->fd, &ie, sizeof(ie), ovbuff->base + OV_OFFSET(ge->curindex.blocknum) + sizeof(OVINDEXHEAD) + sizeof(ie) * ge->curindexoffset) != sizeof(ie)) {
syswarn("buffindexed: could not write index record index '%d', blocknum"
" '%d'", ge->curindex.index, ge->curindex.blocknum);
return true;
@@ -1486,7 +1487,7 @@
ovindexhead.next = ovnull;
ovindexhead.low = ge->curlow;
ovindexhead.high = ge->curhigh;
- if (PWRITE(ovbuff->fd, &ovindexhead, sizeof(OVINDEXHEAD), ovbuff->base + ge->curindex.blocknum * OV_BLOCKSIZE) != sizeof(OVINDEXHEAD)) {
+ if (PWRITE(ovbuff->fd, &ovindexhead, sizeof(OVINDEXHEAD), ovbuff->base + OV_OFFSET(ge->curindex.blocknum)) != sizeof(OVINDEXHEAD)) {
syswarn("buffindexed: could not write index record index '%d', blocknum"
" '%d'", ge->curindex.index, ge->curindex.blocknum);
return true;
@@ -1653,7 +1654,7 @@
ovgroupunmap();
return false;
}
- offset = ovbuff->base + (ov.blocknum * OV_BLOCKSIZE);
+ offset = ovbuff->base + OV_OFFSET(ov.blocknum);
pagefudge = offset % pagesize;
mmapoffset = offset - pagefudge;
len = pagefudge + OV_BLOCKSIZE;
@@ -1717,11 +1718,12 @@
if (count * OV_BLOCKSIZE > innconf->keepmmappedthreshold * 1024)
/* large retrieval, mmap is done in ovsearch() */
return true;
+ /* Data blocks are being mmapped, not copied. */
for (i = 0 ; i < GROUPDATAHASHSIZE ; i++) {
for (gdb = groupdatablock[i] ; gdb != NULL ; gdb = gdb->next) {
ov = gdb->datablk;
ovbuff = getovbuff(ov);
- offset = ovbuff->base + (ov.blocknum * OV_BLOCKSIZE);
+ offset = ovbuff->base + OV_OFFSET(ov.blocknum);
pagefudge = offset % pagesize;
mmapoffset = offset - pagefudge;
gdb->len = pagefudge + OV_BLOCKSIZE;
@@ -1857,7 +1859,7 @@
search->gdb.datablk.blocknum = srchov.blocknum;
search->gdb.datablk.index = srchov.index;
ovbuff = getovbuff(srchov);
- offset = ovbuff->base + (srchov.blocknum * OV_BLOCKSIZE);
+ offset = ovbuff->base + OV_OFFSET(srchov.blocknum);
pagefudge = offset % pagesize;
mmapoffset = offset - pagefudge;
search->gdb.len = pagefudge + OV_BLOCKSIZE;
More information about the inn-committers
mailing list