[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Aoetools-discuss] write buffering and the AoE kernel module
> Yikes! That's creepy. I'll be modifying my copy of the vblade software
> locally to round (down) to the nearest 4KB for the device size, ensuring
> the largest possible frame sizes (assuming Gig-E), unless you think that
> is crazy-talk.
It's only really necessary if you intend to use the raw block device,
but it sure doesn't hurt. I usually create my vblades as multiples of
1M to avoid thinking too hard about it:
dd if=/dev/zero of=/tmp/foo bs=1M count=32
> Which leads me to my next question: could you give me a hint (or a
> source file and line number ;-) for how to increase the maximum frame
> size beyond a 4KB payload? I know the FAQ suggests that it's not done
> right now because performance seems to drop off after that size but I'd
> like to experiment with it anyway.
Unfortunately it's not that easy. I've attached a patch against
aoe6-45 that you can test with. It should apply to aoe6-47 with
minimal fuzz. As usual with alpha patches, be aware that there's a
nonzero probability of a kernel panic.
Cheers,
Sam
diff -uprN aoe6-45/linux/drivers/block/aoe/aoecmd.c aoe6-45rc1/linux/drivers/block/aoe/aoecmd.c
--- aoe6-45/linux/drivers/block/aoe/aoecmd.c 2007-02-07 18:59:18.000000000 -0500
+++ aoe6-45rc1/linux/drivers/block/aoe/aoecmd.c 2007-02-08 10:49:19.000000000 -0500
@@ -203,6 +203,24 @@ gotone: skb_shinfo(skb)->nr_frags = s
return NULL;
}
+static void /* describe cnt bytes, beginning at page offset off inside *bv, as page fragments of skb */
+skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)
+{
+ int frag = 0; /* fragment slot passed to skb_fill_page_desc; restarts at 0 on every call */
+ ulong fcnt; /* bytes taken from the current bio_vec */
+loop:
+ fcnt = bv->bv_len - (off - bv->bv_offset); /* bytes from off to the end of this bio_vec */
+ if (fcnt > cnt) /* never describe more than is still wanted */
+ fcnt = cnt;
+ skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);
+ cnt -= fcnt; /* cannot wrap: fcnt was clamped to cnt above */
+ if (cnt <= 0) /* cnt is a ulong, so in practice this tests cnt == 0 */
+ return;
+ bv++; /* no bounds check here: the caller must ensure another bio_vec entry follows */
+ off = bv->bv_offset; /* every entry after the first is consumed from its start */
+ goto loop;
+}
+
static int
aoecmd_ata_rw(struct aoedev *d)
{
@@ -213,7 +231,7 @@ aoecmd_ata_rw(struct aoedev *d)
struct bio_vec *bv;
struct aoetgt *t;
struct sk_buff *skb;
- ulong bcnt;
+ ulong bcnt, fbcnt;
char writebit, extbit;
writebit = 0x10;
@@ -228,8 +246,28 @@ aoecmd_ata_rw(struct aoedev *d)
bcnt = t->ifp->maxbcnt;
if (bcnt == 0)
bcnt = DEFAULTBCNT;
- if (bcnt > buf->bv_resid)
- bcnt = buf->bv_resid;
+ if (bcnt > buf->resid)
+ bcnt = buf->resid;
+ fbcnt = bcnt;
+ f->bv = buf->bv;
+ f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
+ do {
+ if (fbcnt < buf->bv_resid) {
+ buf->bv_resid -= fbcnt;
+ buf->resid -= fbcnt;
+ break;
+ }
+ fbcnt -= buf->bv_resid;
+ buf->resid -= buf->bv_resid;
+ if (buf->resid == 0) {
+ d->inprocess = NULL;
+ break;
+ }
+ buf->bv++;
+ buf->bv_resid = buf->bv->bv_len;
+ WARN_ON(buf->bv_resid == 0);
+ } while (fbcnt);
+
/* initialize the headers & frame */
skb = f->skb;
h = (struct aoe_hdr *) skb->mac.raw;
@@ -255,7 +293,7 @@ aoecmd_ata_rw(struct aoedev *d)
ah->lba3 |= 0xe0; /* LBA bit + obsolete 0xa0 */
}
if (bio_data_dir(buf->bio) == WRITE) {
- skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
+ skb_fillup(skb, f->bv, f->bv_off, bcnt);
ah->aflags |= AOEAFL_WRITE;
skb->len += bcnt;
skb->data_len = bcnt;
@@ -269,18 +307,7 @@ aoecmd_ata_rw(struct aoedev *d)
/* mark all tracking fields and load out */
buf->nframesout += 1;
- buf->bv_off += bcnt;
- buf->bv_resid -= bcnt;
- buf->resid -= bcnt;
buf->sector += bcnt >> 9;
- if (buf->resid == 0) {
- d->inprocess = NULL;
- } else if (buf->bv_resid == 0) {
- buf->bv = ++bv;
- buf->bv_resid = bv->bv_len;
- WARN_ON(buf->bv_resid == 0);
- buf->bv_off = bv->bv_offset;
- }
skb->dev = t->ifp->nd;
skb = skb_clone(skb, GFP_ATOMIC);
@@ -380,12 +407,9 @@ resend(struct aoedev *d, struct aoetgt *
put_lba(ah, f->lba);
n = f->bcnt;
- if (n > DEFAULTBCNT)
- n = DEFAULTBCNT;
ah->scnt = n >> 9;
if (ah->aflags & AOEAFL_WRITE) {
- skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
- offset_in_page(f->bufaddr), n);
+ skb_fillup(skb, f->bv, f->bv_off, n);
skb->len = sizeof *h + sizeof *ah + n;
skb->data_len = n;
}
@@ -542,6 +566,7 @@ rexmit_timer(ulong vp)
ifp = NULL;
}
+/*
if (ATASCNT(f->skb->mac.raw) > DEFAULTBCNT / 512)
if (ifp && ++ifp->lostjumbo > (t->nframes << 1))
if (ifp->maxbcnt != DEFAULTBCNT) {
@@ -552,6 +577,7 @@ rexmit_timer(ulong vp)
DEFAULTBCNT);
ifp->maxbcnt = 0;
}
+*/
resend(d, t, f);
}
@@ -734,6 +760,42 @@ diskstats(struct gendisk *disk, struct b
disk_stat_add(disk, io_ticks, duration);
}
+static void /* copy cnt bytes from p into the pages of the bio_vec array at bv, starting at page offset off */
+bvcpy(struct bio_vec *bv, ulong off, char *p, ulong cnt)
+{
+ ulong fcnt; /* bytes copied into the current bio_vec */
+loop:
+ fcnt = bv->bv_len - (off - bv->bv_offset); /* room left in this bio_vec from off to its end */
+ if (fcnt > cnt) /* never copy more than is still wanted */
+ fcnt = cnt;
+ memcpy(page_address(bv->bv_page) + off, p, fcnt); /* NOTE(review): presumably needs a page with a kernel mapping - confirm for highmem */
+ cnt -= fcnt; /* cannot wrap: fcnt was clamped to cnt above */
+ if (cnt <= 0) /* cnt is a ulong, so in practice this tests cnt == 0 */
+ return;
+ p += fcnt; /* advance the source past what was just copied */
+ bv++; /* no bounds check here: the caller must ensure another bio_vec entry follows */
+ off = bv->bv_offset; /* every entry after the first is filled from its start */
+ goto loop;
+}
+
+static void /* move the frame's position (lba, bv, bv_off) forward by cnt bytes */
+fadvance(struct frame *f, ulong cnt)
+{
+ ulong fcnt; /* bytes left in the current bio_vec from f->bv_off to its end */
+
+ f->lba += cnt >> 9; /* cnt bytes expressed in 512-byte units */
+loop:
+ fcnt = f->bv->bv_len - (f->bv_off - f->bv->bv_offset);
+ if (fcnt > cnt) { /* new position lies inside this bio_vec: bump the offset and stop */
+ f->bv_off += cnt;
+ return;
+ }
+ cnt -= fcnt; /* fcnt <= cnt here, so this cannot wrap */
+ f->bv++; /* also taken when fcnt == cnt, so a following bio_vec entry must exist; not checked here */
+ f->bv_off = f->bv->bv_offset; /* continue from the start of the next entry */
+ goto loop;
+}
+
void
aoecmd_ata_rsp(struct sk_buff *skb)
{
@@ -813,7 +875,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
spin_unlock_irqrestore(&d->lock, flags);
return;
}
- memcpy(f->bufaddr, ahin+1, n);
+ bvcpy(f->bv, f->bv_off, (char *) (ahin+1), n);
case WIN_WRITE:
case WIN_WRITE_EXT:
ifp = getif(t, skb->dev);
@@ -823,8 +885,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
ifp->lostjumbo = 0;
}
if (f->bcnt -= n) {
- f->lba += n >> 9;
- f->bufaddr += n;
+ fadvance(f, n);
resend(d, t, f);
goto xmit;
}
diff -uprN aoe6-45/linux/drivers/block/aoe/aoe.h aoe6-45rc1/linux/drivers/block/aoe/aoe.h
--- aoe6-45/linux/drivers/block/aoe/aoe.h 2007-02-07 18:59:18.000000000 -0500
+++ aoe6-45rc1/linux/drivers/block/aoe/aoe.h 2007-02-06 17:38:59.000000000 -0500
@@ -1,5 +1,5 @@
/* Copyright (c) 2006 Coraid, Inc. See COPYING for GPL terms. */
-#define VERSION "45"
+#define VERSION "45-fatjumbo"
#define AOE_MAJOR 152
#define DEVICE_NAME "aoe"
@@ -119,6 +119,8 @@ struct frame {
ulong bcnt;
sector_t lba;
struct sk_buff *skb;
+ struct bio_vec *bv;
+ ulong bv_off;
};
struct aoeif {
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Aoetools-discuss mailing list
Aoetools-discuss@xxxxxxxxxxxxxxxxxxxxx
https://lists.sourceforge.net/lists/listinfo/aoetools-discuss
This mailing list archive is a service of Copilotco.