[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Aoetools-discuss] write buffering and the AoE kernel module


> Yikes! That's creepy. I'll be modifying my copy of the vblade software 
> locally to round (down) to the nearest 4KB for the device size, ensuring 
> the largest possible frame sizes (assuming Gig-E), unless you think that 
> is crazy-talk.

It's only really necessary if you intend to use the raw block device,
but it sure doesn't hurt.  I usually create my vblades as multiples of
1M to avoid thinking too hard about it:

dd if=/dev/zero of=/tmp/foo bs=1M count=32

> Which leads me to my next question: could you give me a hint (or a 
> source file and line number ;-) for how to increase the maximum frame 
> size beyond a 4KB payload? I know the FAQ suggests that it's not done 
> right now because performance seems to drop off after that size but I'd 
> like to experiment with it anyway.

Unfortunately it's not that easy.  I've attached a patch against
aoe6-45 that you can test with.  It should apply to aoe6-47 with
minimal fuzz.  As usual with alpha patches, be aware that there's a
nonzero probability of a kernel panic.

Cheers,

Sam
diff -uprN aoe6-45/linux/drivers/block/aoe/aoecmd.c aoe6-45rc1/linux/drivers/block/aoe/aoecmd.c
--- aoe6-45/linux/drivers/block/aoe/aoecmd.c	2007-02-07 18:59:18.000000000 -0500
+++ aoe6-45rc1/linux/drivers/block/aoe/aoecmd.c	2007-02-08 10:49:19.000000000 -0500
@@ -203,6 +203,24 @@ gotone:				skb_shinfo(skb)->nr_frags = s
 	return NULL;
 }
 
+static void	/* attach cnt bytes, starting at page offset off inside bv, to skb as page fragments */
+skb_fillup(struct sk_buff *skb, struct bio_vec *bv, ulong off, ulong cnt)
+{
+	int frag = 0;	/* next fragment slot to fill; every call starts again at slot 0 */
+	ulong fcnt;
+loop:
+	fcnt = bv->bv_len - (off - bv->bv_offset);	/* bytes left in this bio_vec from off to its end */
+	if (fcnt > cnt)
+		fcnt = cnt;	/* never attach more than is still wanted */
+	skb_fill_page_desc(skb, frag++, bv->bv_page, off, fcnt);	/* one fragment per bio_vec touched */
+	cnt -= fcnt;
+	if (cnt <= 0)	/* NOTE(review): cnt is a ulong, presumably unsigned, so this only fires when cnt == 0 */
+		return;
+	bv++;	/* more to attach: move on to the following bio_vec (not bounds-checked here) */
+	off = bv->bv_offset;	/* a fresh bio_vec is consumed from its own start */
+	goto loop;
+}
+
 static int
 aoecmd_ata_rw(struct aoedev *d)
 {
@@ -213,7 +231,7 @@ aoecmd_ata_rw(struct aoedev *d)
 	struct bio_vec *bv;
 	struct aoetgt *t;
 	struct sk_buff *skb;
-	ulong bcnt;
+	ulong bcnt, fbcnt;
 	char writebit, extbit;
 
 	writebit = 0x10;
@@ -228,8 +246,28 @@ aoecmd_ata_rw(struct aoedev *d)
 	bcnt = t->ifp->maxbcnt;
 	if (bcnt == 0)
 		bcnt = DEFAULTBCNT;
-	if (bcnt > buf->bv_resid)
-		bcnt = buf->bv_resid;
+	if (bcnt > buf->resid)
+		bcnt = buf->resid;
+	fbcnt = bcnt;
+	f->bv = buf->bv;
+	f->bv_off = f->bv->bv_offset + (f->bv->bv_len - buf->bv_resid);
+	do {
+		if (fbcnt < buf->bv_resid) {
+			buf->bv_resid -= fbcnt;
+			buf->resid -= fbcnt;
+			break;
+		}
+		fbcnt -= buf->bv_resid;
+		buf->resid -= buf->bv_resid;
+		if (buf->resid == 0) {
+			d->inprocess = NULL;
+			break;
+		}
+		buf->bv++;
+		buf->bv_resid = buf->bv->bv_len;
+		WARN_ON(buf->bv_resid == 0);
+	} while (fbcnt);
+
 	/* initialize the headers & frame */
 	skb = f->skb;
 	h = (struct aoe_hdr *) skb->mac.raw;
@@ -255,7 +293,7 @@ aoecmd_ata_rw(struct aoedev *d)
 		ah->lba3 |= 0xe0;	/* LBA bit + obsolete 0xa0 */
 	}
 	if (bio_data_dir(buf->bio) == WRITE) {
-		skb_fill_page_desc(skb, 0, bv->bv_page, buf->bv_off, bcnt);
+		skb_fillup(skb, f->bv, f->bv_off, bcnt);
 		ah->aflags |= AOEAFL_WRITE;
 		skb->len += bcnt;
 		skb->data_len = bcnt;
@@ -269,18 +307,7 @@ aoecmd_ata_rw(struct aoedev *d)
 
 	/* mark all tracking fields and load out */
 	buf->nframesout += 1;
-	buf->bv_off += bcnt;
-	buf->bv_resid -= bcnt;
-	buf->resid -= bcnt;
 	buf->sector += bcnt >> 9;
-	if (buf->resid == 0) {
-		d->inprocess = NULL;
-	} else if (buf->bv_resid == 0) {
-		buf->bv = ++bv;
-		buf->bv_resid = bv->bv_len;
-		WARN_ON(buf->bv_resid == 0);
-		buf->bv_off = bv->bv_offset;
-	}
 
 	skb->dev = t->ifp->nd;
 	skb = skb_clone(skb, GFP_ATOMIC);
@@ -380,12 +407,9 @@ resend(struct aoedev *d, struct aoetgt *
 		put_lba(ah, f->lba);
 
 		n = f->bcnt;
-		if (n > DEFAULTBCNT)
-			n = DEFAULTBCNT;
 		ah->scnt = n >> 9;
 		if (ah->aflags & AOEAFL_WRITE) {
-			skb_fill_page_desc(skb, 0, virt_to_page(f->bufaddr),
-				offset_in_page(f->bufaddr), n);
+			skb_fillup(skb, f->bv, f->bv_off, n);
 			skb->len = sizeof *h + sizeof *ah + n;
 			skb->data_len = n;
 		}
@@ -542,6 +566,7 @@ rexmit_timer(ulong vp)
 				ifp = NULL;
 			}
 
+/*
 			if (ATASCNT(f->skb->mac.raw) > DEFAULTBCNT / 512)
 			if (ifp && ++ifp->lostjumbo > (t->nframes << 1))
 			if (ifp->maxbcnt != DEFAULTBCNT) {
@@ -552,6 +577,7 @@ rexmit_timer(ulong vp)
 					DEFAULTBCNT);
 				ifp->maxbcnt = 0;
 			}
+*/
 			resend(d, t, f);
 		}
 
@@ -734,6 +760,42 @@ diskstats(struct gendisk *disk, struct b
 	disk_stat_add(disk, io_ticks, duration);
 }
 
+static void	/* copy cnt bytes from p into the pages described by bv, starting at page offset off */
+bvcpy(struct bio_vec *bv, ulong off, char *p, ulong cnt)
+{
+	ulong fcnt;
+loop:
+	fcnt = bv->bv_len - (off - bv->bv_offset);	/* bytes left in this bio_vec from off to its end */
+	if (fcnt > cnt)
+		fcnt = cnt;	/* never copy more than is still wanted */
+	memcpy(page_address(bv->bv_page) + off, p, fcnt);	/* NOTE(review): relies on page_address() giving a usable mapping for this page -- confirm */
+	cnt -= fcnt;
+	if (cnt <= 0)	/* NOTE(review): cnt is a ulong, presumably unsigned, so this only fires when cnt == 0 */
+		return;
+	p += fcnt;	/* advance the source past what was just copied */
+	bv++;	/* continue in the following bio_vec (not bounds-checked here) */
+	off = bv->bv_offset;	/* a fresh bio_vec is filled from its own start */
+	goto loop;
+}
+
+static void	/* move frame f forward by cnt bytes: bump its lba and its (bv, bv_off) position */
+fadvance(struct frame *f, ulong cnt)
+{
+	ulong fcnt;
+
+	f->lba += cnt >> 9;	/* cnt is in bytes; lba advances in 512-byte units */
+loop:
+	fcnt = f->bv->bv_len - (f->bv_off - f->bv->bv_offset);	/* bytes left in the current bio_vec */
+	if (fcnt > cnt) {	/* the new position lies strictly inside this bio_vec */
+		f->bv_off += cnt;
+		return;
+	}
+	cnt -= fcnt;	/* use up the rest of this bio_vec (this path is also taken when fcnt == cnt) */
+	f->bv++;	/* step to the following bio_vec; it is dereferenced below without a bounds check */
+	f->bv_off = f->bv->bv_offset;	/* position is now the start of that bio_vec */
+	goto loop;
+}
+
 void
 aoecmd_ata_rsp(struct sk_buff *skb)
 {
@@ -813,7 +875,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 				spin_unlock_irqrestore(&d->lock, flags);
 				return;
 			}
-			memcpy(f->bufaddr, ahin+1, n);
+			bvcpy(f->bv, f->bv_off, (char *) (ahin+1), n);
 		case WIN_WRITE:
 		case WIN_WRITE_EXT:
 			ifp = getif(t, skb->dev);
@@ -823,8 +885,7 @@ aoecmd_ata_rsp(struct sk_buff *skb)
 					ifp->lostjumbo = 0;
 			}
 			if (f->bcnt -= n) {
-				f->lba += n >> 9;
-				f->bufaddr += n;
+				fadvance(f, n);
 				resend(d, t, f);
 				goto xmit;
 			}
diff -uprN aoe6-45/linux/drivers/block/aoe/aoe.h aoe6-45rc1/linux/drivers/block/aoe/aoe.h
--- aoe6-45/linux/drivers/block/aoe/aoe.h	2007-02-07 18:59:18.000000000 -0500
+++ aoe6-45rc1/linux/drivers/block/aoe/aoe.h	2007-02-06 17:38:59.000000000 -0500
@@ -1,5 +1,5 @@
 /* Copyright (c) 2006 Coraid, Inc.  See COPYING for GPL terms. */
-#define VERSION "45"
+#define VERSION "45-fatjumbo"
 #define AOE_MAJOR 152
 #define DEVICE_NAME "aoe"
 
@@ -119,6 +119,8 @@ struct frame {
 	ulong bcnt;
 	sector_t lba;
 	struct sk_buff *skb;
+	struct bio_vec *bv;
+	ulong bv_off;
 };
 
 struct aoeif {
 
-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
Aoetools-discuss mailing list
Aoetools-discuss@xxxxxxxxxxxxxxxxxxxxx
https://lists.sourceforge.net/lists/listinfo/aoetools-discuss


This mailing list archive is a service of Copilotco.