devaoe.c - vx32 - Local 9vx git repository for patches.
(HTM) git clone git://r-36.net/vx32
(DIR) Log
(DIR) Files
(DIR) Refs
---
devaoe.c (43271B)
---
1 /*
2 * © 2005-8 coraid
3 * aoe storage initiator
4 */
5
6 #include "u.h"
7 #include "lib.h"
8 #include "mem.h"
9 #include "dat.h"
10 #include "fns.h"
11 #include "io.h"
12 #include "ureg.h"
13 #include "error.h"
14 #include "netif.h"
15 #include "etherif.h"
16 #include "ip/ip.h"
17 #include "aoe.h"
18
/* wake any process sleeping on the Rendez member `rend' of *x */
#define WAKEUP(x) wakeup(&((x)->rend))
/*
 * sleep on the Rendez member `rend' of *a until b(c) is true.
 * the argument is parenthesized so any pointer expression may be passed.
 */
#define SLEEP(a,b,c) sleep(&((a)->rend), b, c)

//#pragma varargck argpos eventlog 1

/* log through eventlog() only while the debug flag is set */
#define dprint(...) if(debug) eventlog(__VA_ARGS__); else USED(debug);
/* format into the current process's genbuf scratch buffer */
#define uprint(...) snprint(up->genbuf, sizeof up->genbuf, __VA_ARGS__);
26
/* driver-wide limits */
enum {
	Maxunits = 0xff,	/* newunit() refuses to hand out more units than this */
	Maxframes = 128,	/* frames allocated per device by newdev() */
	Maxmtu = 100000,	/* d->maxmtu when no explicit mtu has been set */
	Ndevlink = 6,		/* entries in Aoedev.dltab */
	Nea = 6,		/* ethernet addresses per Devlink (eatab rows) */
	Nnetlink = 6,		/* entries in the netlinks tables */
};
35
/*
 * qid path layout:
 *	bits 0-3	file type (Q* constant)
 *	bits 4-11	unit number
 *	bits 12-15	devlink index
 * Q3 must place the devlink index where L() reads it back; shifting
 * by only 8 would overlay the upper half of the unit number.
 */
#define TYPE(q) ((ulong)(q).path & 0xf)
#define UNIT(q) (((ulong)(q).path>>4) & 0xff)
#define L(q) (((ulong)(q).path>>12) & 0xf)
#define QID(u, t) ((u)<<4 | (t))
#define Q3(l, u, t) ((l)<<12 | QID(u, t))
/* nonzero when the device or link carries the Dup flag */
#define UP(d) ((d)->flag & Dup)

/* current time, and tick <-> millisecond conversions */
#define Ticks msec()
#define Ms2tk(t) (((t)*HZ)/1000)
#define Tk2ms(t) (((t)*1000)/HZ)
46
enum {
	/* file types; stored in the low 4 bits of a qid path (see TYPE) */
	Qzero,
	Qtopdir = 1,
	Qtopbase,
	Qtopctl = Qtopbase,
	Qtoplog,
	Qtopend,

	/* per-unit directory and its files (see unitgen) */
	Qunitdir,
	Qunitbase,
	Qctl = Qunitbase,
	Qdata,
	Qconfig,
	Qident,

	/* per-unit devlink directory and its entries */
	Qdevlinkdir,
	Qdevlinkbase,
	Qdevlink = Qdevlinkbase,
	Qdevlinkend,

	/* number of files in the top and devlink directories */
	Qtopfiles = Qtopend-Qtopbase,
	Qdevlinkfiles = Qdevlinkend-Qdevlinkbase,

	/* event log: Nevents fixed-size slots of Eventlen bytes each */
	Eventlen = 256,
	Nevents = 64,

	Fread = 0,
	Fwrite,
	Tfree = -1,	/* tag of an unused Frame */
	Tmgmt,		/* reserved tag; newtag() never returns it */

	/* round trip bounds, timeouts, in ticks */
	Rtmax = Ms2tk(320),
	Rtmin = Ms2tk(20),
	Srbtimeout = 45*HZ,	/* hset() fails an srb older than this */

	Dbcnt = 1024,	/* maxbcnt fallback once jumbo frames are found to fail */

	/* presumably ATA command opcodes -- TODO confirm against the ATA spec */
	Crd = 0x20,
	Crdext = 0x24,
	Cwr = 0x30,
	Cwrext = 0x34,
	Cid = 0xec,
};
91
/* direction argument handed to rw() by unitread() and unitwrite() */
enum {
	Read,
	Write,
};
96
/*
 * unified set of flags
 * a Netlink + Aoedev must both be jumbo capable
 * to send jumbograms to that interface.
 * bit positions index flagname[] (see pflag).
 */
enum {
	/* sync with ahci.h */
	Dllba = 1<<0,
	Dsmart = 1<<1,
	Dpower = 1<<2,
	Dnop = 1<<3,
	Datapi = 1<<4,
	Datapi16= 1<<5,

	/* aoe specific */
	Dup = 1<<6,
	Djumbo = 1<<7,
};
115
/* printable names of the flag bits; entry i names bit 1<<i (used by pflag) */
static char *flagname[] = {
	"llba",
	"smart",
	"power",
	"nop",
	"atapi",
	"atapi16",

	"up",
	"jumbo",
};
127
128 typedef struct {
129 uchar flag;
130 uchar lostjumbo;
131 int datamtu;
132
133 Chan *cc;
134 Chan *dc;
135 Chan *mtu; /* open early to prevent bind issues. */
136 char path[Maxpath];
137 uchar ea[Eaddrlen];
138 } Netlink;
139
140 typedef struct {
141 Netlink *nl;
142 int nea;
143 ulong eaidx;
144 uchar eatab[Nea][Eaddrlen];
145 int datamtu;
146 ulong npkt;
147 ulong resent;
148 uchar flag;
149
150 ulong rttavg;
151 ulong mintimer;
152 } Devlink;
153
/* one read/write request; queued on an Aoedev and split into Frames by atarw() */
typedef struct Srb Srb;
struct Srb {
	Rendez rend;	/* the requester sleeps here until srbready() holds */
	Srb *next;	/* link in the Aoedev head/tail queue */
	ulong ticksent;	/* Ticks at allocation; hset() times the request out against it */
	ulong len;	/* bytes not yet assigned to a frame */
	vlong sector;	/* next sector to transfer */
	short write;	/* nonzero for a write */
	short nout;	/* frames outstanding for this request */
	char *error;	/* set by srberror() */
	void *dp;	/* cursor into data; advanced by atarw() */
	void *data;	/* start of the transfer buffer */
};
167
168 typedef struct {
169 int tag;
170 ulong bcnt;
171 ulong dlen;
172 vlong lba;
173 ulong ticksent;
174 int nhdr;
175 uchar hdr[ETHERMINTU];
176 void *dp;
177 Devlink *dl;
178 Netlink *nl;
179 int eaidx;
180 Srb *srb;
181 } Frame;
182
/* one aoe target (major.minor), its links, frame slots and request queue */
typedef struct Aoedev Aoedev;
struct Aoedev {
	QLock qlock;
	Aoedev *next;		/* link in the devs.d list */

	ulong vers;		/* compared with qid.vers by unitread() */

	int ndl;		/* devlinks in use */
	ulong dlidx;		/* round-robin cursor used by pickdevlink() */
	Devlink *dl;
	Devlink dltab[Ndevlink];

	ushort fwver;
	uchar flag;		/* D* flags */
	int nopen;		/* opens of the data file */
	int major;
	int minor;
	int unit;		/* unit number encoded in qids */
	int lasttag;		/* counter feeding newtag() */
	int nframes;
	Frame *frames;
	vlong bsize;		/* size reported and enforced by rw() */
	vlong realbsize;	/* size restored by "setsize" with no argument */

	uint maxbcnt;		/* largest payload per frame, in bytes */
	uint maxmtu;
	ulong lostjumbo;
	ushort nout;		/* frames currently outstanding */
	ushort maxout;		/* cap on nout; adjusted by aoesweepproc() */
	ulong lastwadj;		/* Ticks when maxout was last adjusted */
	Srb *head;		/* queue of requests not yet started */
	Srb *tail;
	Srb *inprocess;		/* request currently being split into frames */

	char serial[20+1];
	char firmware[8+1];
	char model[40+1];
	int nconfig;		/* bytes of config in use */
	uchar config[1024];
	uchar ident[512];
};
224
225 //#pragma varargck type "æ" Aoedev*
226
/*
 * event log ring: Nevents slots of Eventlen bytes.  The first byte of
 * a slot holds the message length (0 = empty slot); the text follows.
 */
static struct {
	Lock lk;	/* guards rp, wp and the slot contents */
	QLock qlock;	/* serializes readers in eventlogread() */
	Rendez rend;	/* woken by eventlog() after each message */
	char buf[Eventlen*Nevents];
	char *rp;	/* next slot to read */
	char *wp;	/* next slot to write */
} events;
235
/* list of all known aoe devices */
static struct {
	RWlock rwlock;	/* read-held while walking the list */
	int nd;
	Aoedev *d;	/* head of the list, linked through Aoedev.next */
} devs;
241
/* table of bound network interfaces; a slot is free while nl[i].cc is nil */
static struct {
	Lock lk;
	int reader[Nnetlink]; /* reader is running. */
	Rendez rendez[Nnetlink]; /* confirm exit. */
	Netlink nl[Nnetlink];
} netlinks;
248
extern Dev aoedevtab;
static Ref units;		/* count of unit numbers handed out by newunit() */
static Ref drivevers;
static int debug;		/* enables dprint() */
static int autodiscover = 1;
static int rediscover;		/* when set, aoesweepproc() re-broadcasts discovery */
/* error strings raised by this driver */
char Enotup[] = "aoe device is down";
char Echange[] = "media or partition has changed";
257
258 static Srb*
259 srballoc(ulong sz)
260 {
261 Srb *srb;
262
263 srb = malloc(sizeof *srb+sz);
264 srb->dp = srb->data = srb+1;
265 srb->ticksent = Ticks;
266 return srb;
267 }
268
269 static Srb*
270 srbkalloc(void *db, ulong dummy)
271 {
272 Srb *srb;
273
274 srb = malloc(sizeof *srb);
275 srb->dp = srb->data = db;
276 srb->ticksent = Ticks;
277 return srb;
278 }
279
280 #define srbfree(srb) free(srb)
281
282 static void
283 srberror(Srb *srb, char *s)
284 {
285 srb->error = s;
286 srb->nout--;
287 WAKEUP(srb);
288 }
289
290 static void
291 frameerror(Aoedev *d, Frame *f, char *s)
292 {
293 Srb *srb;
294
295 srb = f->srb;
296 if(f->tag == Tfree)
297 return;
298 f->srb = nil;
299 f->tag = Tfree; /* don't get fooled by way-slow responses */
300 if(!srb)
301 return;
302 srberror(srb, s);
303 d->nout--;
304 }
305
306 static char*
307 unitname(Aoedev *d)
308 {
309 uprint("%d.%d", d->major, d->minor);
310 return up->genbuf;
311 }
312
313 static long
314 eventlogread(void *a, long n)
315 {
316 int len;
317 char *p, *buf;
318
319 buf = smalloc(Eventlen);
320 QLOCK(&events);
321 LOCK(&events);
322 p = events.rp;
323 len = *p;
324 if(len == 0){
325 n = 0;
326 UNLOCK(&events);
327 } else {
328 if(n > len)
329 n = len;
330 /* can't move directly into pageable space with events lock held */
331 memmove(buf, p+1, n);
332 *p = 0;
333 events.rp = p += Eventlen;
334 if(p >= events.buf + sizeof events.buf)
335 events.rp = events.buf;
336 UNLOCK(&events);
337
338 /* the concern here is page faults in memmove below */
339 if(waserror()){
340 free(buf);
341 QUNLOCK(&events);
342 nexterror();
343 }
344 memmove(a, buf, n);
345 poperror();
346 }
347 free(buf);
348 QUNLOCK(&events);
349 return n;
350 }
351
352 static int
353 eventlog(char *fmt, ...)
354 {
355 int dragrp, n;
356 char *p;
357 va_list arg;
358
359 LOCK(&events);
360 p = events.wp;
361 dragrp = *p++;
362 va_start(arg, fmt);
363 n = vsnprint(p, Eventlen-1, fmt, arg);
364 *--p = n;
365 p = events.wp += Eventlen;
366 if(p >= events.buf + sizeof events.buf)
367 p = events.wp = events.buf;
368 if(dragrp)
369 events.rp = p;
370 UNLOCK(&events);
371 WAKEUP(&events);
372 return n;
373 }
374
375 static int
376 eventcount(void)
377 {
378 int n;
379
380 LOCK(&events);
381 if(*events.rp == 0)
382 n = 0;
383 else if(events.wp < events.rp)
384 n = Nevents - (events.rp - events.wp);
385 else
386 n = events.wp - events.rp;
387 UNLOCK(&events);
388 return n/Eventlen;
389 }
390
391 static int
392 tsince(int tag)
393 {
394 int n;
395
396 n = Ticks & 0xffff;
397 n -= tag & 0xffff;
398 if(n < 0)
399 n += 1<<16;
400 return n;
401 }
402
403 static int
404 newtag(Aoedev *d)
405 {
406 int t;
407
408 do {
409 t = ++d->lasttag << 16;
410 t |= Ticks & 0xffff;
411 } while (t == Tfree || t == Tmgmt);
412 return t;
413 }
414
415 static void
416 downdev(Aoedev *d, char *err)
417 {
418 Frame *f, *e;
419
420 d->flag &= ~Dup;
421 f = d->frames;
422 e = f + d->nframes;
423 for(; f < e; f->tag = Tfree, f->srb = nil, f++)
424 frameerror(d, f, Enotup);
425 d->inprocess = nil;
426 eventlog("%æ: removed; %s\n", d, err);
427 }
428
429 static Block*
430 allocfb(Frame *f)
431 {
432 int len;
433 Block *b;
434
435 len = f->nhdr + f->dlen;
436 if(len < ETHERMINTU)
437 len = ETHERMINTU;
438 b = allocb(len);
439 memmove(b->wp, f->hdr, f->nhdr);
440 if(f->dlen)
441 memmove(b->wp + f->nhdr, f->dp, f->dlen);
442 b->wp += len;
443 return b;
444 }
445
446 static void
447 putlba(Aoeata *a, vlong lba)
448 {
449 uchar *c;
450
451 c = a->lba;
452 c[0] = lba;
453 c[1] = lba >> 8;
454 c[2] = lba >> 16;
455 c[3] = lba >> 24;
456 c[4] = lba >> 32;
457 c[5] = lba >> 40;
458 }
459
460 static Devlink*
461 pickdevlink(Aoedev *d)
462 {
463 ulong i, n;
464 Devlink *l;
465
466 for(i = 0; i < d->ndl; i++){
467 n = d->dlidx++ % d->ndl;
468 l = d->dl + n;
469 if(l && l->flag & Dup)
470 return l;
471 }
472 return 0;
473 }
474
475 static int
476 pickea(Devlink *l)
477 {
478 if(l == 0)
479 return -1;
480 if(l->nea == 0)
481 return -1;
482 return l->eaidx++ % l->nea;
483 }
484
485 static int
486 hset(Aoedev *d, Frame *f, Aoehdr *h, int cmd)
487 {
488 int i;
489 Devlink *l;
490
491 if(f->srb)
492 if((long)(Ticks-f->srb->ticksent) > Srbtimeout){
493 eventlog("%æ: srb timeout\n", d);
494 frameerror(d, f, Etimedout);
495 return -1;
496 }
497 l = pickdevlink(d);
498 i = pickea(l);
499 if(i == -1){
500 downdev(d, "resend fails; no netlink/ea");
501 return -1;
502 }
503 memmove(h->dst, l->eatab[i], Eaddrlen);
504 memmove(h->src, l->nl->ea, sizeof h->src);
505 hnputs(h->type, Aoetype);
506 h->verflag = Aoever << 4;
507 h->error = 0;
508 hnputs(h->major, d->major);
509 h->minor = d->minor;
510 h->cmd = cmd;
511
512 hnputl(h->tag, f->tag = newtag(d));
513 f->dl = l;
514 f->nl = l->nl;
515 f->eaidx = i;
516 f->ticksent = Ticks;
517
518 return f->tag;
519 }
520
521 static int
522 resend(Aoedev *d, Frame *f)
523 {
524 ulong n;
525 Aoeata *a;
526
527 a = (Aoeata*)f->hdr;
528 if(hset(d, f, (Aoehdr*)a, a->cmd) == -1)
529 return -1;
530 n = f->bcnt;
531 if(n > d->maxbcnt){
532 n = d->maxbcnt; /* mtu mismatch (jumbo fail?) */
533 if(f->dlen > n)
534 f->dlen = n;
535 }
536 a->scnt = n / Aoesectsz;
537 f->dl->resent++;
538 f->dl->npkt++;
539 if(waserror())
540 /* should remove the netlink */
541 return -1;
542 devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
543 poperror();
544 return 0;
545 }
546
547 static void
548 discover(int major, int minor)
549 {
550 Aoehdr *h;
551 Block *b;
552 Netlink *nl, *e;
553
554 nl = netlinks.nl;
555 e = nl + nelem(netlinks.nl);
556 for(; nl < e; nl++){
557 if(nl->cc == nil)
558 continue;
559 b = allocb(ETHERMINTU);
560 if(waserror()){
561 freeb(b);
562 nexterror();
563 }
564 b->wp = b->rp + ETHERMINTU;
565 memset(b->rp, 0, ETHERMINTU);
566 h = (Aoehdr*)b->rp;
567 memset(h->dst, 0xff, sizeof h->dst);
568 memmove(h->src, nl->ea, sizeof h->src);
569 hnputs(h->type, Aoetype);
570 h->verflag = Aoever << 4;
571 hnputs(h->major, major);
572 h->minor = minor;
573 h->cmd = ACconfig;
574 poperror();
575 devtab[nl->dc->type]->bwrite(nl->dc, b, 0);
576 }
577 }
578
579 /*
580 * Check all frames on device and resend any frames that have been
581 * outstanding for 200% of the device round trip time average.
582 */
583 static void
584 aoesweepproc(void *dummy)
585 {
586 ulong i, tx, timeout, nbc;
587 vlong starttick;
588 enum { Nms = 100, Nbcms = 30*1000, };
589 uchar *ea;
590 Aoeata *a;
591 Aoedev *d;
592 Devlink *l;
593 Frame *f, *e;
594
595 nbc = Nbcms/Nms;
596 loop:
597 if(nbc-- == 0){
598 if(rediscover && !waserror()){
599 discover(0xffff, 0xff);
600 poperror();
601 }
602 nbc = Nbcms/Nms;
603 }
604 starttick = Ticks;
605 RLOCK(&devs);
606 for(d = devs.d; d; d = d->next){
607 if(!CANQLOCK(d))
608 continue;
609 if(!UP(d)){
610 QUNLOCK(d);
611 continue;
612 }
613 tx = 0;
614 f = d->frames;
615 e = f + d->nframes;
616 for (; f < e; f++){
617 if(f->tag == Tfree)
618 continue;
619 l = f->dl;
620 timeout = l->rttavg << 1;
621 i = tsince(f->tag);
622 if(i < timeout)
623 continue;
624 if(d->nout == d->maxout){
625 if(d->maxout > 1)
626 d->maxout--;
627 d->lastwadj = Ticks;
628 }
629 a = (Aoeata*)f->hdr;
630 if(a->scnt > Dbcnt / Aoesectsz &&
631 ++f->nl->lostjumbo > (d->nframes << 1)){
632 ea = f->dl->eatab[f->eaidx];
633 eventlog("%æ: jumbo failure on %s:%E; lba%lld\n",
634 d, f->nl->path, ea, f->lba);
635 d->maxbcnt = Dbcnt;
636 d->flag &= ~Djumbo;
637 }
638 resend(d, f);
639 if(tx++ == 0){
640 if((l->rttavg <<= 1) > Rtmax)
641 l->rttavg = Rtmax;
642 eventlog("%æ: rtt %ldms\n", d, Tk2ms(l->rttavg));
643 }
644 }
645 if(d->nout == d->maxout && d->maxout < d->nframes &&
646 TK2MS(Ticks-d->lastwadj) > 10*1000){
647 d->maxout++;
648 d->lastwadj = Ticks;
649 }
650 QUNLOCK(d);
651 }
652 RUNLOCK(&devs);
653 i = Nms - TK2MS(Ticks - starttick);
654 if(i > 0)
655 tsleep(&up->sleep, return0, 0, i);
656 goto loop;
657 }
658
659 static int
660 fmtaoe(Fmt *f)
661 {
662 char buf[16];
663 Aoedev *d;
664
665 d = va_arg(f->args, Aoedev*);
666 snprint(buf, sizeof buf, "aoe%d.%d", d->major, d->minor);
667 return fmtstrcpy(f, buf);
668 }
669
static void netbind(char *path);

/*
 * Bind the interfaces named by the "aoeif" configuration variable.
 * Configuration lookup is disabled in this port, so the function
 * returns immediately; the binding loop below is kept for when the
 * lookup is restored.
 */
static void
aoecfg(void)
{
	int n, i;
	char *p, *f[32], buf[24];

	if(1)
//	if((p = getconf("aoeif")) == nil || (n = tokenize(p, f, nelem(f))) < 1)
		return;
	/* goo! */
	for(i = 0; i < n; i++){
		p = f[i];
		if(strncmp(p, "ether", 5) == 0){
			snprint(buf, sizeof buf, "#l%c/ether%c", p[5], p[5]);
		}else if(strncmp(p, "#l", 2) == 0){
			snprint(buf, sizeof buf, "#l%c/ether%c", p[2], p[2]);
		}else{
			continue;
		}
		if(waserror())
			continue;
		netbind(buf);
		poperror();
	}
}
696
697 static void
698 aoeinit(void)
699 {
700 static int init;
701 static QLock l;
702
703 if(!canqlock(&l))
704 return;
705 if(init == 0){
706 fmtinstall(L'æ', fmtaoe);
707 events.rp = events.wp = events.buf;
708 kproc("aoesweep", aoesweepproc, nil);
709 aoecfg();
710 init = 1;
711 }
712 qunlock(&l);
713 }
714
715 static Chan*
716 aoeattach(char *spec)
717 {
718 Chan *c;
719
720 if(*spec)
721 error(Enonexist);
722 aoeinit();
723 c = devattach(L'æ', spec);
724 mkqid(&c->qid, Qzero, 0, QTDIR);
725 return c;
726 }
727
728 static Aoedev*
729 unitseq(ulong unit)
730 {
731 int i;
732 Aoedev *d;
733
734 i = 0;
735 RLOCK(&devs);
736 for(d = devs.d; d; d = d->next)
737 if(i++ == unit)
738 break;
739 RUNLOCK(&devs);
740 return d;
741 }
742
743 static Aoedev*
744 unit2dev(ulong unit)
745 {
746 Aoedev *d;
747
748 RLOCK(&devs);
749 for(d = devs.d; d; d = d->next)
750 if(d->unit == unit){
751 RUNLOCK(&devs);
752 return d;
753 }
754 RUNLOCK(&devs);
755 error("unit lookup failure");
756 return nil;
757 }
758
759 static int
760 unitgen(Chan *c, ulong type, Dir *dp)
761 {
762 int perm, t;
763 ulong vers;
764 vlong size;
765 char *p;
766 Aoedev *d;
767 Qid q;
768
769 d = unit2dev(UNIT(c->qid));
770 perm = 0644;
771 size = 0;
772 vers = d->vers;
773 t = QTFILE;
774
775 switch(type){
776 default:
777 return -1;
778 case Qctl:
779 p = "ctl";
780 break;
781 case Qdata:
782 p = "data";
783 perm = 0640;
784 if(UP(d))
785 size = d->bsize;
786 break;
787 case Qconfig:
788 p = "config";
789 if(UP(d))
790 size = d->nconfig;
791 break;
792 case Qident:
793 p = "ident";
794 if(UP(d))
795 size = sizeof d->ident;
796 break;
797 case Qdevlinkdir:
798 p = "devlink";
799 t = QTDIR;
800 perm = 0555;
801 break;
802 }
803 mkqid(&q, QID(UNIT(c->qid), type), vers, t);
804 devdir(c, q, p, size, eve, perm, dp);
805 return 1;
806 }
807
808 static int
809 topgen(Chan *c, ulong type, Dir *d)
810 {
811 int perm;
812 vlong size;
813 char *p;
814 Qid q;
815
816 perm = 0444;
817 size = 0;
818 switch(type){
819 default:
820 return -1;
821 case Qtopctl:
822 p = "ctl";
823 perm = 0644;
824 break;
825 case Qtoplog:
826 p = "log";
827 size = eventcount();
828 break;
829 }
830 mkqid(&q, type, 0, QTFILE);
831 devdir(c, q, p, size, eve, perm, d);
832 return 1;
833 }
834
835 static int
836 aoegen(Chan *c, char *d0, Dirtab *d1, int d2, int s, Dir *dp)
837 {
838 int i;
839 Aoedev *d;
840 Qid q;
841
842 if(c->qid.path == 0){
843 switch(s){
844 case DEVDOTDOT:
845 q.path = 0;
846 q.type = QTDIR;
847 devdir(c, q, "#æ", 0, eve, 0555, dp);
848 break;
849 case 0:
850 q.path = Qtopdir;
851 q.type = QTDIR;
852 devdir(c, q, "aoe", 0, eve, 0555, dp);
853 break;
854 default:
855 return -1;
856 }
857 return 1;
858 }
859
860 switch(TYPE(c->qid)){
861 default:
862 return -1;
863 case Qtopdir:
864 if(s == DEVDOTDOT){
865 mkqid(&q, Qzero, 0, QTDIR);
866 devdir(c, q, "aoe", 0, eve, 0555, dp);
867 return 1;
868 }
869 if(s < Qtopfiles)
870 return topgen(c, Qtopbase + s, dp);
871 s -= Qtopfiles;
872 if((d = unitseq(s)) == 0)
873 return -1;
874 mkqid(&q, QID(d->unit, Qunitdir), 0, QTDIR);
875 devdir(c, q, unitname(d), 0, eve, 0555, dp);
876 return 1;
877 case Qtopctl:
878 case Qtoplog:
879 return topgen(c, TYPE(c->qid), dp);
880 case Qunitdir:
881 if(s == DEVDOTDOT){
882 mkqid(&q, QID(0, Qtopdir), 0, QTDIR);
883 uprint("%uld", UNIT(c->qid));
884 devdir(c, q, up->genbuf, 0, eve, 0555, dp);
885 return 1;
886 }
887 return unitgen(c, Qunitbase+s, dp);
888 case Qctl:
889 case Qdata:
890 case Qconfig:
891 case Qident:
892 return unitgen(c, TYPE(c->qid), dp);
893 case Qdevlinkdir:
894 i = UNIT(c->qid);
895 if(s == DEVDOTDOT){
896 mkqid(&q, QID(i, Qunitdir), 0, QTDIR);
897 devdir(c, q, "devlink", 0, eve, 0555, dp);
898 return 1;
899 }
900 if(i >= units.ref)
901 return -1;
902 d = unit2dev(i);
903 if(s >= d->ndl)
904 return -1;
905 uprint("%d", s);
906 mkqid(&q, Q3(s, i, Qdevlink), 0, QTFILE);
907 devdir(c, q, up->genbuf, 0, eve, 0755, dp);
908 return 1;
909 case Qdevlink:
910 uprint("%d", s);
911 mkqid(&q, Q3(s, UNIT(c->qid), Qdevlink), 0, QTFILE);
912 devdir(c, q, up->genbuf, 0, eve, 0755, dp);
913 return 1;
914 }
915 }
916
917 static Walkqid*
918 aoewalk(Chan *c, Chan *nc, char **name, int nname)
919 {
920 return devwalk(c, nc, name, nname, nil, 0, aoegen);
921 }
922
923 static int
924 aoestat(Chan *c, uchar *db, int n)
925 {
926 return devstat(c, db, n, nil, 0, aoegen);
927 }
928
929 static Chan*
930 aoeopen(Chan *c, int omode)
931 {
932 Aoedev *d;
933
934 if(TYPE(c->qid) != Qdata)
935 return devopen(c, omode, 0, 0, aoegen);
936
937 d = unit2dev(UNIT(c->qid));
938 QLOCK(d);
939 if(waserror()){
940 QUNLOCK(d);
941 nexterror();
942 }
943 if(!UP(d))
944 error(Enotup);
945 c = devopen(c, omode, 0, 0, aoegen);
946 d->nopen++;
947 poperror();
948 QUNLOCK(d);
949 return c;
950 }
951
952 static void
953 aoeclose(Chan *c)
954 {
955 Aoedev *d;
956
957 if(TYPE(c->qid) != Qdata || (c->flag&COPEN) == 0)
958 return;
959
960 d = unit2dev(UNIT(c->qid));
961 QLOCK(d);
962 if(--d->nopen == 0 && !waserror()){
963 discover(d->major, d->minor);
964 poperror();
965 }
966 QUNLOCK(d);
967 }
968
969 static void
970 atarw(Aoedev *d, Frame *f)
971 {
972 ulong bcnt;
973 char extbit, writebit;
974 Aoeata *ah;
975 Srb *srb;
976
977 extbit = 0x4;
978 writebit = 0x10;
979
980 srb = d->inprocess;
981 bcnt = d->maxbcnt;
982 if(bcnt > srb->len)
983 bcnt = srb->len;
984 f->nhdr = Szaoeata;
985 memset(f->hdr, 0, f->nhdr);
986 ah = (Aoeata*)f->hdr;
987 if(hset(d, f, (Aoehdr*)ah, ACata) == -1)
988 return;
989 f->dp = srb->dp;
990 f->bcnt = bcnt;
991 f->lba = srb->sector;
992 f->srb = srb;
993
994 ah->scnt = bcnt / Aoesectsz;
995 putlba(ah, f->lba);
996 if(d->flag & Dllba)
997 ah->aflag |= AAFext;
998 else {
999 extbit = 0;
1000 ah->lba[3] &= 0x0f;
1001 ah->lba[3] |= 0xe0; /* LBA bit+obsolete 0xa0 */
1002 }
1003 if(srb->write){
1004 ah->aflag |= AAFwrite;
1005 f->dlen = bcnt;
1006 }else{
1007 writebit = 0;
1008 f->dlen = 0;
1009 }
1010 ah->cmdstat = 0x20 | writebit | extbit;
1011
1012 /* mark tracking fields and load out */
1013 srb->nout++;
1014 srb->dp = (uchar*)srb->dp + bcnt;
1015 srb->len -= bcnt;
1016 srb->sector += bcnt / Aoesectsz;
1017 if(srb->len == 0)
1018 d->inprocess = nil;
1019 d->nout++;
1020 f->dl->npkt++;
1021 if(waserror()){
1022 f->tag = Tfree;
1023 d->inprocess = nil;
1024 nexterror();
1025 }
1026 devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1027 poperror();
1028 }
1029
1030 static char*
1031 aoeerror(Aoehdr *h)
1032 {
1033 int n;
1034 static char *errs[] = {
1035 "aoe protocol error: unknown",
1036 "aoe protocol error: bad command code",
1037 "aoe protocol error: bad argument param",
1038 "aoe protocol error: device unavailable",
1039 "aoe protocol error: config string present",
1040 "aoe protocol error: unsupported version",
1041 };
1042
1043 if((h->verflag & AFerr) == 0)
1044 return 0;
1045 n = h->error;
1046 if(n > nelem(errs))
1047 n = 0;
1048 return errs[n];
1049 }
1050
1051 static void
1052 rtupdate(Devlink *l, int rtt)
1053 {
1054 int n;
1055
1056 n = rtt;
1057 if(rtt < 0){
1058 n = -rtt;
1059 if(n < Rtmin)
1060 n = Rtmin;
1061 else if(n > Rtmax)
1062 n = Rtmax;
1063 l->mintimer += (n - l->mintimer) >> 1;
1064 } else if(n < l->mintimer)
1065 n = l->mintimer;
1066 else if(n > Rtmax)
1067 n = Rtmax;
1068
1069 /* g == .25; cf. Congestion Avoidance and Control, Jacobson&Karels; 1988 */
1070 n -= l->rttavg;
1071 l->rttavg += n >> 2;
1072 }
1073
1074 static int
1075 srbready(void *v)
1076 {
1077 Srb *s;
1078
1079 s = v;
1080 return s->error || (!s->nout && !s->len);
1081 }
1082
1083 static Frame*
1084 getframe(Aoedev *d, int tag)
1085 {
1086 Frame *f, *e;
1087
1088 f = d->frames;
1089 e = f + d->nframes;
1090 for(; f < e; f++)
1091 if(f->tag == tag)
1092 return f;
1093 return nil;
1094 }
1095
1096 static Frame*
1097 freeframe(Aoedev *d)
1098 {
1099 if(d->nout < d->maxout)
1100 return getframe(d, Tfree);
1101 return nil;
1102 }
1103
1104 static void
1105 work(Aoedev *d)
1106 {
1107 Frame *f;
1108
1109 while(f = freeframe(d)) {
1110 if(d->inprocess == nil){
1111 if(d->head == nil)
1112 return;
1113 d->inprocess = d->head;
1114 d->head = d->head->next;
1115 if(d->head == nil)
1116 d->tail = nil;
1117 }
1118 atarw(d, f);
1119 }
1120 }
1121
1122 static void
1123 strategy(Aoedev *d, Srb *srb)
1124 {
1125 QLOCK(d);
1126 if(waserror()){
1127 QUNLOCK(d);
1128 nexterror();
1129 }
1130 srb->next = nil;
1131 if(d->tail)
1132 d->tail->next = srb;
1133 d->tail = srb;
1134 if(d->head == nil)
1135 d->head = srb;
1136 work(d);
1137 poperror();
1138 QUNLOCK(d);
1139
1140 while(waserror())
1141 ;
1142 SLEEP(srb, srbready, srb);
1143 poperror();
1144 }
1145
1146 #define iskaddr(a) (!up || (uintptr)(a) > up->pmmu.uzero+USTKTOP)
1147
1148 static long
1149 rw(Aoedev *d, int write, uchar *db, long len, uvlong off)
1150 {
1151 long n, nlen, copy;
1152 enum { Srbsz = 1<<19, };
1153 Srb *srb;
1154
1155 if((off|len) & (Aoesectsz-1))
1156 error("offset and length must be sector multiple.\n");
1157 if(off >= d->bsize)
1158 return 0;
1159 if(off + len > d->bsize)
1160 len = d->bsize - off;
1161 copy = 0;
1162 if(iskaddr(db)){
1163 panic("iskaddr %p %p\n", db);
1164 srb = srbkalloc(db, len);
1165 copy = 1;
1166 }else
1167 srb = srballoc(Srbsz <= len? Srbsz: len);
1168 if(waserror()){
1169 srbfree(srb);
1170 nexterror();
1171 }
1172 srb->write = write;
1173 for(nlen = len; nlen; nlen -= n){
1174 if(!UP(d))
1175 error(Eio);
1176 srb->sector = off / Aoesectsz;
1177 srb->dp = srb->data;
1178 n = nlen;
1179 if(n > Srbsz)
1180 n = Srbsz;
1181 srb->len = n;
1182 if(write && !copy)
1183 memmove(srb->data, db, n);
1184 strategy(d, srb);
1185 if(srb->error)
1186 error(srb->error);
1187 if(!write && !copy)
1188 memmove(db, srb->data, n);
1189 db += n;
1190 off += n;
1191 }
1192 poperror();
1193 srbfree(srb);
1194 return len;
1195 }
1196
1197 static long
1198 readmem(ulong off, void *dst, long n, void *src, long size)
1199 {
1200 if(off >= size)
1201 return 0;
1202 if(off + n > size)
1203 n = size - off;
1204 memmove(dst, (uchar*)src + off, n);
1205 return n;
1206 }
1207
1208 static char*
1209 pflag(char *s, char *e, uchar f)
1210 {
1211 uchar i;
1212
1213 for(i = 0; i < nelem(flagname); i++)
1214 if(f & 1 << i)
1215 s = seprint(s, e, "%s ", flagname[i]);
1216 return seprint(s, e, "\n");
1217 }
1218
1219 static int
1220 pstat(Aoedev *d, char *db, int len, int off)
1221 {
1222 int i;
1223 char *state, *s, *p, *e;
1224
1225 s = p = malloc(1024);
1226 e = p + 1024;
1227
1228 state = "down";
1229 if(UP(d))
1230 state = "up";
1231
1232 p = seprint(p, e,
1233 "state: %s\n" "nopen: %d\n" "nout: %d\n"
1234 "nmaxout: %d\n" "nframes: %d\n" "maxbcnt: %d [maxmtu %d]\n"
1235 "fw: %.4ux\n"
1236 "model: %s\n" "serial: %s\n" "firmware: %s\n",
1237 state, d->nopen, d->nout,
1238 d->maxout, d->nframes, d->maxbcnt, d->maxmtu,
1239 d->fwver,
1240 d->model, d->serial, d->firmware);
1241 p = seprint(p, e, "flag: ");
1242 p = pflag(p, e, d->flag);
1243
1244 if(p - s < len)
1245 len = p - s;
1246 i = readstr(off, db, len, s);
1247 free(s);
1248 return i;
1249 }
1250
1251 static long
1252 unitread(Chan *c, void *db, long len, vlong off)
1253 {
1254 Aoedev *d;
1255
1256 d = unit2dev(UNIT(c->qid));
1257 if(d->vers != c->qid.vers)
1258 error(Echange);
1259 switch(TYPE(c->qid)){
1260 default:
1261 error(Ebadarg);
1262 case Qctl:
1263 return pstat(d, db, len, off);
1264 case Qdata:
1265 return rw(d, Read, db, len, off);
1266 case Qconfig:
1267 if(!UP(d))
1268 error(Enotup);
1269 return readmem(off, db, len, d->config, d->nconfig);
1270 case Qident:
1271 if(!UP(d))
1272 error(Enotup);
1273 return readmem(off, db, len, d->ident, sizeof d->ident);
1274 }
1275 }
1276
1277 static int
1278 devlinkread(Chan *c, void *db, int len, int off)
1279 {
1280 int i;
1281 char *s, *p, *e;
1282 Aoedev *d;
1283 Devlink *l;
1284
1285 d = unit2dev(UNIT(c->qid));
1286 i = L(c->qid);
1287 if(i >= d->ndl)
1288 return 0;
1289 l = d->dl + i;
1290
1291 s = p = malloc(1024);
1292 e = s + 1024;
1293
1294 p = seprint(p, e, "addr: ");
1295 for(i = 0; i < l->nea; i++)
1296 p = seprint(p, e, "%E ", l->eatab[i]);
1297 p = seprint(p, e, "\n");
1298 p = seprint(p, e, "npkt: %uld\n", l->npkt);
1299 p = seprint(p, e, "resent: %uld\n", l->resent);
1300 p = seprint(p, e, "flag: "); p = pflag(p, e, l->flag);
1301 p = seprint(p, e, "rttavg: %uld\n", Tk2ms(l->rttavg));
1302 p = seprint(p, e, "mintimer: %uld\n", Tk2ms(l->mintimer));
1303
1304 p = seprint(p, e, "nl path: %s\n", l->nl->path);
1305 p = seprint(p, e, "nl ea: %E\n", l->nl->ea);
1306 p = seprint(p, e, "nl flag: "); p = pflag(p, e, l->flag);
1307 p = seprint(p, e, "nl lostjumbo: %d\n", l->nl->lostjumbo);
1308 p = seprint(p, e, "nl datamtu: %d\n", l->nl->datamtu);
1309
1310 if(p - s < len)
1311 len = p - s;
1312 i = readstr(off, db, len, s);
1313 free(s);
1314 return i;
1315 }
1316
1317 static long
1318 topctlread(Chan *d0, void *db, int len, int off)
1319 {
1320 int i;
1321 char *s, *p, *e;
1322 Netlink *n;
1323
1324 s = p = malloc(1024);
1325 e = s + 1024;
1326
1327 p = seprint(p, e, "debug: %d\n", debug);
1328 p = seprint(p, e, "autodiscover: %d\n", autodiscover);
1329 p = seprint(p, e, "rediscover: %d\n", rediscover);
1330
1331 for(i = 0; i < Nnetlink; i++){
1332 n = netlinks.nl+i;
1333 if(n->cc == 0)
1334 continue;
1335 p = seprint(p, e, "if%d path: %s\n", i, n->path);
1336 p = seprint(p, e, "if%d ea: %E\n", i, n->ea);
1337 p = seprint(p, e, "if%d flag: ", i); p = pflag(p, e, n->flag);
1338 p = seprint(p, e, "if%d lostjumbo: %d\n", i, n->lostjumbo);
1339 p = seprint(p, e, "if%d datamtu: %d\n", i, n->datamtu);
1340 }
1341
1342 if(p - s < len)
1343 len = p - s;
1344 i = readstr(off, db, len, s);
1345 free(s);
1346 return i;
1347 }
1348
1349 static long
1350 aoeread(Chan *c, void *db, long n, vlong off)
1351 {
1352 switch(TYPE(c->qid)){
1353 default:
1354 error(Eperm);
1355 case Qzero:
1356 case Qtopdir:
1357 case Qunitdir:
1358 case Qdevlinkdir:
1359 return devdirread(c, db, n, 0, 0, aoegen);
1360 case Qtopctl:
1361 return topctlread(c, db, n, off);
1362 case Qtoplog:
1363 return eventlogread(db, n);
1364 case Qctl:
1365 case Qdata:
1366 case Qconfig:
1367 case Qident:
1368 return unitread(c, db, n, off);
1369 case Qdevlink:
1370 return devlinkread(c, db, n, off);
1371 }
1372 }
1373
1374 static long
1375 configwrite(Aoedev *d, void *db, long len)
1376 {
1377 char *s;
1378 Aoeqc *ch;
1379 Frame *f;
1380 Srb *srb;
1381
1382 if(!UP(d))
1383 error(Enotup);
1384 if(len > sizeof d->config)
1385 error(Etoobig);
1386 srb = srballoc(len);
1387 s = malloc(len);
1388 memmove(s, db, len);
1389 if(waserror()){
1390 srbfree(srb);
1391 free(s);
1392 nexterror();
1393 }
1394 for (;;) {
1395 QLOCK(d);
1396 if(waserror()){
1397 QUNLOCK(d);
1398 nexterror();
1399 }
1400 f = freeframe(d);
1401 if(f != nil)
1402 break;
1403 poperror();
1404 QUNLOCK(d);
1405 if(waserror())
1406 nexterror();
1407 tsleep(&up->sleep, return0, 0, 100);
1408 poperror();
1409 }
1410 f->nhdr = Szaoeqc;
1411 memset(f->hdr, 0, f->nhdr);
1412 ch = (Aoeqc*)f->hdr;
1413 if(hset(d, f, (Aoehdr*)ch, ACconfig) == -1)
1414 return 0;
1415 f->srb = srb;
1416 f->dp = s;
1417 ch->verccmd = AQCfset;
1418 hnputs(ch->cslen, len);
1419 d->nout++;
1420 srb->nout++;
1421 f->dl->npkt++;
1422 f->dlen = len;
1423 /*
1424 * these refer to qlock & waserror in the above for loop.
1425 * there's still the first waserror outstanding.
1426 */
1427 poperror();
1428 QUNLOCK(d);
1429
1430 devtab[f->nl->dc->type]->bwrite(f->nl->dc, allocfb(f), 0);
1431 SLEEP(srb, srbready, srb);
1432 if(srb->error)
1433 error(srb->error);
1434
1435 QLOCK(d);
1436 if(waserror()){
1437 QUNLOCK(d);
1438 nexterror();
1439 }
1440 memmove(d->config, s, len);
1441 d->nconfig = len;
1442 poperror();
1443 QUNLOCK(d);
1444
1445 poperror(); /* pop first waserror */
1446
1447 srbfree(srb);
1448 memmove(db, s, len);
1449 free(s);
1450 return len;
1451 }
1452
1453 static int
1454 getmtu(Chan *m)
1455 {
1456 int n, mtu;
1457 char buf[36];
1458
1459 mtu = 1514;
1460 if(m == nil || waserror())
1461 return mtu;
1462 n = devtab[m->type]->read(m, buf, sizeof buf - 1, 0);
1463 poperror();
1464 if(n > 12){
1465 buf[n] = 0;
1466 mtu = strtoul(buf + 12, 0, 0);
1467 }
1468 return mtu;
1469 }
1470
1471 static int
1472 devmaxdata(Aoedev *d)
1473 {
1474 int i, m, mtu;
1475 Devlink *l;
1476 Netlink *n;
1477
1478 mtu = 100000;
1479 for(i = 0; i < d->ndl; i++){
1480 l = d->dl + i;
1481 n = l->nl;
1482 if((l->flag & Dup) == 0 || (n->flag & Dup) == 0)
1483 continue;
1484 m = getmtu(n->mtu);
1485 if(m > l->datamtu)
1486 m = l->datamtu;
1487 if(m < mtu)
1488 mtu = m;
1489 }
1490 if(mtu == 100000)
1491 mtu = 1514;
1492 mtu -= Szaoeata;
1493 mtu -= mtu % Aoesectsz;
1494 return mtu;
1495 }
1496
1497 static int
1498 toggle(char *s, int init)
1499 {
1500 if(s == nil)
1501 return init ^ 1;
1502 return strcmp(s, "on") == 0;
1503 }
1504
1505 static void ataident(Aoedev*);
1506
1507 static long
1508 unitctlwrite(Aoedev *d, void *db, long n)
1509 {
1510 uint maxbcnt, m;
1511 uvlong bsize;
1512 enum {
1513 Failio,
1514 Ident,
1515 Jumbo,
1516 Maxbno,
1517 Mtu,
1518 Setsize,
1519 };
1520 Cmdbuf *cb;
1521 Cmdtab *ct;
1522 static Cmdtab cmds[] = {
1523 {Failio, "failio", 1 },
1524 {Ident, "identify", 1 },
1525 {Jumbo, "jumbo", 0 },
1526 {Maxbno, "maxbno", 0 },
1527 {Mtu, "mtu", 0 },
1528 {Setsize, "setsize", 0 },
1529 };
1530
1531 cb = parsecmd(db, n);
1532 QLOCK(d);
1533 if(waserror()){
1534 QUNLOCK(d);
1535 free(cb);
1536 nexterror();
1537 }
1538 ct = lookupcmd(cb, cmds, nelem(cmds));
1539 switch(ct->index){
1540 case Failio:
1541 downdev(d, "i/o failure");
1542 break;
1543 case Ident:
1544 ataident(d);
1545 break;
1546 case Jumbo:
1547 m = 0;
1548 if(d->flag & Djumbo)
1549 m = 1;
1550 toggle(cb->f[1], m);
1551 if(m)
1552 d->flag |= Djumbo;
1553 else
1554 d->flag &= ~Djumbo;
1555 break;
1556 case Maxbno:
1557 case Mtu:
1558 maxbcnt = devmaxdata(d);
1559 if(cb->nf > 2)
1560 error(Ecmdargs);
1561 if(cb->nf == 2){
1562 m = strtoul(cb->f[1], 0, 0);
1563 if(ct->index == Maxbno)
1564 m *= Aoesectsz;
1565 else{
1566 m -= Szaoeata;
1567 m &= ~(Aoesectsz-1);
1568 }
1569 if(m == 0 || m > maxbcnt)
1570 cmderror(cb, "invalid mtu");
1571 maxbcnt = m;
1572 d->maxmtu = m;
1573 } else
1574 d->maxmtu = Maxmtu;
1575 d->maxbcnt = maxbcnt;
1576 break;
1577 case Setsize:
1578 bsize = d->realbsize;
1579 if(cb->nf > 2)
1580 error(Ecmdargs);
1581 if(cb->nf == 2){
1582 bsize = strtoull(cb->f[1], 0, 0);
1583 if(bsize % Aoesectsz)
1584 cmderror(cb, "disk size must be sector aligned");
1585 }
1586 d->bsize = bsize;
1587 break;
1588 default:
1589 cmderror(cb, "unknown aoe control message");
1590 }
1591 poperror();
1592 QUNLOCK(d);
1593 free(cb);
1594 return n;
1595 }
1596
1597 static long
1598 unitwrite(Chan *c, void *db, long n, vlong off)
1599 {
1600 long rv;
1601 char *buf;
1602 Aoedev *d;
1603
1604 d = unit2dev(UNIT(c->qid));
1605 switch(TYPE(c->qid)){
1606 default:
1607 error(Ebadarg);
1608 case Qctl:
1609 return unitctlwrite(d, db, n);
1610 case Qident:
1611 error(Eperm);
1612 case Qdata:
1613 return rw(d, Write, db, n, off);
1614 case Qconfig:
1615 if(off + n > sizeof d->config)
1616 error(Etoobig);
1617 buf = malloc(sizeof d->config);
1618 if(waserror()){
1619 free(buf);
1620 nexterror();
1621 }
1622 memmove(buf, d->config, d->nconfig);
1623 memmove(buf + off, db, n);
1624 rv = configwrite(d, buf, n + off);
1625 poperror();
1626 free(buf);
1627 return rv;
1628 }
1629 }
1630
1631 static Netlink*
1632 addnet(char *path, Chan *cc, Chan *dc, Chan *mtu, uchar *ea)
1633 {
1634 Netlink *nl, *e;
1635
1636 LOCK(&netlinks);
1637 if(waserror()){
1638 UNLOCK(&netlinks);
1639 nexterror();
1640 }
1641 nl = netlinks.nl;
1642 e = nl + nelem(netlinks.nl);
1643 for(; nl < e && nl->cc; nl++)
1644 continue;
1645 if(nl == e)
1646 error("out of netlink structures");
1647 nl->cc = cc;
1648 nl->dc = dc;
1649 nl->mtu = mtu;
1650 strncpy(nl->path, path, sizeof nl->path);
1651 memmove(nl->ea, ea, sizeof nl->ea);
1652 poperror();
1653 nl->flag |= Dup;
1654 UNLOCK(&netlinks);
1655 return nl;
1656 }
1657
1658 static int
1659 newunit(void)
1660 {
1661 int x;
1662
1663 LOCK(&units);
1664 if(units.ref == Maxunits)
1665 x = -1;
1666 else
1667 x = units.ref++;
1668 UNLOCK(&units);
1669 return x;
1670 }
1671
1672 static int
1673 dropunit(void)
1674 {
1675 int x;
1676
1677 LOCK(&units);
1678 x = --units.ref;
1679 UNLOCK(&units);
1680 return x;
1681 }
1682
1683 /*
1684 * always allocate max frames. maxout may change.
1685 */
1686 static Aoedev*
1687 newdev(long major, long minor, int n)
1688 {
1689 Aoedev *d;
1690 Frame *f, *e;
1691
1692 d = malloc(sizeof *d);
1693 f = malloc(sizeof *f*Maxframes);
1694 if(!d || !f) {
1695 free(d);
1696 free(f);
1697 error("aoe device allocation failure");
1698 }
1699 d->nframes = n;
1700 d->frames = f;
1701 for (e = f + Maxframes; f < e; f++)
1702 f->tag = Tfree;
1703 d->maxout = n;
1704 d->major = major;
1705 d->minor = minor;
1706 d->maxbcnt = Dbcnt;
1707 d->flag = Djumbo;
1708 d->maxmtu = Maxmtu;
1709 d->unit = newunit(); /* bzzt. inaccurate if units removed */
1710 if(d->unit == -1){
1711 free(d);
1712 free(d->frames);
1713 error("too many units");
1714 }
1715 d->dl = d->dltab;
1716 return d;
1717 }
1718
1719 static Aoedev*
1720 mm2dev(int major, int minor)
1721 {
1722 Aoedev *d;
1723
1724 RLOCK(&devs);
1725 for(d = devs.d; d; d = d->next)
1726 if(d->major == major && d->minor == minor){
1727 RUNLOCK(&devs);
1728 return d;
1729 }
1730 RUNLOCK(&devs);
1731 eventlog("mm2dev: %d.%d not found\n", major, minor);
1732 return nil;
1733 }
1734
1735 /* Find the device in our list. If not known, add it */
1736 static Aoedev*
1737 getdev(long major, long minor, int n)
1738 {
1739 Aoedev *d;
1740
1741 if(major == 0xffff || minor == 0xff)
1742 return 0;
1743 WLOCK(&devs);
1744 if(waserror()){
1745 WUNLOCK(&devs);
1746 nexterror();
1747 }
1748 for(d = devs.d; d; d = d->next)
1749 if(d->major == major && d->minor == minor)
1750 break;
1751 if(d == nil) {
1752 d = newdev(major, minor, n);
1753 d->next = devs.d;
1754 devs.d = d;
1755 }
1756 poperror();
1757 WUNLOCK(&devs);
1758 return d;
1759 }
1760
1761 static ushort
1762 gbit16(void *a)
1763 {
1764 uchar *i;
1765
1766 i = a;
1767 return i[1] << 8 | i[0];
1768 }
1769
1770 static ulong
1771 gbit32(void *a)
1772 {
1773 ulong j;
1774 uchar *i;
1775
1776 i = a;
1777 j = i[3] << 24;
1778 j |= i[2] << 16;
1779 j |= i[1] << 8;
1780 j |= i[0];
1781 return j;
1782 }
1783
1784 static uvlong
1785 gbit64(void *a)
1786 {
1787 uchar *i;
1788
1789 i = a;
1790 return (uvlong)gbit32(i+4) << 32 | gbit32(a);
1791 }
1792
1793 static void
1794 ataident(Aoedev *d)
1795 {
1796 Aoeata *a;
1797 Block *b;
1798 Frame *f;
1799
1800 f = freeframe(d);
1801 if(f == nil)
1802 return;
1803 f->nhdr = Szaoeata;
1804 memset(f->hdr, 0, f->nhdr);
1805 a = (Aoeata*)f->hdr;
1806 if(hset(d, f, (Aoehdr*)a, ACata) == -1)
1807 return;
1808 f->srb = srbkalloc(0, 0);
1809 a->cmdstat = Cid; /* ata 6, page 110 */
1810 a->scnt = 1;
1811 a->lba[3] = 0xa0;
1812 d->nout++;
1813 f->dl->npkt++;
1814 f->bcnt = 512;
1815 f->dlen = 0;
1816 b = allocfb(f);
1817 devtab[f->nl->dc->type]->bwrite(f->nl->dc, b, 0);
1818 }
1819
1820 static int
1821 newdlea(Devlink *l, uchar *ea)
1822 {
1823 int i;
1824 uchar *t;
1825
1826 for(i = 0; i < Nea; i++){
1827 t = l->eatab[i];
1828 if(i == l->nea){
1829 memmove(t, ea, Eaddrlen);
1830 return l->nea++;
1831 }
1832 if(memcmp(t, ea, Eaddrlen) == 0)
1833 return i;
1834 }
1835 return -1;
1836 }
1837
1838 static Devlink*
1839 newdevlink(Aoedev *d, Netlink *n, Aoeqc *c)
1840 {
1841 int i;
1842 Devlink *l;
1843
1844 for(i = 0; i < Ndevlink; i++){
1845 l = d->dl + i;
1846 if(i == d->ndl){
1847 d->ndl++;
1848 newdlea(l, c->src);
1849 l->datamtu = c->scnt*Aoesectsz;
1850 l->nl = n;
1851 l->flag |= Dup;
1852 l->mintimer = Rtmin;
1853 l->rttavg = Rtmax;
1854 return l;
1855 }
1856 if(l->nl == n){
1857 newdlea(l, c->src);
1858 l->datamtu = c->scnt*Aoesectsz;
1859 l->flag |= Dup;
1860 return l;
1861 }
1862 }
1863 eventlog("%æ: out of links: %s:%E to %E\n", d, n->path, n->ea, c->src);
1864 return 0;
1865 }
1866
1867 static void
1868 errrsp(Block *b, char *s)
1869 {
1870 int n;
1871 Aoedev *d;
1872 Aoehdr *h;
1873 Frame *f;
1874
1875 h = (Aoehdr*)b->rp;
1876 n = nhgetl(h->tag);
1877 if(n == Tmgmt || n == Tfree)
1878 return;
1879 d = mm2dev(nhgets(h->major), h->minor);
1880 if(d == 0)
1881 return;
1882 if(f = getframe(d, n))
1883 frameerror(d, f, s);
1884 }
1885
1886 static void
1887 qcfgrsp(Block *b, Netlink *nl)
1888 {
1889 int major, cmd, cslen, blen;
1890 unsigned n;
1891 Aoedev *d;
1892 Aoeqc *ch;
1893 Devlink *l;
1894 Frame *f;
1895
1896 ch = (Aoeqc*)b->rp;
1897 major = nhgets(ch->major);
1898 n = nhgetl(ch->tag);
1899 if(n != Tmgmt){
1900 d = mm2dev(major, ch->minor);
1901 if(d == nil)
1902 return;
1903 QLOCK(d);
1904 f = getframe(d, n);
1905 if(f == nil){
1906 QUNLOCK(d);
1907 eventlog("%æ: unknown response tag %ux\n", d, n);
1908 return;
1909 }
1910 cslen = nhgets(ch->cslen);
1911 blen = BLEN(b) - Szaoeqc;
1912 if(cslen < blen)
1913 eventlog("%æ: cfgrsp: tag %.8ux oversized %d %d\n",
1914 d, n, cslen, blen);
1915 if(cslen > blen){
1916 eventlog("%æ: cfgrsp: tag %.8ux runt %d %d\n",
1917 d, n, cslen, blen);
1918 cslen = blen;
1919 }
1920 memmove(f->dp, ch + 1, cslen);
1921 f->srb->nout--;
1922 WAKEUP(f->srb);
1923 d->nout--;
1924 f->srb = nil;
1925 f->tag = Tfree;
1926 QUNLOCK(d);
1927 return;
1928 }
1929
1930 cmd = ch->verccmd & 0xf;
1931 if(cmd != 0){
1932 eventlog("aoe%d.%d: cfgrsp: bad command %d\n", major, ch->minor, cmd);
1933 return;
1934 }
1935 n = nhgets(ch->bufcnt);
1936 if(n > Maxframes)
1937 n = Maxframes;
1938
1939 if(waserror()){
1940 eventlog("getdev: %d.%d ignored: %s\n", major, ch->minor, up->errstr);
1941 return;
1942 }
1943 d = getdev(major, ch->minor, n);
1944 poperror();
1945 if(d == 0)
1946 return;
1947
1948 QLOCK(d);
1949 *up->errstr = 0;
1950 if(waserror()){
1951 QUNLOCK(d);
1952 eventlog("%æ: %s\n", d, up->errstr);
1953 nexterror();
1954 }
1955
1956 l = newdevlink(d, nl, ch); /* add this interface. */
1957
1958 d->fwver = nhgets(ch->fwver);
1959 n = nhgets(ch->cslen);
1960 if(n > sizeof d->config)
1961 n = sizeof d->config;
1962 d->nconfig = n;
1963 memmove(d->config, ch + 1, n);
1964
1965 /* manually set mtu may be reset lower if conditions warrant */
1966 if(l){
1967 n = devmaxdata(d);
1968 if(!(d->flag & Djumbo))
1969 n = Dbcnt;
1970 if(n > d->maxmtu)
1971 n = d->maxmtu;
1972 if(n != d->maxbcnt){
1973 eventlog("%æ: setting %d byte mtu on %s:%E\n",
1974 d, n, nl->path, nl->ea);
1975 d->maxbcnt = n;
1976 }
1977 }
1978 if(d->nopen == 0)
1979 ataident(d);
1980 poperror();
1981 QUNLOCK(d);
1982 }
1983
1984 static void
1985 idmove(char *p, ushort *a, unsigned n)
1986 {
1987 int i;
1988 char *op, *e;
1989
1990 op = p;
1991 for(i = 0; i < n / 2; i++){
1992 *p++ = a[i] >> 8;
1993 *p++ = a[i];
1994 }
1995 *p = 0;
1996 while(p > op && *--p == ' ')
1997 *p = 0;
1998 e = p;
1999 p = op;
2000 while(*p == ' ')
2001 p++;
2002 memmove(op, p, n - (e - p));
2003 }
2004
2005 static vlong
2006 aoeidentify(Aoedev *d, ushort *id)
2007 {
2008 int i;
2009 vlong s;
2010
2011 d->flag &= ~(Dllba|Dpower|Dsmart|Dnop|Dup);
2012
2013 i = gbit16(id+83) | gbit16(id+86);
2014 if(i & (1<<10)){
2015 d->flag |= Dllba;
2016 s = gbit64(id+100);
2017 }else
2018 s = gbit32(id+60);
2019
2020 i = gbit16(id+83);
2021 if((i>>14) == 1) {
2022 if(i & (1<<3))
2023 d->flag |= Dpower;
2024 i = gbit16(id+82);
2025 if(i & 1)
2026 d->flag |= Dsmart;
2027 if(i & (1<<14))
2028 d->flag |= Dnop;
2029 }
2030 // eventlog("%æ up\n", d);
2031 d->flag |= Dup;
2032 memmove(d->ident, id, sizeof d->ident);
2033 return s;
2034 }
2035
2036 static void
2037 newvers(Aoedev *d)
2038 {
2039 LOCK(&drivevers);
2040 d->vers = drivevers.ref++;
2041 UNLOCK(&drivevers);
2042 }
2043
2044 static int
2045 identify(Aoedev *d, ushort *id)
2046 {
2047 vlong osectors, s;
2048 uchar oserial[21];
2049
2050 s = aoeidentify(d, id);
2051 if(s == -1)
2052 return -1;
2053 osectors = d->realbsize;
2054 memmove(oserial, d->serial, sizeof d->serial);
2055
2056 idmove(d->serial, id+10, 20);
2057 idmove(d->firmware, id+23, 8);
2058 idmove(d->model, id+27, 40);
2059
2060 s *= Aoesectsz;
2061 if(osectors != s || memcmp(oserial, d->serial, sizeof oserial)){
2062 d->bsize = s;
2063 d->realbsize = s;
2064 // d->mediachange = 1;
2065 newvers(d);
2066 }
2067 return 0;
2068 }
2069
2070 static void
2071 atarsp(Block *b)
2072 {
2073 unsigned n;
2074 short major;
2075 Aoeata *ahin, *ahout;
2076 Aoedev *d;
2077 Frame *f;
2078 Srb *srb;
2079
2080 ahin = (Aoeata*)b->rp;
2081 major = nhgets(ahin->major);
2082 d = mm2dev(major, ahin->minor);
2083 if(d == nil)
2084 return;
2085 QLOCK(d);
2086 if(waserror()){
2087 QUNLOCK(d);
2088 nexterror();
2089 }
2090 n = nhgetl(ahin->tag);
2091 f = getframe(d, n);
2092 if(f == nil){
2093 dprint("%æ: unexpected response; tag %ux\n", d, n);
2094 goto bail;
2095 }
2096 rtupdate(f->dl, tsince(f->tag));
2097 ahout = (Aoeata*)f->hdr;
2098 srb = f->srb;
2099
2100 if(ahin->cmdstat & 0xa9){
2101 eventlog("%æ: ata error cmd %.2ux stat %.2ux\n",
2102 d, ahout->cmdstat, ahin->cmdstat);
2103 if(srb)
2104 srb->error = Eio;
2105 } else {
2106 n = ahout->scnt * Aoesectsz;
2107 switch(ahout->cmdstat){
2108 case Crd:
2109 case Crdext:
2110 if(BLEN(b) - Szaoeata < n){
2111 eventlog("%æ: runt read blen %ld expect %d\n",
2112 d, BLEN(b), n);
2113 goto bail;
2114 }
2115 memmove(f->dp, b->rp + Szaoeata, n);
2116 case Cwr:
2117 case Cwrext:
2118 if(n > Dbcnt)
2119 f->nl->lostjumbo = 0;
2120 if(f->bcnt -= n){
2121 f->lba += n / Aoesectsz;
2122 f->dp = (uchar*)f->dp + n;
2123 resend(d, f);
2124 goto bail;
2125 }
2126 break;
2127 case Cid:
2128 if(BLEN(b) - Szaoeata < 512){
2129 eventlog("%æ: runt identify blen %ld expect %d\n",
2130 d, BLEN(b), n);
2131 goto bail;
2132 }
2133 identify(d, (ushort*)(b->rp + Szaoeata));
2134 break;
2135 default:
2136 eventlog("%æ: unknown ata command %.2ux \n",
2137 d, ahout->cmdstat);
2138 }
2139 }
2140
2141 if(srb && --srb->nout == 0 && srb->len == 0)
2142 WAKEUP(srb);
2143 f->srb = nil;
2144 f->tag = Tfree;
2145 d->nout--;
2146
2147 work(d);
2148 bail:
2149 poperror();
2150 QUNLOCK(d);
2151 }
2152
2153 static void
2154 netrdaoeproc(void *v)
2155 {
2156 int idx;
2157 char name[Maxpath+1], *s;
2158 Aoehdr *h;
2159 Block *b;
2160 Netlink *nl;
2161
2162 nl = (Netlink*)v;
2163 idx = nl - netlinks.nl;
2164 netlinks.reader[idx] = 1;
2165 kstrcpy(name, nl->path, Maxpath);
2166
2167 if(waserror()){
2168 eventlog("netrdaoe@%s: exiting: %s\n", name, up->errstr);
2169 netlinks.reader[idx] = 0;
2170 wakeup(netlinks.rendez + idx);
2171 pexit(up->errstr, 1);
2172 }
2173 if(autodiscover)
2174 discover(0xffff, 0xff);
2175 for (;;) {
2176 if(!(nl->flag & Dup))
2177 error("netlink is down");
2178 if(nl->dc == nil)
2179 panic("netrdaoe: nl->dc == nil");
2180 b = devtab[nl->dc->type]->bread(nl->dc, 1<<16, 0);
2181 if(b == nil)
2182 error("network read");
2183 h = (Aoehdr*)b->rp;
2184 if(h->verflag & AFrsp)
2185 if(s = aoeerror(h)){
2186 eventlog("%s: %s\n", nl->path, s);
2187 errrsp(b, s);
2188 }else if(h->cmd == ACata)
2189 atarsp(b);
2190 else if(h->cmd == ACconfig)
2191 qcfgrsp(b, nl);
2192 else if((h->cmd & 0xf0) == 0){
2193 eventlog("%s: unknown cmd %d\n",
2194 nl->path, h->cmd);
2195 errrsp(b, "unknown command");
2196 }
2197 freeb(b);
2198 }
2199 }
2200
2201 static void
2202 getaddr(char *path, uchar *ea)
2203 {
2204 int n;
2205 char buf[2*Eaddrlen+1];
2206 Chan *c;
2207
2208 uprint("%s/addr", path);
2209 c = namec(up->genbuf, Aopen, OREAD, 0);
2210 if(waserror()) {
2211 cclose(c);
2212 nexterror();
2213 }
2214 if(c == nil)
2215 panic("æ: getaddr: c == nil");
2216 n = devtab[c->type]->read(c, buf, sizeof buf-1, 0);
2217 poperror();
2218 cclose(c);
2219 buf[n] = 0;
2220 if(parseether(ea, buf) < 0)
2221 error("parseether failure");
2222 }
2223
2224 static void
2225 netbind(char *path)
2226 {
2227 char addr[Maxpath];
2228 uchar ea[2*Eaddrlen+1];
2229 Chan *dc, *cc, *mtu;
2230 Netlink *nl;
2231
2232 snprint(addr, sizeof addr, "%s!0x%x", path, Aoetype);
2233 dc = chandial(addr, nil, nil, &cc);
2234 snprint(addr, sizeof addr, "%s/mtu", path);
2235 if(waserror())
2236 mtu = nil;
2237 else {
2238 mtu = namec(addr, Aopen, OREAD, 0);
2239 poperror();
2240 }
2241
2242 if(waserror()){
2243 cclose(dc);
2244 cclose(cc);
2245 if(mtu)
2246 cclose(mtu);
2247 nexterror();
2248 }
2249 if(dc == nil || cc == nil)
2250 error(Enonexist);
2251 getaddr(path, ea);
2252 nl = addnet(path, cc, dc, mtu, ea);
2253 snprint(addr, sizeof addr, "netrdaoe@%s", path);
2254 kproc(addr, netrdaoeproc, nl);
2255 poperror();
2256 }
2257
/*
 * Sleep condition used by netunbind: true when the int that v
 * points at is non-zero.
 * NOTE(review): netunbind passes a netlinks.reader entry, which
 * netrdaoeproc sets to 1 while it runs and to 0 when it exits, and
 * uses this to "confirm reader is down" -- the sense of the test
 * looks inverted, and the entry is read as an int.  Confirm against
 * the declaration of netlinks.reader before relying on it.
 */
static int
unbound(void *v)
{
	int *flag;

	flag = v;
	return *flag != 0;
}
2263
2264 static void
2265 netunbind(char *path)
2266 {
2267 int i, idx;
2268 Aoedev *d, *p, *next;
2269 Chan *dc, *cc;
2270 Devlink *l;
2271 Frame *f;
2272 Netlink *n, *e;
2273
2274 n = netlinks.nl;
2275 e = n + nelem(netlinks.nl);
2276
2277 LOCK(&netlinks);
2278 for(; n < e; n++)
2279 if(n->dc && strcmp(n->path, path) == 0)
2280 break;
2281 UNLOCK(&netlinks);
2282 if(n == e)
2283 error("device not bound");
2284
2285 /*
2286 * hunt down devices using this interface; disable
2287 * this also terminates the reader.
2288 */
2289 idx = n - netlinks.nl;
2290 WLOCK(&devs);
2291 for(d = devs.d; d; d = d->next){
2292 QLOCK(d);
2293 for(i = 0; i < d->ndl; i++){
2294 l = d->dl + i;
2295 if(l->nl == n)
2296 l->flag &= ~Dup;
2297 }
2298 QUNLOCK(d);
2299 }
2300 n->flag &= ~Dup;
2301 WUNLOCK(&devs);
2302
2303 /* confirm reader is down. */
2304 while(waserror())
2305 ;
2306 sleep(netlinks.rendez + idx, unbound, netlinks.reader + idx);
2307 poperror();
2308
2309 /* reschedule packets. */
2310 WLOCK(&devs);
2311 for(d = devs.d; d; d = d->next){
2312 QLOCK(d);
2313 for(i = 0; i < d->nframes; i++){
2314 f = d->frames + i;
2315 if(f->tag != Tfree && f->nl == n)
2316 resend(d, f);
2317 }
2318 QUNLOCK(d);
2319 }
2320 WUNLOCK(&devs);
2321
2322 /* squeeze devlink pool. (we assert nobody is using them now) */
2323 WLOCK(&devs);
2324 for(d = devs.d; d; d = d->next){
2325 QLOCK(d);
2326 for(i = 0; i < d->ndl; i++){
2327 l = d->dl + i;
2328 if(l->nl == n)
2329 memmove(l, l + 1, sizeof *l * (--d->ndl - i));
2330 }
2331 QUNLOCK(d);
2332 }
2333 WUNLOCK(&devs);
2334
2335 /* close device link. */
2336 LOCK(&netlinks);
2337 dc = n->dc;
2338 cc = n->cc;
2339 if(n->mtu)
2340 cclose(n->mtu);
2341 memset(n, 0, sizeof *n);
2342 UNLOCK(&netlinks);
2343
2344 cclose(dc);
2345 cclose(cc);
2346
2347 /* squeeze orphan devices */
2348 WLOCK(&devs);
2349 for(p = d = devs.d; d; d = next){
2350 next = d->next;
2351 if(d->ndl > 0){
2352 p = d;
2353 continue;
2354 }
2355 QLOCK(d);
2356 downdev(d, "orphan");
2357 QUNLOCK(d);
2358 if(p != devs.d)
2359 p->next = next;
2360 else{
2361 devs.d = next;
2362 p = devs.d;
2363 }
2364 free(d->frames);
2365 free(d);
2366 dropunit();
2367 }
2368 WUNLOCK(&devs);
2369 }
2370
2371 static void
2372 strtoss(char *f, ushort *shelf, ushort *slot)
2373 {
2374 ulong sh;
2375 char *s;
2376
2377 *shelf = 0xffff;
2378 *slot = 0xff;
2379 if(!f)
2380 return;
2381 *shelf = sh = strtol(f, &s, 0);
2382 if(s == f || sh > 0xffff)
2383 error("bad shelf");
2384 f = s;
2385 if(*f++ == '.'){
2386 *slot = strtol(f, &s, 0);
2387 if(s == f || *slot > 0xff)
2388 error("bad shelf");
2389 }else
2390 *slot = 0xff;
2391 }
2392
2393 static void
2394 discoverstr(char *f)
2395 {
2396 ushort shelf, slot;
2397
2398 strtoss(f, &shelf, &slot);
2399 discover(shelf, slot);
2400 }
2401
2402 static void
2403 removedev(Aoedev *d)
2404 {
2405 int i;
2406 Aoedev *p;
2407
2408 WLOCK(&devs);
2409 p = 0;
2410 if(d != devs.d)
2411 for(p = devs.d; p; p = p->next)
2412 if(p->next == d)
2413 break;
2414 QLOCK(d);
2415 d->flag &= ~Dup;
2416 newvers(d);
2417 d->ndl = 0;
2418 QUNLOCK(d);
2419 for(i = 0; i < d->nframes; i++)
2420 frameerror(d, d->frames+i, Enotup);
2421
2422 if(p)
2423 p->next = d->next;
2424 else
2425 devs.d = d->next;
2426 free(d->frames);
2427 free(d);
2428 dropunit();
2429 WUNLOCK(&devs);
2430 }
2431
2432
2433 static void
2434 aoeremove(Chan *c)
2435 {
2436 switch(TYPE(c->qid)){
2437 default:
2438 case Qzero:
2439 case Qtopdir:
2440 case Qtoplog:
2441 case Qtopctl:
2442 case Qctl:
2443 case Qdata:
2444 case Qconfig:
2445 case Qident:
2446 error(Eperm);
2447 case Qunitdir:
2448 removedev(unit2dev(UNIT(c->qid)));
2449 break;
2450 }
2451 }
2452
2453 static void
2454 removestr(char *f)
2455 {
2456 ushort shelf, slot;
2457 Aoedev *d;
2458
2459 strtoss(f, &shelf, &slot);
2460 WLOCK(&devs);
2461 for(d = devs.d; d; d = d->next)
2462 if(shelf == d->major && slot == d->minor){
2463 WUNLOCK(&devs); /* BOTCH */
2464 removedev(d);
2465 return;
2466 }
2467 WUNLOCK(&devs);
2468 error("device not bound");
2469 }
2470
2471 static long
2472 topctlwrite(void *db, long n)
2473 {
2474 enum {
2475 Autodiscover,
2476 Bind,
2477 Debug,
2478 Discover,
2479 Closewait,
2480 Rediscover,
2481 Remove,
2482 Unbind,
2483 };
2484 char *f;
2485 Cmdbuf *cb;
2486 Cmdtab *ct;
2487 static Cmdtab cmds[] = {
2488 { Autodiscover, "autodiscover", 0 },
2489 { Bind, "bind", 2 },
2490 { Debug, "debug", 0 },
2491 { Discover, "discover", 0 },
2492 { Rediscover, "rediscover", 0 },
2493 { Remove, "remove", 2 },
2494 { Unbind, "unbind", 2 },
2495 };
2496
2497 cb = parsecmd(db, n);
2498 if(waserror()){
2499 free(cb);
2500 nexterror();
2501 }
2502 ct = lookupcmd(cb, cmds, nelem(cmds));
2503 f = cb->f[1];
2504 switch(ct->index){
2505 case Autodiscover:
2506 autodiscover = toggle(f, autodiscover);
2507 break;
2508 case Bind:
2509 netbind(f);
2510 break;
2511 case Debug:
2512 debug = toggle(f, debug);
2513 break;
2514 case Discover:
2515 discoverstr(f);
2516 break;
2517 case Rediscover:
2518 rediscover = toggle(f, rediscover);
2519 break;
2520 case Remove:
2521 removestr(f); /* depricated */
2522 break;
2523 case Unbind:
2524 netunbind(f);
2525 break;
2526 default:
2527 cmderror(cb, "unknown aoe control message");
2528 }
2529 poperror();
2530 free(cb);
2531 return n;
2532 }
2533
2534 static long
2535 aoewrite(Chan *c, void *db, long n, vlong off)
2536 {
2537 switch(TYPE(c->qid)){
2538 default:
2539 case Qzero:
2540 case Qtopdir:
2541 case Qunitdir:
2542 case Qtoplog:
2543 error(Eperm);
2544 case Qtopctl:
2545 return topctlwrite(db, n);
2546 case Qctl:
2547 case Qdata:
2548 case Qconfig:
2549 case Qident:
2550 return unitwrite(c, db, n, off);
2551 }
2552 }
2553
2554 Dev aoedevtab = {
2555 L'æ',
2556 "aoe",
2557
2558 devreset,
2559 devinit,
2560 devshutdown,
2561 aoeattach,
2562 aoewalk,
2563 aoestat,
2564 aoeopen,
2565 devcreate,
2566 aoeclose,
2567 aoeread,
2568 devbread,
2569 aoewrite,
2570 devbwrite,
2571 aoeremove,
2572 devwstat,
2573 devpower,
2574 devconfig,
2575 };