ref: 9b69f546334e94ec191d35b15dcaace98fbdcb1b
dir: /sys/src/9/port/segment.c/
#include "u.h"
#include "../port/lib.h"
#include "mem.h"
#include "dat.h"
#include "fns.h"
#include "../port/error.h"
/*
* Attachable segment types.
*
* Table of the named segment classes that segattach() can instantiate.
* findphysseg() and addphysseg() walk it until they reach an entry
* whose name is nil, so the last used entry must always be followed
* by an all-zero terminator.  Further entries may be appended at run
* time with addphysseg().
*/
static Physseg physseg[10] = {
{ SG_SHARED, "shared", 0, SEGMAXSIZE },
{ SG_BSS, "memory", 0, SEGMAXSIZE },
{ 0, 0, 0, 0 },
};
/* serializes insertions into physseg[]; taken by addphysseg() */
static Lock physseglock;
/* number of chains in the image hash table */
#define IHASHSIZE 64
/* head of the hash chain for key s; the argument is parenthesized so any expression may be passed */
#define ihash(s) imagealloc.hash[(s)%IHASHSIZE]

/*
 * Global bookkeeping for Images: a hash table used by attachimage()
 * and putimage(), plus the list of idle images maintained by
 * idleimage() and busyimage().  The embedded Lock protects the hash
 * chains, the idle list and the two counters.
 */
static struct Imagealloc
{
	Lock;
	QLock ireclaim; /* mutex on reclaiming idle images */
	ulong pgidle; /* pages in idle list (reclaimable) */
	ulong nidle; /* number of images on the idle list */
	Image *idle; /* head of the idle list */
	Image *hash[IHASHSIZE]; /* chains linked through Image.hash */
}imagealloc;

/* optional hook consulted first by segattach(); nil when not installed */
Segment* (*_globalsegattach)(char*);
/*
 * Allocate an Image followed by its page hash table.
 *
 * pages is the expected number of pages; it is rounded up to a power
 * of two to size the table, and tables larger than 1024 entries are
 * scaled down by a factor of 16.  The returned image carries one
 * reference and has pghsize set; returns nil when malloc fails.
 */
Image*
newimage(ulong pages)
{
	ulong pghsize;
	Image *i;

	/*
	 * A count of zero would wrap in the rounding below and leave
	 * pghsize == 0, which is not a power of two; fall back to a
	 * single-entry table instead.
	 */
	if(pages == 0)
		pages = 1;

	/*
	 * make power of two
	 * NOTE(review): the shifts cover 32 bits only, which assumes a
	 * 32-bit ulong -- confirm for the target compilers.
	 */
	pghsize = pages-1;
	pghsize |= pghsize >> 16;
	pghsize |= pghsize >> 8;
	pghsize |= pghsize >> 4;
	pghsize |= pghsize >> 2;
	pghsize |= pghsize >> 1;
	pghsize++;

	/* keep big tables sparse rather than one slot per page */
	if(pghsize > 1024)
		pghsize >>= 4;

	i = malloc(sizeof(Image) + pghsize * sizeof(Page*));
	if(i == nil)
		return nil;

	i->ref = 1;
	i->pghsize = pghsize;

	return i;
}
/* Initialization entry point for this file; there is nothing to set up, so the body is empty. */
void
initseg(void)
{
}
/*
 * Create a segment of the given type covering size pages from base.
 *
 * The new segment has one reference, an empty semaphore list and no
 * pages.  Physical segments get no page-table map at all; other
 * segments use the map embedded in the Segment when it is big enough
 * and a separately allocated one otherwise.
 *
 * Raises Enovmem when size exceeds SEGMAPSIZE*PTEPERTAB pages and
 * Enomem when an allocation fails.
 */
Segment *
newseg(int type, uintptr base, ulong size)
{
	Segment *seg;
	int ntab;

	if(size > (SEGMAPSIZE*PTEPERTAB))
		error(Enovmem);

	if((seg = malloc(sizeof(Segment))) == nil)
		error(Enomem);

	seg->type = type;
	seg->base = base;
	seg->size = size;
	seg->top = base+(size*BY2PG);
	seg->ref = 1;
	seg->swapped = 0;
	seg->used = 0;

	/* empty circular list: the head points at itself */
	seg->sema.next = &seg->sema;
	seg->sema.prev = &seg->sema;

	/* physical segments keep this empty map */
	seg->map = nil;
	seg->mapsize = 0;
	if((type & SG_TYPE) == SG_PHYSICAL)
		return seg;

	/* number of Pte tables needed to cover size pages */
	ntab = ROUND(size, PTEPERTAB)/PTEPERTAB;
	if(ntab <= nelem(seg->ssegmap)){
		seg->map = seg->ssegmap;
		seg->mapsize = nelem(seg->ssegmap);
		return seg;
	}

	seg->map = malloc(ntab*sizeof(Pte*));
	if(seg->map == nil){
		free(seg);
		error(Enomem);
	}
	seg->mapsize = ntab;
	return seg;
}
/*
 * Drop one reference to segment s; a nil s is ignored.
 *
 * When the last reference goes away the segment is detached from its
 * image (if any), every page and swap slot still recorded in its page
 * tables is released, and the page tables, the map, the profile
 * buffer and the Segment itself are freed.
 */
void
putseg(Segment *s)
{
Image *i;
if(s == nil)
return;
i = s->image;
if(i != nil) {
/*
* We must hold image lock here during
* decref() to prevent someone from taking
* a reference to our segment from the cache.
* Just letting decref(s) drop to zero *before*
* and then checking s->ref again under image
* lock is not sufficient, as someone can grab
* a reference and then call putseg() again;
* freeing segment. By the time we hold image lock,
* the segment would be freed from under us.
*/
lock(i);
if(decref(s) != 0){
unlock(i);
return;
}
/* forget the image's cached pointer to this segment */
if(i->s == s)
i->s = nil;
/* drops the segment's reference to the image; putimage() unlocks i */
putimage(i);
} else if(decref(s) != 0)
return;
/* the semaphore list must be empty (both links point at the head) */
assert(s->sema.prev == &s->sema);
assert(s->sema.next == &s->sema);
if(s->mapsize > 0){
Pte **pte, **emap;
Page *fh, *ft;
ulong np;
/* fh/ft: head and tail of the chain of np pages handed to freepages() */
np = 0;
fh = ft = nil;
emap = &s->map[s->mapsize];
for(pte = s->map; pte < emap; pte++){
Page **pg, **pe, *entry;
if(*pte == nil)
continue;
/* only the [first, last] range of a table can hold entries */
pg = (*pte)->first;
pe = (*pte)->last;
while(pg <= pe){
entry = *pg++;
if(entry == nil)
continue;
/* entry is a swap reference rather than a resident page */
if(onswap(entry)){
putswap(entry);
continue;
}
/* deadpage() returns nil when there is nothing to put on the chain */
entry = deadpage(entry);
if(entry == nil)
continue;
if(fh != nil)
ft->next = entry;
else
fh = entry;
ft = entry;
np++;
}
free(*pte);
}
freepages(fh, ft, np);
/* the map is either the embedded ssegmap or a separate allocation */
if(s->map != s->ssegmap)
free(s->map);
}
if(s->profile != nil)
free(s->profile);
free(s);
}
/*
 * Allocate an empty page table.  Returns nil when malloc fails.
 */
Pte*
ptealloc(void)
{
	Pte *tab;

	tab = malloc(sizeof(Pte));
	if(tab == nil)
		return nil;

	/*
	 * Empty range: first past the end and last at the start, so the
	 * first page inserted moves both (see segpage).
	 */
	tab->last = tab->pages;
	tab->first = &tab->pages[PTEPERTAB];
	return tab;
}
/*
 * Copy the entries of page table old into the same slots of new,
 * taking an extra reference on each: dupswap() for entries that are
 * on swap, incref() for the others.  new->first is set to the slot
 * matching old->first, new->last to the last slot actually filled.
 * Returns new.
 */
static Pte*
ptecpy(Pte *new, Pte *old)
{
	Page **from, **to, *pg;

	to = new->pages + (old->first - old->pages);
	new->first = to;

	from = old->first;
	while(from <= old->last){
		pg = *from;
		if(pg != nil){
			if(onswap(pg))
				dupswap(pg);
			else
				incref(pg);
			new->last = to;
			*to = pg;
		}
		from++;
		to++;
	}
	return new;
}
/*
 * Produce the segment that corresponds to seg[segno] for a duplicate
 * of the segment set.
 *
 * Depending on the segment type the result is either the very same
 * segment with one more reference (text, shared, physical, fixed,
 * sticky and unknown types, and bss/data when share is set) or a new
 * segment whose page tables reference the same pages and swap slots
 * as the original.  A data segment found in the text slot
 * (segno == TSEG) is converted with data2txt() instead.
 *
 * s is qlocked for the duration; an error raised by a callee unlocks
 * it and is passed on.  Raises Enomem when a page table cannot be
 * allocated.
 */
Segment*
dupseg(Segment **seg, int segno, int share)
{
	int i;
	Pte *pte;
	Segment *n, *s;

	s = seg[segno];
	qlock(s);
	if(waserror()){
		qunlock(s);
		nexterror();
	}
	switch(s->type&SG_TYPE) {
	case SG_TEXT:		/* New segment shares pte set */
	case SG_SHARED:
	case SG_PHYSICAL:
	case SG_FIXED:
	case SG_STICKY:
	default:
		goto sameseg;

	case SG_STACK:
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_BSS:		/* Just copy on write */
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		break;

	case SG_DATA:		/* Copy on write plus demand load info */
		if(segno == TSEG){
			n = data2txt(s);
			qunlock(s);
			poperror();
			return n;
		}
		if(share)
			goto sameseg;
		n = newseg(s->type, s->base, s->size);
		n->image = s->image;
		n->fstart = s->fstart;
		n->flen = s->flen;
		incref(s->image);
		break;
	}

	/*
	 * s->mapsize may exceed n->mapsize: ibrk() never shrinks a map
	 * and mfreeseg() leaves emptied Pte tables in place, while n was
	 * sized by newseg() from the current s->size.  Map slots at or
	 * past n->mapsize lie beyond s->top and hold no pages, so stop
	 * there instead of storing outside n->map.
	 */
	for(i = 0; i < s->mapsize && i < n->mapsize; i++){
		if(s->map[i] != nil){
			pte = ptealloc();
			if(pte == nil){
				/* release s and the error label before dropping the partial copy */
				qunlock(s);
				poperror();
				putseg(n);
				error(Enomem);
			}
			n->map[i] = ptecpy(pte, s->map[i]);
		}
	}
	n->used = s->used;
	n->swapped = s->swapped;
	n->flushme = s->flushme;
	/* other users of s must drop their cached mappings of the now shared pages */
	if(s->ref > 1)
		procflushseg(s);
	qunlock(s);
	poperror();
	return n;

sameseg:
	incref(s);
	qunlock(s);
	poperror();
	return s;
}
/*
 * segpage inserts Page p into Segment s at the slot for p->va,
 * allocating the page table for that slot if needed.
 * Panics when p->va lies outside s or s has no map.
 * On allocation failure, calls putpage() on p and raises Enomem.
 */
void
segpage(Segment *s, Page *p)
{
	Pte **slot, *tab;
	uintptr off;
	Page **ent;

	qlock(s);
	if(p->va < s->base || p->va >= s->top || s->mapsize == 0)
		panic("segpage");

	off = p->va - s->base;
	slot = &s->map[off/PTEMAPMEM];
	tab = *slot;
	if(tab == nil){
		tab = ptealloc();
		if(tab == nil){
			qunlock(s);
			putpage(p);
			error(Enomem);
		}
		*slot = tab;
	}

	ent = &tab->pages[(off&(PTEMAPMEM-1))/BY2PG];
	assert(*ent == nil);
	settxtflush(p, s->flushme);
	*ent = p;
	s->used++;

	/* widen the occupied range of the table to include the new entry */
	if(ent < tab->first)
		tab->first = ent;
	if(ent > tab->last)
		tab->last = ent;
	qunlock(s);
}
/*
 * Add offset to the va of every page of s that is not paged out.
 * Only the pages are touched; s->base and s->top are left alone.
 */
void
relocateseg(Segment *s, uintptr offset)
{
	Pte *tab;
	Page **ent, **last;
	int i;

	for(i = 0; i < s->mapsize; i++){
		tab = s->map[i];
		if(tab == nil)
			continue;
		last = tab->last;
		for(ent = tab->first; ent <= last; ent++)
			if(!pagedout(*ent))
				(*ent)->va += offset;
	}
}
/*
 * Find the Image for the file behind channel c, or create one, and
 * return it LOCKED with a reference held for the caller.
 *
 * pages is only used to size the page hash table of a newly created
 * image.  Every attach, to a new or an existing image, bumps nattach.
 * If the image has no channel yet, c is recorded and gets an extra
 * reference.
 *
 * Raises Enomem when no image can be allocated and there are no idle
 * images to reclaim.
 */
Image*
attachimage(Chan *c, ulong pages)
{
Image *i, **l;
retry:
lock(&imagealloc);
/*
* Search the image cache for remains of the text from a previous
* or currently running incarnation
*/
for(i = ihash(c->qid.path); i != nil; i = i->hash){
if(eqchantdqid(c, i->type, i->dev, i->qid, 0)){
incref(i);
goto found;
}
}
/*
* Not cached.  With too many idle images, or when the allocation
* fails, reclaim from the idle images and start over; if that
* reclaims nothing, try freebroken() before retrying.
*/
if(imagealloc.nidle > conf.nimage
|| (i = newimage(pages)) == nil) {
unlock(&imagealloc);
if(imagealloc.nidle == 0)
error(Enomem);
if(imagereclaim(0) == 0)
freebroken(); /* can use the memory */
goto retry;
}
i->type = c->type;
i->dev = c->dev;
i->qid = c->qid;
/* link the new image at the head of its hash chain */
l = &ihash(c->qid.path);
i->hash = *l;
*l = i;
found:
i->nattach++;
unlock(&imagealloc);
/* the image lock is still held on return */
lock(i);
if(i->c == nil){
i->c = c;
incref(c);
}
return i;
}
/*
 * remove from idle list, adjusting the idle page and image counts;
 * does nothing when the image is not on the list
 */
static void
busyimage(Image *i)
{
	Image *succ;

	/* not on idle list? */
	if(i->link == nil)
		return;

	lock(&imagealloc);
	/* i->link addresses whichever pointer currently points at i */
	succ = i->next;
	*i->link = succ;
	if(succ != nil)
		succ->link = i->link;
	i->next = nil;
	i->link = nil;
	imagealloc.nidle--;
	imagealloc.pgidle -= i->pgref;
	unlock(&imagealloc);
}
/*
 * insert into idle list, adjusting the idle page and image counts;
 * does nothing when the image is already on the list
 */
static void
idleimage(Image *i)
{
	Image **l, *j;

	/* already on idle list? */
	if(i->link != nil)
		return;

	lock(&imagealloc);
	/*
	 * sort by least frequently attached first and, among images
	 * attached equally often, most pages first
	 */
	l = &imagealloc.idle;
	while((j = *l) != nil){
		long d = j->nattach - i->nattach;
		if(d > 0 || (d == 0 && j->pgref < i->pgref))
			break;
		l = &j->next;
	}

	/* splice i in front of j, i.e. at *l */
	i->next = j;
	if(j != nil)
		j->link = &i->next;
	i->link = l;
	*l = i;
	imagealloc.nidle++;
	imagealloc.pgidle += i->pgref;
	unlock(&imagealloc);
}
/*
 * putimage(): called with image locked and unlocks.
 *
 * Drops one reference to i and then, unless i->notext is set:
 *  - no references left: the image is taken off the idle list and
 *    out of the hash table and freed;
 *  - the remaining references equal i->pgref: the image is put on
 *    the idle list (presumably the only references left are those
 *    held for its cached pages -- confirm against the page cache);
 *  - otherwise: the image is in use and is taken off the idle list.
 * In the first two cases the image's channel is detached and closed
 * after the image lock has been released.
 */
void
putimage(Image *i)
{
Chan *c;
long r;
r = decref(i);
if(i->notext){
unlock(i);
return;
}
if(r == 0){
assert(i->pgref == 0);
assert(i->s == nil);
c = i->c;
i->c = nil;
busyimage(i);
lock(&imagealloc);
/*
* attachimage() takes its reference under the imagealloc lock
* without holding the image lock, so look at the count again
* before unhashing; r == 0 below then also decides the free().
*/
r = i->ref;
if(r == 0){
Image *f, **l;
l = &ihash(i->qid.path);
for(f = *l; f != nil; f = f->hash) {
if(f == i) {
*l = i->hash;
break;
}
l = &f->hash;
}
}
unlock(&imagealloc);
} else if(r == i->pgref) {
assert(i->pgref > 0);
assert(i->s == nil);
c = i->c;
i->c = nil;
idleimage(i);
} else {
c = nil;
busyimage(i);
}
unlock(i);
if(r == 0)
free(i);
if(c != nil)
ccloseq(c); /* does not block */
}
/*
 * Number of pages currently accounted to images on the idle list.
 * Read without taking the imagealloc lock.
 */
ulong
imagecached(void)
{
	ulong npages;

	npages = imagealloc.pgidle;
	return npages;
}
/*
 * Reclaim pages from idle images, starting at the head of the idle
 * list, until at least `pages` pages have been reclaimed and no more
 * than conf.nimage images are idle, or the idle list is empty.
 * Returns the number of pages reclaimed as reported by pagereclaim().
 *
 * Only one process reclaims at a time (ireclaim); the wait for that
 * lock uses eqlock().
 */
ulong
imagereclaim(ulong pages)
{
ulong np;
Image *i;
eqlock(&imagealloc.ireclaim);
lock(&imagealloc);
np = 0;
while(np < pages || imagealloc.nidle > conf.nimage) {
i = imagealloc.idle;
if(i == nil)
break;
/* hold a reference so the image survives dropping the imagealloc lock */
incref(i);
unlock(&imagealloc);
np += pagereclaim(i);
lock(i);
busyimage(i); /* force re-insert into idle list */
/* drops our reference and unlocks i; may put i back on the idle list or free it */
putimage(i);
lock(&imagealloc);
}
/*
* NOTE(review): presumably compensates for i->pgref shrinking in
* pagereclaim() while the image was still counted in pgidle --
* confirm against pagereclaim().
*/
imagealloc.pgidle -= np;
unlock(&imagealloc);
qunlock(&imagealloc.ireclaim);
return np;
}
/*
 * Set the top of segment number seg of the current process to addr,
 * rounded up to a page boundary.
 *
 * addr == 0 is a query and returns the segment base.  Otherwise the
 * segment is shrunk (the pages above the new top are freed; refused
 * with Einuse while the segment has other references) or grown
 * (refused with Esoverlap when it would run into another segment of
 * the process and with Enovmem when it would exceed the maximum
 * segment size), and 0 is returned.
 *
 * Raises Ebadarg when the process has no such segment, Enovmem for an
 * address below the segment (except for the bss/data overlap handled
 * below) or one that cannot be rounded up, and Enomem when a larger
 * map cannot be allocated.
 */
uintptr
ibrk(uintptr addr, int seg)
{
	Segment *s, *ns;
	uintptr newtop;
	ulong newsize;
	int i, mapsize;
	Pte **map;

	s = up->seg[seg];
	if(s == nil)
		error(Ebadarg);

	if(addr == 0)
		return s->base;

	qlock(s);

	/* We may start with the bss overlapping the data */
	if(addr < s->base) {
		if(seg != BSEG || up->seg[DSEG] == nil || addr < up->seg[DSEG]->base) {
			qunlock(s);
			error(Enovmem);
		}
		addr = s->base;
	}

	newtop = PGROUND(addr);
	/*
	 * Rounding up an address in the last page of the address space
	 * wraps around.  The result would be mistaken for a shrink to
	 * below s->base and corrupt s->top and s->size, so refuse it.
	 */
	if(newtop < addr) {
		qunlock(s);
		error(Enovmem);
	}
	newsize = (newtop-s->base)/BY2PG;
	if(newtop < s->top) {
		/*
		 * do not shrink a segment shared with other procs, as the
		 * to-be-freed address space may have been passed to the kernel
		 * already by another proc and is past the validaddr stage.
		 */
		if(s->ref > 1){
			qunlock(s);
			error(Einuse);
		}
		mfreeseg(s, newtop, (s->top-newtop)/BY2PG);
		s->top = newtop;
		s->size = newsize;
		qunlock(s);
		/* mfreeseg leaves flushing our own mappings to the caller */
		flushmmu();
		return 0;
	}

	/* growing: the new range must not run into any other segment */
	for(i = 0; i < NSEG; i++) {
		ns = up->seg[i];
		if(ns == nil || ns == s)
			continue;
		if(newtop > ns->base && s->base < ns->top) {
			qunlock(s);
			error(Esoverlap);
		}
	}

	if(newsize > (SEGMAPSIZE*PTEPERTAB)) {
		qunlock(s);
		error(Enovmem);
	}

	/* enlarge the map when the new size needs more page tables */
	mapsize = ROUND(newsize, PTEPERTAB)/PTEPERTAB;
	if(mapsize > s->mapsize){
		map = malloc(mapsize*sizeof(Pte*));
		if(map == nil){
			qunlock(s);
			error(Enomem);
		}
		memmove(map, s->map, s->mapsize*sizeof(Pte*));
		/* the old map is either the embedded ssegmap or a separate allocation */
		if(s->map != s->ssegmap)
			free(s->map);
		s->map = map;
		s->mapsize = mapsize;
	}

	s->top = newtop;
	s->size = newsize;
	qunlock(s);
	return 0;
}
/*
* Release `pages` pages of segment s starting at virtual address
* start: resident pages are given back with putpage(), swapped ones
* with putswap(), and the used/swapped counts are adjusted.  The page
* tables themselves stay allocated.  Nothing is done for physical,
* fixed and sticky segments.
*
* Must be called with s locked.
* This relies on s->ref > 1 indicating that
* the segment is shared with other processes
* different from the calling one.
* The calling process (up) is responsible for
* flushing its own TLB by calling flushmmu()
* afterwards.
*/
void
mfreeseg(Segment *s, uintptr start, ulong pages)
{
uintptr off;
Pte **pte, **emap;
Page **pg, **pe, *entry;
if(pages == 0)
return;
switch(s->type&SG_TYPE){
case SG_PHYSICAL:
case SG_FIXED:
case SG_STICKY:
return;
}
/*
* we have to make sure other processors flush the
* entry from their TLBs before the page is freed.
*/
if(s->ref > 1)
procflushseg(s);
/* off: first a byte offset into s, then a page index within the current table */
off = start-s->base;
pte = &s->map[off/PTEMAPMEM];
off = (off&(PTEMAPMEM-1))/BY2PG;
/* every table after the first is processed from its first slot */
for(emap = &s->map[s->mapsize]; pte < emap; pte++, off = 0) {
if(*pte == nil) {
/* no table here: count its remaining slots as done */
off = PTEPERTAB - off;
if(off >= pages)
return;
pages -= off;
continue;
}
pg = &(*pte)->pages[off];
for(pe = &(*pte)->pages[PTEPERTAB]; pg < pe; pg++) {
if((entry = *pg) != nil){
*pg = nil;
if(onswap(entry)){
putswap(entry);
s->swapped--;
} else
putpage(entry);
s->used--;
}
if(--pages == 0)
return;
}
}
}
/*
 * Return the first segment of the current process, in slot order,
 * that overlaps the address range [va, va+len), or nil if none does.
 */
Segment*
isoverlap(uintptr va, uintptr len)
{
	Segment *cand;
	uintptr end;
	int slot;

	end = va+len;
	slot = 0;
	while(slot < NSEG){
		cand = up->seg[slot++];
		if(cand != nil && end > cand->base && va < cand->top)
			return cand;
	}
	return nil;
}
/*
 * Append a copy of *new to the table of attachable segment types.
 *
 * Returns the table entry, or nil when an entry with the same name
 * already exists or the table is full.
 */
Physseg*
addphysseg(Physseg* new)
{
	Physseg *ps;

	/*
	 * Check not already entered and there is room
	 * for a new entry and the terminating null entry.
	 */
	lock(&physseglock);
	for(ps = physseg; ps->name; ps++){
		if(strcmp(ps->name, new->name) == 0){
			unlock(&physseglock);
			return nil;
		}
	}
	/*
	 * ps is the current terminator.  The new entry replaces it and
	 * the slot after it becomes the terminator, so only the very
	 * last slot of the array is unusable.
	 */
	if(ps-physseg >= nelem(physseg)-1){
		unlock(&physseglock);
		return nil;
	}
	*ps = *new;
	unlock(&physseglock);

	return ps;
}
/*
 * Look up an attachable segment type by name.
 * Returns its table entry, or nil when there is none.
 * The table is read without taking physseglock.
 */
Physseg*
findphysseg(char *name)
{
	Physseg *ps;

	ps = physseg;
	while(ps->name){
		if(strcmp(ps->name, name) == 0)
			return ps;
		ps++;
	}
	return nil;
}
/*
 * Attach a segment of the named type to the current process.
 *
 * attr: requested attributes; the type, SG_CACHED and SG_DEVICE
 *       bits are dropped and the defaults of the type are added.
 * name: name of a global segment (when the _globalsegattach hook is
 *       installed and knows it) or of an entry in physseg[].
 * va:   address to attach at, or 0 to have a free range picked
 *       below the stack segment.
 * len:  length in bytes; extended to whole pages.
 *
 * Returns the address the segment was attached at.  Raises Ebadarg
 * for a bad address, length or name, Enovmem when no slot or hole is
 * free or len exceeds the size limit of the type, and Esoverlap when
 * the range overlaps an existing segment.
 */
uintptr
segattach(int attr, char *name, uintptr va, uintptr len)
{
int sno;
Segment *s, *os;
Physseg *ps;
if(va != 0 && va >= USTKTOP)
error(Ebadarg);
qlock(&up->seglock);
if(waserror()){
qunlock(&up->seglock);
nexterror();
}
/* find a free segment slot; the ESEG slot is never used here */
for(sno = 0; sno < NSEG; sno++)
if(up->seg[sno] == nil && sno != ESEG)
break;
if(sno == NSEG)
error(Enovmem);
/*
* first look for a global segment with the
* same name
*/
if(_globalsegattach != nil){
s = (*_globalsegattach)(name);
if(s != nil){
/* a global segment dictates its own address range */
va = s->base;
len = s->top - va;
if(isoverlap(va, len) != nil){
putseg(s);
error(Esoverlap);
}
up->seg[sno] = s;
goto done;
}
}
/* round up va+len */
len += va & (BY2PG-1);
len = PGROUND(len);
if(len == 0)
error(Ebadarg);
/*
* Find a hole in the address space.
* Starting at the lowest possible stack address - len,
* check for an overlapping segment, and repeat at the
* base of that segment - len until either a hole is found
* or the address space is exhausted. Ensure that we don't
* map the zero page.
*/
if(va == 0) {
for (os = up->seg[SSEG]; os != nil; os = isoverlap(va, len)) {
va = os->base;
if(len >= va)
error(Enovmem);
va -= len;
}
}
/* page-align the start; the last test catches va+len wrapping around */
va &= ~(BY2PG-1);
if(va == 0 || (va+len) > USTKTOP || (va+len) < va)
error(Ebadarg);
if(isoverlap(va, len) != nil)
error(Esoverlap);
ps = findphysseg(name);
if(ps == nil)
error(Ebadarg);
if(len > ps->size)
error(Enovmem);
/* Turn off what is not allowed */
attr &= ~(SG_TYPE | SG_CACHED | SG_DEVICE);
/* Copy in defaults */
attr |= ps->attr;
s = newseg(attr, va, len/BY2PG);
s->pseg = ps;
up->seg[sno] = s;
done:
qunlock(&up->seglock);
poperror();
return va;
}
/*
 * Mark the pages of the current process in [va, va+len), extended to
 * whole pages, with settxtflush(), and set flushme on every segment
 * the range touches.  Pages that are paged out are passed a zero flag.
 *
 * Raises Ebadarg when the range wraps around or part of it lies in no
 * segment.  Flushing the caller's own MMU state is left to the caller.
 */
static void
segflush(void *va, uintptr len)
{
uintptr from, to, off;
Segment *s;
Pte *pte;
Page **pg, **pe;
from = (uintptr)va;
to = from + len;
to = PGROUND(to);
from &= ~(BY2PG-1);
if(to < from)
error(Ebadarg);
while(from < to) {
/* presumably seg(..., 1) returns the segment qlocked; it is unlocked at the end of this iteration */
s = seg(up, from, 1);
if(s == nil)
error(Ebadarg);
s->flushme = 1;
if(s->ref > 1)
procflushseg(s);
more:
/* from here on len is the size of the chunk being processed: clipped to this segment ... */
len = (s->top < to ? s->top : to) - from;
if(s->mapsize > 0){
off = from-s->base;
pte = s->map[off/PTEMAPMEM];
off &= PTEMAPMEM-1;
/* ... and to the page table that contains from */
if(off+len > PTEMAPMEM)
len = PTEMAPMEM-off;
if(pte != nil) {
pg = &pte->pages[off/BY2PG];
pe = pg + len/BY2PG;
while(pg < pe) {
settxtflush(*pg, !pagedout(*pg));
pg++;
}
}
}
from += len;
/* more page tables of the same segment left to do */
if(from < to && from < s->top)
goto more;
qunlock(s);
}
}
/*
 * Handler taking two arguments from list: an address (void*) and a
 * length (ulong).  Marks that range with segflush(), then flushes the
 * MMU state of the current process.  Always returns 0; errors are
 * raised by segflush().
 */
uintptr
syssegflush(va_list list)
{
	void *addr;
	ulong nbytes;

	addr = va_arg(list, void*);
	nbytes = va_arg(list, ulong);

	segflush(addr, nbytes);
	flushmmu();
	return 0;
}
/*
 * Profiling tick for the current process.  If its text segment has a
 * profile buffer, TK2MS(1) is added to slot 0 and, when pc lies
 * within the text segment, to the slot indexed by the offset of pc
 * in the segment shifted right by LRESPROF.
 */
void
segclock(uintptr pc)
{
	Segment *text;

	text = up->seg[TSEG];
	if(text == nil)
		return;
	if(text->profile == nil)
		return;

	text->profile[0] += TK2MS(1);
	if(pc < text->base || pc >= text->top)
		return;
	text->profile[(pc - text->base)>>LRESPROF] += TK2MS(1);
}
/*
 * Create an SG_DATA segment with the range, image and file extent of
 * segment s.  The new segment holds its own reference to the image
 * and has flushme set; it starts without pages.
 */
Segment*
txt2data(Segment *s)
{
	Segment *data;
	Image *img;

	data = newseg(SG_DATA, s->base, s->size);

	img = s->image;
	incref(img);
	data->image = img;
	data->flen = s->flen;
	data->fstart = s->fstart;
	data->flushme = 1;
	return data;
}
/*
 * Return a read-only text segment for the image of data segment s.
 *
 * If the image already has a segment recorded with the same file
 * length, that segment is returned with one more reference.
 * Otherwise a new SG_TEXT|SG_RONLY segment with the range, image and
 * file extent of s is created, recorded in the image if the image
 * has none, and given its own reference to the image.
 *
 * The image is locked throughout; it is unlocked again when newseg()
 * raises an error.
 */
Segment*
data2txt(Segment *s)
{
	Segment *txt;
	Image *img;

	img = s->image;
	lock(img);
	txt = img->s;
	if(txt != nil && txt->flen == s->flen){
		assert(txt->image == img);
		incref(txt);
		unlock(img);
		return txt;
	}

	if(waserror()){
		unlock(img);
		nexterror();
	}
	txt = newseg(SG_TEXT | SG_RONLY, s->base, s->size);
	txt->image = img;
	txt->fstart = s->fstart;
	txt->flen = s->flen;
	txt->flushme = 1;
	if(img->s == nil)
		img->s = txt;
	incref(img);
	unlock(img);
	poperror();
	return txt;
}
enum {
/* commands to segmentioproc */
Cnone=0, /* nothing pending; also stored by the worker when a command is finished */
Cread, /* copy dlen bytes from addr (the segment) to data */
Cwrite, /* copy dlen bytes from data to addr (the segment) */
Cdie, /* make the worker leave its loop and exit */
};
static int
cmddone(void *arg)
{
Segio *sio = arg;
return sio->cmd == Cnone;
}
/*
 * Hand command cmd to the worker of sio and wait until the worker
 * has completed it.  If the worker recorded an error string, it is
 * raised here in the caller.
 */
static void
docmd(Segio *sio, int cmd)
{
sio->err = nil;
sio->cmd = cmd;
/*
* An error raised below lands back in waserror(), which the loop
* then calls again; the wakeup and the wait are simply repeated
* until the command has completed.
*/
while(waserror())
;
wakeup(&sio->cmdwait);
sleep(&sio->replywait, cmddone, sio);
poperror();
if(sio->err != nil)
error(sio->err);
}
static int
cmdready(void *arg)
{
Segio *sio = arg;
return sio->cmd != Cnone;
}
/*
 * Worker process started by segio().  It installs sio->s in one of
 * its own free segment slots and then serves the commands posted by
 * docmd(): Cread and Cwrite copy between sio->data and sio->addr,
 * Cdie ends the loop.  After each command sio->cmd is reset to Cnone
 * and the requester is woken.
 */
static void
segmentioproc(void *arg)
{
Segio *sio = arg;
int done;
int sno;
qlock(&up->seglock);
/* find a free segment slot; the ESEG slot is never used here */
for(sno = 0; sno < NSEG; sno++)
if(up->seg[sno] == nil && sno != ESEG)
break;
if(sno == NSEG)
panic("segmentkproc");
sio->p = up;
/* the slot holds its own reference to the segment */
incref(sio->s);
up->seg[sno] = sio->s;
qunlock(&up->seglock);
/* errors raised outside the per-command handler below just re-enter the loop */
while(waserror())
;
for(done = 0; !done;){
sleep(&sio->cmdwait, cmdready, sio);
if(waserror())
/* the command failed: leave the error string for docmd() to raise */
sio->err = up->errstr;
else {
/* the requester switched to another segment: replace the one in our slot */
if(sio->s != nil && up->seg[sno] != sio->s){
Segment *tmp;
qlock(&up->seglock);
incref(sio->s);
tmp = up->seg[sno];
up->seg[sno] = sio->s;
putseg(tmp);
qunlock(&up->seglock);
flushmmu();
}
switch(sio->cmd){
case Cread:
memmove(sio->data, sio->addr, sio->dlen);
break;
case Cwrite:
memmove(sio->addr, sio->data, sio->dlen);
if(sio->s->flushme)
segflush(sio->addr, sio->dlen);
break;
case Cdie:
done = 1;
break;
}
poperror();
}
/* report completion; docmd() sleeps until cmd is Cnone */
sio->cmd = Cnone;
wakeup(&sio->replywait);
}
pexit("done", 1);
}
/*
 * Read from (read != 0) or write to segment s at byte offset off,
 * transferring n bytes from/to buffer a by way of the worker process
 * of sio, which is started on first use.
 *
 * A nil s instead tells an existing worker to exit and returns 0.
 *
 * Returns the number of bytes transferred.  A read starting outside
 * the segment returns 0 and a read running past its end is cut
 * short; a write in either situation raises Ebadarg.  Errors raised
 * by the worker's copy are raised again here.
 */
long
segio(Segio *sio, Segment *s, void *a, long n, vlong off, int read)
{
uintptr m;
void *b;
b = a;
if(s != nil){
/* m: size of the segment in bytes */
m = s->top - s->base;
if(off < 0 || off >= m){
if(!read)
error(Ebadarg);
return 0;
}
if(off+n > m){
if(!read)
error(Ebadarg);
n = m - off;
}
/*
* NOTE(review): presumably an address below KZERO is a user
* address that the worker cannot use, hence the bounce buffer b
* -- confirm.
*/
if((uintptr)a < KZERO) {
b = smalloc(n);
if(waserror()){
free(b);
nexterror();
}
if(!read)
memmove(b, a, n);
}
}
eqlock(sio);
if(waserror()){
qunlock(sio);
nexterror();
}
sio->s = s;
if(s == nil){
if(sio->p != nil){
docmd(sio, Cdie);
sio->p = nil;
}
qunlock(sio);
poperror();
return 0;
}
if(sio->p == nil){
sio->cmd = Cnone;
kproc("segmentio", segmentioproc, sio);
}
sio->addr = (char*)s->base + off;
sio->data = b;
sio->dlen = n;
docmd(sio, read ? Cread : Cwrite);
qunlock(sio);
poperror();
/* a bounce buffer was used: copy the result out, free it and pop its error label */
if(a != b){
if(read)
memmove(a, b, n);
free(b);
poperror();
}
return n;
}