[patch] POSIX advisory mode lock panic fix by Dfly
Devon H. O'Dell
dodell at sitetronics.com
Tue Apr 20 13:53:45 PDT 2004
Matthew Dillon wrote:
Sometimes these things just fall into place, other times they are
predetermined to be ugly no matter what you do :-).
If it's going to be ugly, it is best to put the ugliness all in one place.
So, for example, it is generally better to pass the governing structure
to a wrapper procedure with ugly insides than it is to strew 'pp' all over
the source file. Sometimes special cases prevent it from working out,
and sometimes things just fall into place and you get an elegant solution.
-Matt
Well here's my pre-bedtime attempt at fixing sys/, anyway. I'm going to
be gone for the next 8 hours, but please let me know what you think of
this new version. (Note: it's not guaranteed to work ;)). I'll comment
it tomorrow :).
--Devon
diff -ur sys/kern/kern_lockf.c sys_lockfix/kern/kern_lockf.c
--- sys/kern/kern_lockf.c Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_lockf.c Tue Apr 20 22:51:14 2004
@@ -51,6 +51,7 @@
#include <sys/fcntl.h>
#include <sys/lockf.h>
+#include <sys/resourcevar.h>
/*
* This variable controls the maximum number of processes that will
@@ -80,12 +81,58 @@
struct lockf *, int, struct lockf ***, struct lockf **);
static struct lockf *
lf_getblock (struct lockf *);
+struct lockf *lf_alloc (int, caddr_t);
static int lf_getlock (struct lockf *, struct flock *);
+static int lf_res_exceeded (struct proc *);
static int lf_setlock (struct lockf *);
-static void lf_split (struct lockf *, struct lockf *);
+static int lf_split (struct lockf *, struct lockf *);
+static void lf_free (struct lockf *);
static void lf_wakelock (struct lockf *);
/*
+ * Obtain storage for one struct lockf.  When `flags' carries F_POSIX,
+ * `id' is treated as the owning struct proc and its POSIX lock counters
+ * are bumped through incposixlockcnt() before the allocation is made.
+ * No field of the returned structure is filled in here.
+ */
+struct lockf *
+lf_alloc(int flags, caddr_t id)
+{
+	struct lockf *newlock;
+
+	if (flags & F_POSIX)
+		incposixlockcnt((struct proc *)id);
+
+	MALLOC(newlock, struct lockf *, sizeof *newlock, M_LOCKF, M_WAITOK);
+	return (newlock);
+}
+
+/*
+ * Release a struct lockf.  For F_POSIX locks the lock's lf_id is treated
+ * as the owning struct proc, whose POSIX lock counters are dropped through
+ * decposixlockcnt() before the memory itself is freed.
+ */
+static void
+lf_free(struct lockf *lock)
+{
+
+	if ((lock->lf_flags & F_POSIX) != 0)
+		decposixlockcnt((struct proc *)lock->lf_id);
+
+	free(lock, M_LOCKF);
+}
+
+/*
+ * Determine if the user has exceeded their allowed POSIX locks.
+ *
+ * Compares the per-uid POSIX lock count reached through p's credential
+ * against p's soft RLIMIT_POSIXLOCK limit (rlim_cur).  The soft limit is
+ * the value a resource limit is enforced at; rlim_max only caps how far
+ * the soft limit may be raised, so testing it would make a lowered soft
+ * limit ineffective.
+ *
+ * Returns -1 when the count is above the limit and 0 otherwise.
+ */
+static int
+lf_res_exceeded(struct proc *p)
+{
+	struct uidinfo *ui = p->p_ucred->cr_uidinfo;
+
+	if (ui->ui_posixlocks > p->p_rlimit[RLIMIT_POSIXLOCK].rlim_cur)
+		return (-1);
+
+	return (0);
+}
+
+/*
* Advisory record locking support
*/
int
@@ -147,7 +194,7 @@
/*
* Create the lockf structure
*/
- MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+ lock = lf_alloc(ap->a_flags, ap->a_id);
lock->lf_start = start;
lock->lf_end = end;
lock->lf_id = ap->a_id;
@@ -162,20 +209,23 @@
*/
switch(ap->a_op) {
case F_SETLK:
- return (lf_setlock(lock));
+ /*
+ * Refuse the request with ENOLCK when the owner is over its POSIX
+ * lock limit. a_id is only cast to a struct proc for F_POSIX
+ * requests (the same test lf_alloc() applies), and the lock built
+ * above is released through lf_free() so that it is neither leaked
+ * nor left counted against the owner.
+ */
+ if ((ap->a_flags & F_POSIX) != 0 &&
+ lf_res_exceeded((struct proc *)ap->a_id) != 0) {
+ lf_free(lock);
+ return (ENOLCK);
+ }
+ return (lf_setlock(lock));
case F_UNLCK:
error = lf_clearlock(lock);
- FREE(lock, M_LOCKF);
+ lf_free(lock);
return (error);
case F_GETLK:
error = lf_getlock(lock, fl);
- FREE(lock, M_LOCKF);
+ lf_free(lock);
return (error);
default:
- free(lock, M_LOCKF);
+ lf_free(lock);
return (EINVAL);
}
/* NOTREACHED */
@@ -207,7 +257,7 @@
* Free the structure and return if nonblocking.
*/
if ((lock->lf_flags & F_WAIT) == 0) {
- FREE(lock, M_LOCKF);
+ lf_free(lock);
return (EAGAIN);
}
/*
@@ -238,7 +288,7 @@
break;
wproc = (struct proc *)waitblock->lf_id;
if (wproc == (struct proc *)lock->lf_id) {
- free(lock, M_LOCKF);
+ lf_free(lock);
return (EDEADLK);
}
}
@@ -280,7 +330,7 @@
lock->lf_next = NOLOCKF;
}
if (error) {
- free(lock, M_LOCKF);
+ lf_free(lock);
return (error);
}
}
@@ -325,7 +375,7 @@
overlap->lf_type == F_WRLCK)
lf_wakelock(overlap);
overlap->lf_type = lock->lf_type;
- FREE(lock, M_LOCKF);
+ lf_free(lock);
lock = overlap; /* for debug output below */
break;
@@ -334,7 +384,7 @@
* Check for common starting point and different types.
*/
if (overlap->lf_type == lock->lf_type) {
- free(lock, M_LOCKF);
+ lf_free(lock);
lock = overlap; /* for debug output below */
break;
}
@@ -342,8 +392,12 @@
*prev = lock;
lock->lf_next = overlap;
overlap->lf_start = lock->lf_end + 1;
- } else
- lf_split(overlap, lock);
+ } else {
+ error = lf_split(overlap, lock);
+ if (error)
+ return (error);
+ }
+
lf_wakelock(overlap);
break;
@@ -375,7 +429,7 @@
needtolink = 0;
} else
*prev = overlap->lf_next;
- free(overlap, M_LOCKF);
+ lf_free(overlap);
continue;
case 4: /* overlap starts before lock */
@@ -447,7 +501,7 @@
case 1: /* overlap == lock */
*prev = overlap->lf_next;
- FREE(overlap, M_LOCKF);
+ lf_free(overlap);
break;
case 2: /* overlap contains lock: split it */
@@ -455,14 +509,16 @@
overlap->lf_start = unlock->lf_end + 1;
break;
}
- lf_split(overlap, unlock);
+ error = lf_split(overlap, unlock);
+ if (error)
+ return (error);
overlap->lf_next = unlock->lf_next;
break;
case 3: /* lock contains overlap */
*prev = overlap->lf_next;
lf = overlap->lf_next;
- free(overlap, M_LOCKF);
+ lf_free(overlap);
continue;
case 4: /* overlap starts before lock */
@@ -662,7 +718,7 @@
* Split a lock and a contained region into
* two or three locks as necessary.
*/
-static void
+static int
lf_split(lock1, lock2)
struct lockf *lock1;
struct lockf *lock2;
@@ -681,19 +737,23 @@
if (lock1->lf_start == lock2->lf_start) {
lock1->lf_start = lock2->lf_end + 1;
lock2->lf_next = lock1;
- return;
+ return (0);
}
if (lock1->lf_end == lock2->lf_end) {
lock1->lf_end = lock2->lf_start - 1;
lock2->lf_next = lock1->lf_next;
lock1->lf_next = lock2;
- return;
+ return (0);
}
+
+ if (lf_res_exceeded((struct proc *)lock1->lf_id) != 0)
+ return (ENOLCK);
+
+ splitlock = lf_alloc(lock1->lf_flags, lock1->lf_id);
/*
* Make a new lock consisting of the last part of
* the encompassing lock
*/
- MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
splitlock->lf_start = lock2->lf_end + 1;
TAILQ_INIT(&splitlock->lf_blkhd);
@@ -704,6 +764,7 @@
splitlock->lf_next = lock1->lf_next;
lock2->lf_next = splitlock;
lock1->lf_next = lock2;
+ return (0);
}
/*
diff -ur sys/kern/kern_mib.c sys_lockfix/kern/kern_mib.c
--- sys/kern/kern_mib.c Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_mib.c Tue Apr 20 21:40:14 2004
@@ -47,6 +47,7 @@
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/jail.h>
+#include <sys/fcntl.h>
#include <machine/smp.h>
SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0,
@@ -101,6 +102,9 @@
SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW,
&maxprocperuid, 0, "Maximum processes allowed per userid");
+
+/* Export maxposixlocksperuid read/write under the kern sysctl node. */
+SYSCTL_INT(_kern, KERN_MAXPOSIXLOCKSPERUID, maxposixlocksperuid, CTLFLAG_RW,
+ &maxposixlocksperuid, 0, "Maximum number of POSIX-type locks per user id");
SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RD,
&maxusers, 0, "Hint for kernel tuning");
diff -ur sys/kern/kern_prot.c sys_lockfix/kern/kern_prot.c
--- sys/kern/kern_prot.c Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_prot.c Tue Apr 20 21:40:14 2004
@@ -1108,8 +1108,10 @@
cr = cratom(&p->p_ucred);
(void)chgproccnt(cr->cr_ruidinfo, -1, 0);
+ (void)chgposixlockcnt(p, -(p->p_numposixlocks), 0);
/* It is assumed that pcred is not shared between processes */
cr->cr_ruid = ruid;
uireplace(&cr->cr_ruidinfo, uifind(ruid));
(void)chgproccnt(cr->cr_ruidinfo, 1, 0);
+ (void)chgposixlockcnt(p, p->p_numposixlocks, 0);
}
diff -ur sys/kern/kern_resource.c sys_lockfix/kern/kern_resource.c
--- sys/kern/kern_resource.c Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_resource.c Tue Apr 20 22:48:13 2004
@@ -46,6 +46,7 @@
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/file.h>
+#include <sys/fcntl.h>
#include <sys/kern_syscall.h>
#include <sys/kernel.h>
#include <sys/resourcevar.h>
@@ -387,6 +388,12 @@
if (limp->rlim_max < 1)
limp->rlim_max = 1;
break;
+ case RLIMIT_POSIXLOCK:
+ /* Clamp both the soft and hard limit to maxposixlocksperuid. */
+ if (limp->rlim_cur > maxposixlocksperuid)
+ limp->rlim_cur = maxposixlocksperuid;
+ if (limp->rlim_max > maxposixlocksperuid)
+ limp->rlim_max = maxposixlocksperuid;
+ break;
}
*alimp = *limp;
return (0);
@@ -626,6 +633,38 @@
if (uip->ui_proccnt < 0)
printf("negative proccnt for uid = %d\n", uip->ui_uid);
return (1);
+}
+
+/*
+ * Increment the count associated with the number of POSIX locks
+ * in use by a user and process at any given time.
+ *
+ * Bumps ui_posixlocks on the uidinfo reached through p's credential
+ * and p_posixlocks on p itself.
+ */
+void
+incposixlockcnt(struct proc *p)
+{
+	struct uidinfo *uip = p->p_ucred->cr_uidinfo;
+
+	uip->ui_posixlocks++;
+	p->p_posixlocks++;
+}
+
+/*
+ * Decrement the count associated with the number of POSIX locks
+ * in use by a user and process at any given time.
+ *
+ * Drops ui_posixlocks on the uidinfo reached through p's credential
+ * and p_posixlocks on p itself.  KASSERT fires when its expression is
+ * false, so each assertion states the condition that must hold after
+ * the decrement: the counter is still non-negative.
+ */
+void
+decposixlockcnt(struct proc *p)
+{
+	struct uidinfo *uip = p->p_ucred->cr_uidinfo;
+
+	uip->ui_posixlocks--;
+	KASSERT(uip->ui_posixlocks >= 0,
+	    ("Negative number of POSIX locks held by user."));
+	p->p_posixlocks--;
+	KASSERT(p->p_posixlocks >= 0,
+	    ("Negative number of POSIX locks held by process."));
+}
/*
diff -ur sys/kern/subr_param.c sys_lockfix/kern/subr_param.c
--- sys/kern/subr_param.c Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/subr_param.c Tue Apr 20 21:40:14 2004
@@ -66,6 +66,9 @@
#ifndef NSFBUFS
#define NSFBUFS (512 + maxusers * 16)
#endif
+#ifndef MAXPOSIXLOCKSPERUID
+#define MAXPOSIXLOCKSPERUID (maxusers * 64) /* Should be a safe value */
+#endif
int hz;
int stathz;
@@ -77,6 +80,7 @@
int maxprocperuid; /* max # of procs per user */
int maxfiles; /* sys. wide open files limit */
int maxfilesperproc; /* per-proc open files limit */
+int maxposixlocksperuid; /* max # POSIX locks per uid */
int ncallout; /* maximum # of timer events */
int mbuf_wait = 32; /* mbuf sleep time in ticks */
int nbuf;
@@ -122,6 +126,8 @@
#endif
TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache);
+ maxposixlocksperuid = MAXPOSIXLOCKSPERUID;
+ TUNABLE_INT_FETCH("kern.maxposixlocksperuid", &maxposixlocksperuid);
maxtsiz = MAXTSIZ;
TUNABLE_QUAD_FETCH("kern.maxtsiz", &maxtsiz);
dfldsiz = DFLDSIZ;
diff -ur sys/sys/fcntl.h sys_lockfix/sys/fcntl.h
--- sys/sys/fcntl.h Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/fcntl.h Tue Apr 20 21:40:14 2004
@@ -223,4 +223,8 @@
__END_DECLS
#endif
+#ifdef _KERNEL
+extern int maxposixlocksperuid;
+#endif
+
#endif /* !_SYS_FCNTL_H_ */
Only in sys_lockfix/sys: fcntl.h.orig
diff -ur sys/sys/proc.h sys_lockfix/sys/proc.h
--- sys/sys/proc.h Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/proc.h Tue Apr 20 22:51:53 2004
@@ -235,6 +235,7 @@
struct thread *p_thread; /* temporarily embed thread struct in proc */
struct upcall *p_upcall; /* USERLAND POINTER! registered upcall */
struct sched *p_sched; /* work-in-progress / Peter Kadau */
+ int p_posixlocks; /* number of POSIX locks */
};
#if defined(_KERNEL)
Only in sys_lockfix/sys: proc.h.orig
diff -ur sys/sys/resource.h sys_lockfix/sys/resource.h
--- sys/sys/resource.h Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/resource.h Tue Apr 20 21:40:14 2004
@@ -90,8 +90,9 @@
#define RLIMIT_NOFILE 8 /* number of open files */
#define RLIMIT_SBSIZE 9 /* maximum size of all socket buffers */
#define RLIMIT_VMEM 10 /* virtual process size (inclusive of mmap) */
+#define RLIMIT_POSIXLOCK 11 /* maximum number of POSIX locks per user */
-#define RLIM_NLIMITS 11 /* number of resource limits */
+#define RLIM_NLIMITS 12 /* number of resource limits */
#define RLIM_INFINITY ((rlim_t)(((u_quad_t)1 << 63) - 1))
@@ -113,6 +114,7 @@
"nofile",
"sbsize",
"vmem",
+ "posixlock",
};
#endif
Only in sys_lockfix/sys: resource.h.orig
diff -ur sys/sys/resourcevar.h sys_lockfix/sys/resourcevar.h
--- sys/sys/resourcevar.h Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/resourcevar.h Tue Apr 20 22:52:11 2004
@@ -95,6 +95,7 @@
long ui_proccnt; /* number of processes */
uid_t ui_uid; /* uid */
int ui_ref; /* reference count */
+ int ui_posixlocks; /* number of POSIX locks */
struct varsymset ui_varsymset; /* variant symlinks */
};
@@ -107,6 +108,7 @@
void calcru (struct proc *p, struct timeval *up, struct timeval *sp,
struct timeval *ip);
int chgproccnt (struct uidinfo *uip, int diff, int max);
+int chgposixlockcnt (struct proc *p, int diff, int max);
int chgsbsize (struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max);
int fuswintr (void *base);
struct plimit *limcopy (struct plimit *lim);
Only in sys_lockfix/sys: resourcevar.h.orig
diff -ur sys/sys/sysctl.h sys_lockfix/sys/sysctl.h
--- sys/sys/sysctl.h Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/sysctl.h Tue Apr 20 21:40:14 2004
@@ -342,6 +342,7 @@
#define KERN_USRSTACK 33 /* int: address of USRSTACK */
#define KERN_LOGSIGEXIT 34 /* int: do we log sigexit procs? */
-#define KERN_MAXID 35 /* number of valid kern ids */
+#define KERN_MAXPOSIXLOCKSPERUID 35 /* int: max POSIX locks per uid */
+#define KERN_MAXID 36 /* number of valid kern ids */
#define CTL_KERN_NAMES { \
{ 0, 0 }, \
@@ -373,6 +374,7 @@
{ "bootfile", CTLTYPE_STRING }, \
{ "maxfilesperproc", CTLTYPE_INT }, \
{ "maxprocperuid", CTLTYPE_INT }, \
+ { "maxposixlocksperuid", CTLTYPE_INT }, \
{ "dumpdev", CTLTYPE_STRUCT }, /* we lie; don't print as int */ \
{ "ipc", CTLTYPE_NODE }, \
{ "dummy", CTLTYPE_INT }, \
Only in sys_lockfix/sys: sysctl.h.orig
More information about the Submit
mailing list