[patch] POSIX advisory mode lock panic fix by Dfly

Devon H. O'Dell dodell at sitetronics.com
Tue Apr 20 13:53:45 PDT 2004


Matthew Dillon wrote:
    Sometimes these things just fall into place, other times they are
    predetermined to be ugly no matter what you do :-).
    If it's going to be ugly it is best to put the ugliness all in one place.
    So, for example, it is generally better to pass the governing structure
    to a wrapper procedure with ugly insides than it is to strew 'pp' all over
    the source file.  Sometimes special cases prevent it from working out,
    and sometimes things just fall into place and you get an elegant solution.

						-Matt
Well here's my pre-bedtime attempt at fixing sys/, anyway. I'm going to 
be gone for the next 8 hours, but please let me know what you think of 
this new version. (Note: it's not guaranteed to work ;)). I'll comment 
it tomorrow :).

--Devon
diff -ur sys/kern/kern_lockf.c sys_lockfix/kern/kern_lockf.c
--- sys/kern/kern_lockf.c	Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_lockf.c	Tue Apr 20 22:51:14 2004
@@ -51,6 +51,7 @@
 #include <sys/fcntl.h>
 
 #include <sys/lockf.h>
+#include <sys/resourcevar.h>
 
 /*
  * This variable controls the maximum number of processes that will
@@ -80,12 +81,58 @@
 	    struct lockf *, int, struct lockf ***, struct lockf **);
 static struct lockf *
 	 lf_getblock (struct lockf *);
+struct lockf    *lf_alloc (int, caddr_t);
 static int	 lf_getlock (struct lockf *, struct flock *);
+static int	 lf_res_exceeded (struct proc *);
 static int	 lf_setlock (struct lockf *);
-static void	 lf_split (struct lockf *, struct lockf *);
+static int	 lf_split (struct lockf *, struct lockf *);
+static void	 lf_free (struct lockf *);
 static void	 lf_wakelock (struct lockf *);
 
 /*
+ * Allocate space for a struct lockf and upgrade the user/process lock count
+ */
+struct lockf *
+lf_alloc(int flags, caddr_t id) {
+	struct lockf *newlock;
+
+	/* Only F_POSIX requests bump the lock counters; id is the owner. */
+	if (flags & F_POSIX)
+		incposixlockcnt((struct proc *)id);
+	MALLOC(newlock, struct lockf *, sizeof *newlock, M_LOCKF, M_WAITOK);
+	return (newlock);
+}
+
+/*
+ * Free memory allocated for a struct lockf and decrement the user/process 
+ * lock count.
+ */
+static void
+lf_free (struct lockf *lock)
+{
+	/* Adjust the counters first: lf_flags/lf_id are read from the lock. */
+	if ((lock->lf_flags & F_POSIX) != 0)
+		decposixlockcnt((struct proc *)lock->lf_id);
+
+	free(lock, M_LOCKF);
+	return;
+}
+
+/*
+ * Determine if the user has exceeded their allowed POSIX locks
+ */
+static int
+lf_res_exceeded (struct proc *p)
+{
+	struct uidinfo *ui = p->p_ucred->cr_uidinfo;
+
+	/* Nonzero (-1) means the uid's lock count is above rlim_max. */
+	if (ui->ui_numposixlocks > p->p_rlimit[RLIMIT_POSIXLOCK].rlim_max)
+		return (-1);
+	return (0);
+}
+
+/*
  * Advisory record locking support
  */
 int
@@ -147,7 +194,7 @@
 	/*
 	 * Create the lockf structure
 	 */
-	MALLOC(lock, struct lockf *, sizeof *lock, M_LOCKF, M_WAITOK);
+	lock = lf_alloc(ap->a_flags, ap->a_id);
 	lock->lf_start = start;
 	lock->lf_end = end;
 	lock->lf_id = ap->a_id;
@@ -162,20 +209,23 @@
 	 */
 	switch(ap->a_op) {
 	case F_SETLK:
-		return (lf_setlock(lock));
+		if (lf_res_exceeded((struct proc *)ap->a_id) == 0)
+			return (lf_setlock(lock));
+		lf_free(lock);	/* not handed to lf_setlock; release it */
+		return (ENOLCK);
 
 	case F_UNLCK:
 		error = lf_clearlock(lock);
-		FREE(lock, M_LOCKF);
+		lf_free(lock);
 		return (error);
 
 	case F_GETLK:
 		error = lf_getlock(lock, fl);
-		FREE(lock, M_LOCKF);
+		lf_free(lock);
 		return (error);
 
 	default:
-		free(lock, M_LOCKF);
+		lf_free(lock);
 		return (EINVAL);
 	}
 	/* NOTREACHED */
@@ -207,7 +257,7 @@
 		 * Free the structure and return if nonblocking.
 		 */
 		if ((lock->lf_flags & F_WAIT) == 0) {
-			FREE(lock, M_LOCKF);
+			lf_free(lock);
 			return (EAGAIN);
 		}
 		/*
@@ -238,7 +288,7 @@
 					break;
 				wproc = (struct proc *)waitblock->lf_id;
 				if (wproc == (struct proc *)lock->lf_id) {
-					free(lock, M_LOCKF);
+					lf_free(lock);
 					return (EDEADLK);
 				}
 			}
@@ -280,7 +330,7 @@
 			lock->lf_next = NOLOCKF;
 		}
 		if (error) {
-			free(lock, M_LOCKF);
+			lf_free(lock);
 			return (error);
 		}
 	}
@@ -325,7 +375,7 @@
 			    overlap->lf_type == F_WRLCK)
 				lf_wakelock(overlap);
 			overlap->lf_type = lock->lf_type;
-			FREE(lock, M_LOCKF);
+			lf_free(lock);
 			lock = overlap; /* for debug output below */
 			break;
 
@@ -334,7 +384,7 @@
 			 * Check for common starting point and different types.
 			 */
 			if (overlap->lf_type == lock->lf_type) {
-				free(lock, M_LOCKF);
+				lf_free(lock);
 				lock = overlap; /* for debug output below */
 				break;
 			}
@@ -342,8 +392,12 @@
 				*prev = lock;
 				lock->lf_next = overlap;
 				overlap->lf_start = lock->lf_end + 1;
-			} else
-				lf_split(overlap, lock);
+			} else {
+				error = lf_split(overlap, lock);
+				if (error) 
+					return (error);
+			}
+
 			lf_wakelock(overlap);
 			break;
 
@@ -375,7 +429,7 @@
 				needtolink = 0;
 			} else
 				*prev = overlap->lf_next;
-			free(overlap, M_LOCKF);
+			lf_free(overlap);
 			continue;
 
 		case 4: /* overlap starts before lock */
@@ -447,7 +501,7 @@
 
 		case 1: /* overlap == lock */
 			*prev = overlap->lf_next;
-			FREE(overlap, M_LOCKF);
+			lf_free(overlap);
 			break;
 
 		case 2: /* overlap contains lock: split it */
@@ -455,14 +509,16 @@
 				overlap->lf_start = unlock->lf_end + 1;
 				break;
 			}
-			lf_split(overlap, unlock);
+			error = lf_split(overlap, unlock);
+			if (error)
+				return (error);
 			overlap->lf_next = unlock->lf_next;
 			break;
 
 		case 3: /* lock contains overlap */
 			*prev = overlap->lf_next;
 			lf = overlap->lf_next;
-			free(overlap, M_LOCKF);
+			lf_free(overlap);
 			continue;
 
 		case 4: /* overlap starts before lock */
@@ -662,7 +718,7 @@
  * Split a lock and a contained region into
  * two or three locks as necessary.
  */
-static void
+static int
 lf_split(lock1, lock2)
 	struct lockf *lock1;
 	struct lockf *lock2;
@@ -681,19 +737,23 @@
 	if (lock1->lf_start == lock2->lf_start) {
 		lock1->lf_start = lock2->lf_end + 1;
 		lock2->lf_next = lock1;
-		return;
+		return (0);
 	}
 	if (lock1->lf_end == lock2->lf_end) {
 		lock1->lf_end = lock2->lf_start - 1;
 		lock2->lf_next = lock1->lf_next;
 		lock1->lf_next = lock2;
-		return;
+		return (0);
 	}
+
+	if (lf_res_exceeded((struct proc *)lock1->lf_id) != 0)
+		return (ENOLCK);
+
+	splitlock = lf_alloc(lock1->lf_flags, lock1->lf_id);
 	/*
 	 * Make a new lock consisting of the last part of
 	 * the encompassing lock
 	 */
-	MALLOC(splitlock, struct lockf *, sizeof *splitlock, M_LOCKF, M_WAITOK);
 	bcopy((caddr_t)lock1, (caddr_t)splitlock, sizeof *splitlock);
 	splitlock->lf_start = lock2->lf_end + 1;
 	TAILQ_INIT(&splitlock->lf_blkhd);
@@ -704,6 +764,7 @@
 	splitlock->lf_next = lock1->lf_next;
 	lock2->lf_next = splitlock;
 	lock1->lf_next = lock2;
+	return (0);
 }
 
 /*
diff -ur sys/kern/kern_mib.c sys_lockfix/kern/kern_mib.c
--- sys/kern/kern_mib.c	Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_mib.c	Tue Apr 20 21:40:14 2004
@@ -47,6 +47,7 @@
 #include <sys/sysctl.h>
 #include <sys/proc.h>
 #include <sys/jail.h>
+#include <sys/fcntl.h>
 #include <machine/smp.h>
 
 SYSCTL_NODE(, 0,	  sysctl, CTLFLAG_RW, 0,
@@ -101,6 +102,9 @@
 
 SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, 
     &maxprocperuid, 0, "Maximum processes allowed per userid");
+
+SYSCTL_INT(_kern, KERN_MAXPOSIXLOCKSPERUID, maxposixlocksperuid, CTLFLAG_RW,
+    &maxposixlocksperuid, 0, "Maximum number of POSIX-type locks per user id");
 
 SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RD, 
     &maxusers, 0, "Hint for kernel tuning");
diff -ur sys/kern/kern_prot.c sys_lockfix/kern/kern_prot.c
--- sys/kern/kern_prot.c	Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_prot.c	Tue Apr 20 21:40:14 2004
@@ -1108,8 +1108,10 @@
 
 	cr = cratom(&p->p_ucred);
 	(void)chgproccnt(cr->cr_ruidinfo, -1, 0);
+	(void)chgposixlockcnt(p, -(p->p_numposixlocks), 0);
 	/* It is assumed that pcred is not shared between processes */
 	cr->cr_ruid = ruid;
 	uireplace(&cr->cr_ruidinfo, uifind(ruid));
 	(void)chgproccnt(cr->cr_ruidinfo, 1, 0);
+	(void)chgposixlockcnt(p, p->p_numposixlocks, 0);
 }
diff -ur sys/kern/kern_resource.c sys_lockfix/kern/kern_resource.c
--- sys/kern/kern_resource.c	Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/kern_resource.c	Tue Apr 20 22:48:13 2004
@@ -46,6 +46,7 @@
 #include <sys/systm.h>
 #include <sys/sysproto.h>
 #include <sys/file.h>
+#include <sys/fcntl.h>
 #include <sys/kern_syscall.h>
 #include <sys/kernel.h>
 #include <sys/resourcevar.h>
@@ -387,6 +388,12 @@
 		if (limp->rlim_max < 1)
 			limp->rlim_max = 1;
 		break;
+	case RLIMIT_POSIXLOCK:	/* clamp both limits to maxposixlocksperuid */
+		if (limp->rlim_cur > maxposixlocksperuid)
+			limp->rlim_cur = maxposixlocksperuid;
+		if (limp->rlim_max > maxposixlocksperuid)
+			limp->rlim_max = maxposixlocksperuid;
+		break;
 	}
 	*alimp = *limp;
 	return (0);
@@ -626,6 +633,38 @@
 	if (uip->ui_proccnt < 0)
 		printf("negative proccnt for uid = %d\n", uip->ui_uid);
 	return (1);
+}
+
+/*
+ * Increment the count associated with the number of POSIX locks
+ * in use by a user and process at any given time.
+ */
+void
+incposixlockcnt (struct proc *p)
+{
+	struct uidinfo *uip = p->p_ucred->cr_uidinfo;
+
+	uip->ui_numposixlocks++;	/* per-uid count */
+	p->p_posixlocks++;		/* per-process count */
+	return;
+}
+
+/*
+ * Decrement the count associated with the number of POSIX locks
+ * in use by a user and process at any given time.
+ */
+void
+decposixlockcnt (struct proc *p)
+{
+	struct uidinfo *uip = p->p_ucred->cr_uidinfo;
+
+	uip->ui_numposixlocks--;	/* per-uid count; must stay >= 0 */
+	KASSERT(uip->ui_numposixlocks >= 0,
+	    ("Negative number of POSIX locks held by user."));
+	p->p_posixlocks--;		/* per-process count; must stay >= 0 */
+	KASSERT(p->p_posixlocks >= 0,
+	    ("Negative number of POSIX locks held by process."));
+	return;
+}
 
 /*
diff -ur sys/kern/subr_param.c sys_lockfix/kern/subr_param.c
--- sys/kern/subr_param.c	Tue Apr 20 21:38:13 2004
+++ sys_lockfix/kern/subr_param.c	Tue Apr 20 21:40:14 2004
@@ -66,6 +66,9 @@
 #ifndef NSFBUFS
 #define NSFBUFS (512 + maxusers * 16)
 #endif
+#ifndef MAXPOSIXLOCKSPERUID
+#define MAXPOSIXLOCKSPERUID (maxusers * 64) /* Should be a safe value */
+#endif
 
 int	hz;
 int	stathz;
@@ -77,6 +80,7 @@
 int	maxprocperuid;			/* max # of procs per user */
 int	maxfiles;			/* sys. wide open files limit */
 int	maxfilesperproc;		/* per-proc open files limit */
+int	maxposixlocksperuid;		/* max # POSIX locks per uid */
 int	ncallout;			/* maximum # of timer events */
 int	mbuf_wait = 32;			/* mbuf sleep time in ticks */
 int	nbuf;
@@ -122,6 +126,8 @@
 #endif
 	TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache);
 
+	maxposixlocksperuid = MAXPOSIXLOCKSPERUID;
+	TUNABLE_INT_FETCH("kern.maxposixlocksperuid", &maxposixlocksperuid);
 	maxtsiz = MAXTSIZ;
 	TUNABLE_QUAD_FETCH("kern.maxtsiz", &maxtsiz);
 	dfldsiz = DFLDSIZ;
diff -ur sys/sys/fcntl.h sys_lockfix/sys/fcntl.h
--- sys/sys/fcntl.h	Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/fcntl.h	Tue Apr 20 21:40:14 2004
@@ -223,4 +223,8 @@
 __END_DECLS
 #endif
 
+#ifdef _KERNEL
+extern int maxposixlocksperuid;
+#endif
+
 #endif /* !_SYS_FCNTL_H_ */
Only in sys_lockfix/sys: fcntl.h.orig
diff -ur sys/sys/proc.h sys_lockfix/sys/proc.h
--- sys/sys/proc.h	Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/proc.h	Tue Apr 20 22:51:53 2004
@@ -235,6 +235,7 @@
 	struct thread *p_thread; /* temporarily embed thread struct in proc */
 	struct upcall *p_upcall; /* USERLAND POINTER! registered upcall */
 	struct sched *p_sched;	/* work-in-progress / Peter Kadau */
+	int	p_posixlocks;	/* number of POSIX locks */
 };
 
 #if defined(_KERNEL)
Only in sys_lockfix/sys: proc.h.orig
diff -ur sys/sys/resource.h sys_lockfix/sys/resource.h
--- sys/sys/resource.h	Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/resource.h	Tue Apr 20 21:40:14 2004
@@ -90,8 +90,9 @@
 #define	RLIMIT_NOFILE	8		/* number of open files */
 #define	RLIMIT_SBSIZE	9		/* maximum size of all socket buffers */
 #define	RLIMIT_VMEM	10		/* virtual process size (inclusive of mmap) */
+#define RLIMIT_POSIXLOCK 11		/* maximum number of POSIX locks per user */
 
-#define	RLIM_NLIMITS	11		/* number of resource limits */
+#define	RLIM_NLIMITS	12		/* number of resource limits */
 
 #define	RLIM_INFINITY	((rlim_t)(((u_quad_t)1 << 63) - 1))
 
@@ -113,6 +114,7 @@
 	"nofile",
 	"sbsize",
 	"vmem",
+	"posixlock",
 };
 #endif
 
Only in sys_lockfix/sys: resource.h.orig
diff -ur sys/sys/resourcevar.h sys_lockfix/sys/resourcevar.h
--- sys/sys/resourcevar.h	Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/resourcevar.h	Tue Apr 20 22:52:11 2004
@@ -95,6 +95,7 @@
 	long	ui_proccnt;		/* number of processes */
 	uid_t	ui_uid;			/* uid */
 	int	ui_ref;			/* reference count */
+	int	ui_numposixlocks;	/* number of POSIX locks */
 	struct varsymset ui_varsymset;	/* variant symlinks */
 };
 
@@ -107,6 +108,7 @@
 void	calcru (struct proc *p, struct timeval *up, struct timeval *sp,
 	    struct timeval *ip);
 int	chgproccnt (struct uidinfo *uip, int diff, int max);
+int	chgposixlockcnt (struct proc *p, int diff, int max);
 int	chgsbsize (struct uidinfo *uip, u_long *hiwat, u_long to, rlim_t max);
 int	fuswintr (void *base);
 struct plimit *limcopy (struct plimit *lim);
Only in sys_lockfix/sys: resourcevar.h.orig
diff -ur sys/sys/sysctl.h sys_lockfix/sys/sysctl.h
--- sys/sys/sysctl.h	Tue Apr 20 21:38:12 2004
+++ sys_lockfix/sys/sysctl.h	Tue Apr 20 21:40:14 2004
@@ -342,6 +342,7 @@
 #define	KERN_USRSTACK		33	/* int: address of USRSTACK */
 #define	KERN_LOGSIGEXIT		34	/* int: do we log sigexit procs? */
 #define KERN_MAXID		35      /* number of valid kern ids */
+#define KERN_MAXPOSIXLOCKSPERUID 36	/* int: max POSIX locks per uid */
 
 #define CTL_KERN_NAMES { \
 	{ 0, 0 }, \
@@ -373,6 +374,7 @@
 	{ "bootfile", CTLTYPE_STRING }, \
 	{ "maxfilesperproc", CTLTYPE_INT }, \
 	{ "maxprocperuid", CTLTYPE_INT }, \
+	{ "maxposixlocksperuid", CTLTYPE_INT }, \
 	{ "dumpdev", CTLTYPE_STRUCT }, /* we lie; don't print as int */ \
 	{ "ipc", CTLTYPE_NODE }, \
 	{ "dummy", CTLTYPE_INT }, \
Only in sys_lockfix/sys: sysctl.h.orig




More information about the Submit mailing list