[patch] POSIX advisory mode lock panic fix by Dfly

Joerg Sonnenberger joerg at britannica.bec.de
Wed Apr 21 21:08:10 PDT 2004


OK, here is the updated version. I moved the actual handling into kern_lockf.c
(declared in lockf.h); only the rlimit check is left in kern_resource.c, and the call to lf_count_adjust in kern_prot.c.

Matt, what do you think?

Joerg
Index: kern/kern_lockf.c
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/kern/kern_lockf.c,v
retrieving revision 1.6
diff -u -r1.6 kern_lockf.c
--- kern/kern_lockf.c	26 Aug 2003 21:09:02 -0000	1.6
+++ kern/kern_lockf.c	21 Apr 2004 16:05:14 -0000
@@ -45,6 +45,7 @@
 #include <sys/kernel.h>
 #include <sys/lock.h>
 #include <sys/proc.h>
+#include <sys/resourcevar.h>
 #include <sys/unistd.h>
 #include <sys/vnode.h>
 #include <sys/malloc.h>
@@ -65,7 +66,6 @@
 #include <vfs/ufs/quota.h>
 #include <vfs/ufs/inode.h>
 
-
 static int	lockf_debug = 0;
 SYSCTL_INT(_debug, OID_AUTO, lockf_debug, CTLFLAG_RW, &lockf_debug, 0, "");
 #endif
@@ -82,9 +82,77 @@
 	 lf_getblock (struct lockf *);
 static int	 lf_getlock (struct lockf *, struct flock *);
 static int	 lf_setlock (struct lockf *);
-static void	 lf_split (struct lockf *, struct lockf *);
+static int	 lf_split (struct lockf *, struct lockf *);
 static void	 lf_wakelock (struct lockf *);
 
+static int	 lf_count_increment(struct lockf *);
+static void	 lf_count_decrement(struct lockf *);
+
+/*
+ * Move the POSIX lock count of process p from its current uidinfo to newowner.
+ */
+void
+lf_count_adjust(struct proc *p, struct uidinfo *newowner)
+{
+	struct uidinfo *uip;
+
+	uip = p->p_ucred->cr_uidinfo;	/* uidinfo currently charged */
+
+	newowner->ui_posixlocks += p->p_numposixlocks;
+	uip->ui_posixlocks -= p->p_numposixlocks;
+
+	KASSERT(uip->ui_posixlocks >= 0 && newowner->ui_posixlocks >= 0,
+		("Negative number of POSIX locks held by user."));
+}
+
+static int
+lf_count_increment(struct lockf *lock)
+{
+	struct uidinfo *uip;
+	struct proc *pp;
+	rlim_t max;	/* not int: RLIM_INFINITY must not be truncated */
+
+	/* No accounting for non-POSIX locks; they always succeed (0). */
+	if ((lock->lf_flags & F_POSIX) == 0)
+		return(0);
+
+	pp = (struct proc *)lock->lf_id;  /* F_POSIX: lf_id used as the proc */
+	uip = pp->p_ucred->cr_uidinfo;
+
+	max = pp->p_rlimit[RLIMIT_POSIXLOCK].rlim_cur;
+	if (uip->ui_posixlocks >= max)
+		return(1);	/* per-user limit reached, nothing counted */
+
+	uip->ui_posixlocks++;
+	pp->p_numposixlocks++;
+	KASSERT(uip->ui_posixlocks >= 0,
+		("Negative number of POSIX locks held by user."));
+	KASSERT(pp->p_numposixlocks >= 0,
+		("Negative number of POSIX locks held by process."));
+	return(0);
+}
+
+static void
+lf_count_decrement(struct lockf *lock)
+{
+	struct uidinfo *uip;
+	struct proc *pp;
+
+	/* No accounting for non-POSIX locks; nothing to undo. */
+	if ((lock->lf_flags & F_POSIX) == 0)
+		return;
+
+	pp = (struct proc *)lock->lf_id;  /* F_POSIX: lf_id used as the proc */
+	uip = pp->p_ucred->cr_uidinfo;
+
+	uip->ui_posixlocks--;
+	pp->p_numposixlocks--;
+	KASSERT(uip->ui_posixlocks >= 0,
+		("Negative number of POSIX locks held by user."));
+	KASSERT(pp->p_numposixlocks >= 0,
+		("Negative number of POSIX locks held by process."));
+}
+
 /*
  * Advisory record locking support
  */
@@ -307,9 +375,16 @@
 		 *	3) lock contains overlap
 		 *	4) overlap starts before lock
 		 *	5) overlap ends after lock
+		 *
+		 * The POSIX lock counter has to be checked for
+		 * 0, 1, 2 (twice), 4 and 5.
+		 *
+		 * Case 2 is special, because splitting can happen.
 		 */
 		switch (ovcase) {
 		case 0: /* no overlap */
+			if (lf_count_increment(lock))
+				return (ENOLCK);
 			if (needtolink) {
 				*prev = lock;
 				lock->lf_next = overlap;
@@ -338,12 +413,19 @@
 				lock = overlap; /* for debug output below */
 				break;
 			}
+			if (lf_count_increment(lock))
+				return (ENOLCK);
 			if (overlap->lf_start == lock->lf_start) {
 				*prev = lock;
 				lock->lf_next = overlap;
 				overlap->lf_start = lock->lf_end + 1;
-			} else
-				lf_split(overlap, lock);
+			} else {
+				error = lf_split(overlap, lock);
+				if (error) {
+					lf_count_decrement(lock);
+					return(error);
+				}
+			}
 			lf_wakelock(overlap);
 			break;
 
@@ -382,6 +464,8 @@
 			/*
 			 * Add lock after overlap on the list.
 			 */
+			if (lf_count_increment(lock))
+				return (ENOLCK);
 			lock->lf_next = overlap->lf_next;
 			overlap->lf_next = lock;
 			overlap->lf_end = lock->lf_start - 1;
@@ -394,6 +478,8 @@
 			/*
 			 * Add the new lock before overlap.
 			 */
+			if (lf_count_increment(lock))
+				return (ENOLCK);
 			if (needtolink) {
 				*prev = lock;
 				lock->lf_next = overlap;
@@ -426,7 +512,9 @@
 	struct lockf **head = unlock->lf_head;
 	struct lockf *lf = *head;
 	struct lockf *overlap, **prev;
-	int ovcase;
+	int ovcase, error, decrement;
+
+	decrement = 1;
 
 	if (lf == NOLOCKF)
 		return (0);
@@ -455,8 +543,19 @@
 				overlap->lf_start = unlock->lf_end + 1;
 				break;
 			}
-			lf_split(overlap, unlock);
+			/*
+			 * This can fail when splitting a lock and the limit
+			 * for POSIX locks has been reached
+			 */
+			error = lf_split(overlap, unlock);
+			if (error)
+				return(error);
 			overlap->lf_next = unlock->lf_next;
+			/*
+			 * The locks are changed, but still exist.
+			 * Don't change the POSIX lock counter.
+			 */
+			decrement = 0;
 			break;
 
 		case 3: /* lock contains overlap */
@@ -477,6 +576,15 @@
 		}
 		break;
 	}
+
+	/*
+	 * The lock has been successfully released,
+	 * decrement the POSIX lock counter if necessary.
+	 */
+
+	if (decrement)
+		lf_count_decrement(unlock);
+
 #ifdef LOCKF_DEBUG
 	if (lockf_debug & 1)
 		lf_printlist("lf_clearlock", unlock);
@@ -662,7 +770,7 @@
  * Split a lock and a contained region into
  * two or three locks as necessary.
  */
-static void
+static int
 lf_split(lock1, lock2)
 	struct lockf *lock1;
 	struct lockf *lock2;
@@ -677,18 +785,21 @@
 #endif /* LOCKF_DEBUG */
 	/*
 	 * Check to see if spliting into only two pieces.
+	 * In that case, no additional POSIX locks are needed.
 	 */
 	if (lock1->lf_start == lock2->lf_start) {
 		lock1->lf_start = lock2->lf_end + 1;
 		lock2->lf_next = lock1;
-		return;
+		return(0);
 	}
 	if (lock1->lf_end == lock2->lf_end) {
 		lock1->lf_end = lock2->lf_start - 1;
 		lock2->lf_next = lock1->lf_next;
 		lock1->lf_next = lock2;
-		return;
+		return(0);
 	}
+	if (lf_count_increment(lock1))
+		return(ENOLCK);
 	/*
 	 * Make a new lock consisting of the last part of
 	 * the encompassing lock
@@ -704,6 +815,7 @@
 	splitlock->lf_next = lock1->lf_next;
 	lock2->lf_next = splitlock;
 	lock1->lf_next = lock2;
+	return(0);
 }
 
 /*
Index: kern/kern_mib.c
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/kern/kern_mib.c,v
retrieving revision 1.7
diff -u -r1.7 kern_mib.c
--- kern/kern_mib.c	24 Oct 2003 17:19:12 -0000	1.7
+++ kern/kern_mib.c	21 Apr 2004 16:00:42 -0000
@@ -46,6 +46,7 @@
 #include <sys/systm.h>
 #include <sys/sysctl.h>
 #include <sys/proc.h>
+#include <sys/lockf.h>
 #include <sys/jail.h>
 #include <machine/smp.h>
 
@@ -102,6 +103,9 @@
 SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, 
     &maxprocperuid, 0, "Maximum processes allowed per userid");
 
+SYSCTL_INT(_kern, KERN_MAXPOSIXLOCKSPERUID, maxposixlocksperuid, CTLFLAG_RW,
+    &maxposixlocksperuid, 0, "Maximum number of POSIX-type locks per user id");
+
 SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RD, 
     &maxusers, 0, "Hint for kernel tuning");
 
Index: kern/kern_prot.c
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/kern/kern_prot.c,v
retrieving revision 1.15
diff -u -r1.15 kern_prot.c
--- kern/kern_prot.c	5 Mar 2004 16:57:15 -0000	1.15
+++ kern/kern_prot.c	21 Apr 2004 15:52:30 -0000
@@ -57,6 +57,7 @@
 #include <sys/resourcevar.h>
 #include <sys/thread2.h>
 #include <sys/jail.h>
+#include <sys/lockf.h>
 
 static MALLOC_DEFINE(M_CRED, "cred", "credentials");
 
@@ -1108,6 +1109,7 @@
 
 	cr = cratom(&p->p_ucred);
 	(void)chgproccnt(cr->cr_ruidinfo, -1, 0);
+	lf_count_adjust(p, uifind(ruid));
 	/* It is assumed that pcred is not shared between processes */
 	cr->cr_ruid = ruid;
 	uireplace(&cr->cr_ruidinfo, uifind(ruid));
Index: kern/kern_resource.c
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/kern/kern_resource.c,v
retrieving revision 1.19
diff -u -r1.19 kern_resource.c
--- kern/kern_resource.c	10 Apr 2004 20:55:23 -0000	1.19
+++ kern/kern_resource.c	21 Apr 2004 16:06:28 -0000
@@ -52,6 +52,7 @@
 #include <sys/malloc.h>
 #include <sys/proc.h>
 #include <sys/time.h>
+#include <sys/lockf.h>
 
 #include <vm/vm.h>
 #include <vm/vm_param.h>
@@ -387,6 +388,12 @@
 		if (limp->rlim_max < 1)
 			limp->rlim_max = 1;
 		break;
+	case RLIMIT_POSIXLOCK:
+		if (limp->rlim_cur > maxposixlocksperuid)
+			limp->rlim_cur = maxposixlocksperuid;
+		if (limp->rlim_max > maxposixlocksperuid)
+			limp->rlim_max = maxposixlocksperuid;
+		break;
 	}
 	*alimp = *limp;
 	return (0);
Index: kern/subr_param.c
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/kern/subr_param.c,v
retrieving revision 1.3
diff -u -r1.3 subr_param.c
--- kern/subr_param.c	30 Jan 2004 05:42:17 -0000	1.3
+++ kern/subr_param.c	20 Apr 2004 14:19:33 -0000
@@ -66,6 +66,9 @@
 #ifndef NSFBUFS
 #define NSFBUFS (512 + maxusers * 16)
 #endif
+#ifndef MAXPOSIXLOCKSPERUID
+#define MAXPOSIXLOCKSPERUID (maxusers * 64) /* Should be a safe value */
+#endif
 
 int	hz;
 int	stathz;
@@ -77,6 +80,7 @@
 int	maxprocperuid;			/* max # of procs per user */
 int	maxfiles;			/* sys. wide open files limit */
 int	maxfilesperproc;		/* per-proc open files limit */
+int	maxposixlocksperuid;		/* max # POSIX locks per uid */
 int	ncallout;			/* maximum # of timer events */
 int	mbuf_wait = 32;			/* mbuf sleep time in ticks */
 int	nbuf;
@@ -122,6 +126,8 @@
 #endif
 	TUNABLE_INT_FETCH("kern.maxbcache", &maxbcache);
 
+	maxposixlocksperuid = MAXPOSIXLOCKSPERUID;
+	TUNABLE_INT_FETCH("kern.maxposixlocksperuid", &maxposixlocksperuid);
 	maxtsiz = MAXTSIZ;
 	TUNABLE_QUAD_FETCH("kern.maxtsiz", &maxtsiz);
 	dfldsiz = DFLDSIZ;
Index: sys/lockf.h
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/sys/lockf.h,v
retrieving revision 1.3
diff -u -r1.3 lockf.h
--- sys/lockf.h	20 Aug 2003 07:31:21 -0000	1.3
+++ sys/lockf.h	21 Apr 2004 16:00:25 -0000
@@ -71,6 +71,12 @@
 
 int	 lf_advlock (struct vop_advlock_args *, struct lockf **, u_quad_t);
 
+
+#ifdef _KERNEL
+extern int maxposixlocksperuid;
+#endif
+void	lf_count_adjust(struct proc *, struct uidinfo *);
+
 #ifdef LOCKF_DEBUG
 void	lf_print (char *, struct lockf *);
 void	lf_printlist (char *, struct lockf *);
Index: sys/proc.h
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/sys/proc.h,v
retrieving revision 1.48
diff -u -r1.48 proc.h
--- sys/proc.h	10 Apr 2004 20:55:24 -0000	1.48
+++ sys/proc.h	20 Apr 2004 16:01:40 -0000
@@ -235,6 +235,7 @@
 	struct thread *p_thread; /* temporarily embed thread struct in proc */
 	struct upcall *p_upcall; /* USERLAND POINTER! registered upcall */
 	struct sched *p_sched;	/* work-in-progress / Peter Kadau */
+	int	p_numposixlocks; /* number of POSIX locks */
 };
 
 #if defined(_KERNEL)
Index: sys/resource.h
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/sys/resource.h,v
retrieving revision 1.5
diff -u -r1.5 resource.h
--- sys/resource.h	20 Aug 2003 22:59:00 -0000	1.5
+++ sys/resource.h	21 Apr 2004 16:01:52 -0000
@@ -90,8 +90,9 @@
 #define	RLIMIT_NOFILE	8		/* number of open files */
 #define	RLIMIT_SBSIZE	9		/* maximum size of all socket buffers */
 #define	RLIMIT_VMEM	10		/* virtual process size (inclusive of mmap) */
+#define	RLIMIT_POSIXLOCK 11		/* maximum number of POSIX locks per user */
 
-#define	RLIM_NLIMITS	11		/* number of resource limits */
+#define	RLIM_NLIMITS	12		/* number of resource limits */
 
 #define	RLIM_INFINITY	((rlim_t)(((u_quad_t)1 << 63) - 1))
 
@@ -113,6 +114,7 @@
 	"nofile",
 	"sbsize",
 	"vmem",
+	"posixlock",
 };
 #endif
 
Index: sys/resourcevar.h
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/sys/resourcevar.h,v
retrieving revision 1.6
diff -u -r1.6 resourcevar.h
--- sys/resourcevar.h	5 Nov 2003 23:26:21 -0000	1.6
+++ sys/resourcevar.h	21 Apr 2004 15:46:53 -0000
@@ -95,6 +95,7 @@
 	long	ui_proccnt;		/* number of processes */
 	uid_t	ui_uid;			/* uid */
 	int	ui_ref;			/* reference count */
+	int	ui_posixlocks;		/* number of POSIX locks */
 	struct varsymset ui_varsymset;	/* variant symlinks */
 };
 
Index: sys/sysctl.h
===================================================================
RCS file: /home/joerg/wd/repo/dragonflybsd/src/sys/sys/sysctl.h,v
retrieving revision 1.9
diff -u -r1.9 sysctl.h
--- sys/sysctl.h	10 Nov 2003 06:12:17 -0000	1.9
+++ sys/sysctl.h	20 Apr 2004 16:01:14 -0000
@@ -342,6 +342,7 @@
 #define	KERN_USRSTACK		33	/* int: address of USRSTACK */
 #define	KERN_LOGSIGEXIT		34	/* int: do we log sigexit procs? */
 #define KERN_MAXID		35      /* number of valid kern ids */
+#define KERN_MAXPOSIXLOCKSPERUID 36	/* int: max POSIX locks per uid */
 
 #define CTL_KERN_NAMES { \
 	{ 0, 0 }, \
@@ -373,6 +374,7 @@
 	{ "bootfile", CTLTYPE_STRING }, \
 	{ "maxfilesperproc", CTLTYPE_INT }, \
 	{ "maxprocperuid", CTLTYPE_INT }, \
+	{ "maxposixlocksperuid", CTLTYPE_INT }, \
 	{ "dumpdev", CTLTYPE_STRUCT }, /* we lie; don't print as int */ \
 	{ "ipc", CTLTYPE_NODE }, \
 	{ "dummy", CTLTYPE_INT }, \




More information about the Submit mailing list