PATCH preadv/pwritev

Chuck Tuffli ctuffli at gmail.com
Tue Apr 25 14:14:42 PDT 2006


I needed the iovec versions of pread and pwrite (preadv and pwritev) and 
have a patch for people to try. The changes are loosely based on the work 
done in FreeBSD-CURRENT and have been tested against an enhanced version of 
Iozone. Testing was done on 1.4.x, but there doesn't appear to be a 
big difference between HEAD and the 1.4 slip tag, so I would expect the 
patch to work on HEAD as well.

After applying the patch you will also need to run

cd src/sys/kern
make sysent

to regenerate all the automatically generated system call files.

---chuck
Index: sys/kern/sys_generic.c
===================================================================
RCS file: /local/dcvs/src/sys/kern/sys_generic.c,v
retrieving revision 1.23
diff -u -r1.23 sys_generic.c
--- sys/kern/sys_generic.c	14 Nov 2005 18:50:05 -0000	1.23
+++ sys/kern/sys_generic.c	24 Apr 2006 17:27:44 -0000
@@ -80,6 +80,9 @@
 static int	pollscan (struct proc *, struct pollfd *, u_int, int *);
 static int	selscan (struct proc *, fd_mask **, fd_mask **,
 			int, int *);
+static int	dofileread(int, struct file *, struct uio *, int, int *);
+static int	dofilewrite(int, struct file *, struct uio *, int, int *);
+
 
 struct file*
 holdfp(fdp, fd, flag)
@@ -108,6 +111,8 @@
 	struct iovec aiov;
 	int error;
 
+	if (uap->nbyte > INT_MAX)
+		return (EINVAL);
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->nbyte;
 	auio.uio_iov = &aiov;
@@ -118,13 +123,13 @@
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result);
+	error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_result);
 
 	return(error);
 }
 
 /*
- * Pread system call
+ * Positioned (Pread) read system call
  */
 int
 pread(struct pread_args *uap)
@@ -134,6 +139,8 @@
 	struct iovec aiov;
 	int error;
 
+	if (uap->nbyte > INT_MAX)
+		return (EINVAL);
 	aiov.iov_base = uap->buf;
 	aiov.iov_len = uap->nbyte;
 	auio.uio_iov = &aiov;
@@ -144,11 +151,14 @@
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	error = kern_readv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+	error = kern_preadv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
 
 	return(error);
 }
 
+/*
+ * Scatter read system call.
+ */
 int
 readv(struct readv_args *uap)
 {
@@ -168,24 +178,49 @@
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	error = kern_readv(uap->fd, &auio, 0, &uap->sysmsg_result);
+	error = kern_preadv(uap->fd, &auio, 0, &uap->sysmsg_result);
 
 	iovec_free(&iov, aiov);
 	return (error);
 }
 
+
+/*
+ * Scatter positioned read system call.
+ */
+int
+preadv(struct preadv_args *uap)
+{
+	struct thread *td = curthread;
+	struct uio auio;
+	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
+	int error;
+
+	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
+	    &auio.uio_resid);
+	if (error)
+		return (error);
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = uap->iovcnt;
+	auio.uio_offset = uap->offset;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_td = td;
+
+	error = kern_preadv(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+
+	iovec_free(&iov, aiov);
+	return(error);
+}
+
 int
-kern_readv(int fd, struct uio *auio, int flags, int *res)
+kern_preadv(int fd, struct uio *auio, int flags, int *res)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct file *fp;
 	struct filedesc *fdp = p->p_fd;
-	int len, error;
-#ifdef KTRACE
-	struct iovec *ktriov = NULL;
-	struct uio ktruio;
-#endif
+	int error;
 
 	KKASSERT(p);
 
@@ -194,12 +229,37 @@
 		return (EBADF);
 	if (flags & FOF_OFFSET && fp->f_type != DTYPE_VNODE) {
 		error = ESPIPE;
-		goto done;
-	}
-	if (auio->uio_resid < 0) {
+	} else if (auio->uio_resid < 0) {
 		error = EINVAL;
-		goto done;
+	} else {
+		error = dofileread(fd, fp, auio, flags, res);
+	}
+	fdrop(fp, td);
+	return(error);
+}
+
+/*
+ * Common code for readv and preadv that reads data in
+ * from a file using the passed in uio, offset, and flags.
+ */
+static int
+dofileread(int fd, struct file *fp, struct uio *auio, int flags, int *res)
+{
+	struct thread *td = curthread;
+	struct proc *p = td->td_proc;
+	ssize_t len;
+	int error;
+#ifdef KTRACE
+	struct iovec *ktriov = NULL;
+	struct uio ktruio;
+#endif
+
+	/* Finish zero length reads right here */
+	if (auio->uio_resid == 0) {
+		*res = 0;
+		return(0);
 	}
+
 #ifdef KTRACE
 	/*
 	 * if tracing, save a copy of iovec
@@ -231,9 +291,8 @@
 #endif
 	if (error == 0)
 		*res = len - auio->uio_resid;
-done:
-	fdrop(fp, td);
-	return (error);
+
+	return(error);
 }
 
 /*
@@ -257,7 +316,7 @@
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result);
+	error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_result);
 
 	return(error);
 }
@@ -283,7 +342,7 @@
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	error = kern_writev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+	error = kern_pwritev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
 
 	return(error);
 }
@@ -307,41 +366,85 @@
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	error = kern_writev(uap->fd, &auio, 0, &uap->sysmsg_result);
+	error = kern_pwritev(uap->fd, &auio, 0, &uap->sysmsg_result);
 
 	iovec_free(&iov, aiov);
 	return (error);
 }
 
+
 /*
- * Gather write system call
+ * Gather positioned write system call
  */
 int
-kern_writev(int fd, struct uio *auio, int flags, int *res)
+pwritev(struct pwritev_args *uap)
+{
+	struct thread *td = curthread;
+	struct uio auio;
+	struct iovec aiov[UIO_SMALLIOV], *iov = NULL;
+	int error;
+
+	error = iovec_copyin(uap->iovp, &iov, aiov, uap->iovcnt,
+	    &auio.uio_resid);
+	if (error)
+		return (error);
+	auio.uio_iov = iov;
+	auio.uio_iovcnt = uap->iovcnt;
+	auio.uio_offset = uap->offset;
+	auio.uio_rw = UIO_WRITE;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_td = td;
+
+	error = kern_pwritev(uap->fd, &auio, FOF_OFFSET, &uap->sysmsg_result);
+
+	iovec_free(&iov, aiov);
+	return(error);
+}
+
+int
+kern_pwritev(int fd, struct uio *auio, int flags, int *res)
 {
 	struct thread *td = curthread;
 	struct proc *p = td->td_proc;
 	struct file *fp;
 	struct filedesc *fdp = p->p_fd;
-	long len, error;
-#ifdef KTRACE
-	struct iovec *ktriov = NULL;
-	struct uio ktruio;
-#endif
+	int error;
 
 	KKASSERT(p);
 
 	fp = holdfp(fdp, fd, FWRITE);
 	if (fp == NULL)
 		return (EBADF);
-	if ((flags & FOF_OFFSET) && fp->f_type != DTYPE_VNODE) {
+	else if ((flags & FOF_OFFSET) && fp->f_type != DTYPE_VNODE) {
 		error = ESPIPE;
-		goto done;
+	} else {
+		error = dofilewrite(fd, fp, auio, flags, res);
 	}
+	
+	fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * Common code for writev and pwritev that writes data to
+ * a file using the passed in uio, offset, and flags.
+ */
+static int
+dofilewrite(int fd, struct file *fp, struct uio *auio, int flags, int *res)
+{	
+	struct thread *td = curthread;
+	struct proc *p = td->td_proc;
+	ssize_t len;
+	int error;
+#ifdef KTRACE
+	struct iovec *ktriov = NULL;
+	struct uio ktruio;
+#endif
+
 	if (auio->uio_resid < 0) {
-		error = EINVAL;
-		goto done;
+		return(EINVAL);
 	}
+
 #ifdef KTRACE
 	/*
 	 * if tracing, save a copy of iovec and uio
@@ -362,6 +465,7 @@
 		if (auio->uio_resid != len && (error == ERESTART ||
 		    error == EINTR || error == EWOULDBLOCK))
 			error = 0;
+		/* A write that failed with EPIPE raises SIGPIPE on the caller. */
 		if (error == EPIPE)
 			psignal(p, SIGPIPE);
 	}
@@ -377,9 +481,8 @@
 #endif
 	if (error == 0)
 		*res = len - auio->uio_resid;
-done:
-	fdrop(fp, td);
-	return (error);
+
+	return(error);
 }
 
 /*
Index: sys/kern/syscalls.master
===================================================================
RCS file: /local/dcvs/src/sys/kern/syscalls.master,v
retrieving revision 1.28
diff -u -r1.28 syscalls.master
--- sys/kern/syscalls.master	16 Nov 2005 02:24:30 -0000	1.28
+++ sys/kern/syscalls.master	22 Apr 2006 00:17:09 -0000
@@ -428,8 +428,9 @@
 286	UNIMPL	NOHIDE	nosys
 287	UNIMPL	NOHIDE	nosys
 288	UNIMPL	NOHIDE	nosys
-289	UNIMPL	NOHIDE	nosys
-290	UNIMPL	NOHIDE	nosys
+; 289 and 290 from NetBSD (OpenBSD: 267 and 268)
+289	STD	BSD	{ ssize_t preadv(int fd, struct iovec *iovp, u_int iovcnt, off_t offset); }
+290	STD	BSD	{ ssize_t pwritev(int fd, struct iovec *iovp, u_int iovcnt, off_t offset); }
 291	UNIMPL	NOHIDE	nosys
 292	UNIMPL	NOHIDE	nosys
 293	UNIMPL	NOHIDE	nosys
Index: sys/sys/kern_syscall.h
===================================================================
RCS file: /local/dcvs/src/sys/sys/kern_syscall.h,v
retrieving revision 1.28
diff -u -r1.28 kern_syscall.h
--- sys/sys/kern_syscall.h      9 Aug 2005 20:14:16 -0000       1.28
+++ sys/sys/kern_syscall.h      24 Apr 2006 19:46:01 -0000
@@ -91,8 +91,8 @@
 /*
  * Prototypes for syscalls in kern/sys_generic.c
  */
-int kern_readv(int fd, struct uio *auio, int flags, int *res);
-int kern_writev(int fd, struct uio *auio, int flags, int *res);
+int kern_preadv(int fd, struct uio *auio, int flags, int *res);
+int kern_pwritev(int fd, struct uio *auio, int flags, int *res);
 
 /*
  * Prototypes for syscalls in kern/kern_resource.c
Index: sys/sys/uio.h
===================================================================
RCS file: /local/dcvs/src/sys/sys/uio.h,v
retrieving revision 1.9
diff -u -r1.9 uio.h
--- sys/sys/uio.h	27 Jul 2004 13:11:22 -0000	1.9
+++ sys/sys/uio.h	17 Apr 2006 18:27:35 -0000
@@ -110,8 +110,10 @@
 #include <sys/cdefs.h>
 
 __BEGIN_DECLS
-ssize_t	readv (int, const struct iovec *, int);
-ssize_t	writev (int, const struct iovec *, int);
+ssize_t	readv(int, const struct iovec *, int);
+ssize_t	writev(int, const struct iovec *, int);
+ssize_t	preadv(int, const struct iovec *, int, off_t);
+ssize_t	pwritev(int, const struct iovec *, int, off_t);
 __END_DECLS
 
 #endif /* _KERNEL */




More information about the Kernel mailing list