summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorroot <root>2011-06-05 22:44:30 +0000
committerroot <root>2011-06-05 22:44:30 +0000
commit71e6cec06527558680f5d779a00d469205fd4567 (patch)
tree7ce1aa963d31615de0bbb83f5873079ea369e34f
parent655aee59fd0808a47cf5a0bb544c16abd8924791 (diff)
*** empty log message ***
-rw-r--r--eio.pod435
1 files changed, 432 insertions, 3 deletions
diff --git a/eio.pod b/eio.pod
index 3a54c20..fed01d3 100644
--- a/eio.pod
+++ b/eio.pod
@@ -57,6 +57,11 @@ Calling C<fork ()> is fully supported by this module. It is implemented in these
memory used by the worker threads. This gives you a fully empty
libeio queue.
+Note, however, that since libeio does use threads, the above guarantee doesn't
+cover your libc: for example, malloc and other libc functions are not
+fork-safe, so there is very little you can do after a fork, and in fact,
+the above might crash, and thus change.
+
=head1 INITIALISATION/INTEGRATION
Before you can call any eio functions you first have to initialise the
@@ -217,15 +222,418 @@ C<eio_poll>).
=back
-=head1 ANATOMY OF AN EIO REQUEST
+=head1 HIGH LEVEL REQUEST API
-#TODO
+Libeio has both a high-level API, which consists of calling a request
+function with a callback to be called on completion, and a low-level API
+where you fill out request structures and submit them.
+This section describes the high-level API.
-=head1 HIGH LEVEL REQUEST API
+=head2 REQUEST SUBMISSION AND RESULT PROCESSING
+
+You submit a request by calling the relevant C<eio_TYPE> function with the
+required parameters, a callback of type C<int (*eio_cb)(eio_req *req)>
+(called C<eio_cb> below) and a freely usable C<void *data> argument.
+
+The return value will either be C<0> on failure, or a pointer to the
+newly created and submitted C<eio_req *> on success.
+
+The callback will be called with an C<eio_req *> which contains the
+results of the request. The members you can access inside that structure
+vary from request to request, except for:
+
+=over 4
+
+=item C<ssize_t result>
+
+This contains the result value from the call (usually the same as the
+syscall of the same name).
+
+=item C<int errorno>
+
+This contains the value of C<errno> after the call.
+
+=item C<void *data>
+
+The C<void *data> member simply stores the value of the C<data> argument.
+
+=back
+
+The return value of the callback is normally C<0>, which tells libeio to
+continue normally. If a callback returns a nonzero value, libeio will
+stop processing results (in C<eio_poll>) and will return the value to its
+caller.
+
+Memory areas passed to libeio must stay valid as long as a request
+executes, with the exception of paths, which are being copied
+internally. Any memory libeio itself allocates will be freed after the
+finish callback has been called. If you want to manage all memory passed
+to libeio yourself you can use the low-level API.
+
+For example, to open a file, you could do this:
+
+ static int
+ file_open_done (eio_req *req)
+ {
+ if (req->result < 0)
+ {
+ /* open() returned -1 */
+ errno = req->errorno;
+ perror ("open");
+ }
+ else
+ {
+ int fd = req->result;
+ /* now we have the new fd in fd */
+ }
+
+ return 0;
+ }
+
+ /* the first three arguments are passed to open(2) */
+ /* the remaining are priority, callback and data */
+ if (!eio_open ("/etc/passwd", O_RDONLY, 0, 0, file_open_done, 0))
+ abort (); /* something went wrong, we will all die!!! */
+
+Note that you additionally need to call C<eio_poll> when the C<want_cb>
+indicates that requests are ready to be processed.
+
+=head2 AVAILABLE REQUESTS
+
+The following request functions are available. I<All> of them return the
+C<eio_req *> on success and C<0> on failure, and I<all> of them have the
+same three trailing arguments: C<pri>, C<cb> and C<data>. The C<cb> is
+mandatory, but in most cases, you pass in C<0> as C<pri> and C<0> or some
+custom data value as C<data>.
+
+=head3 POSIX API WRAPPERS
+
+These requests simply wrap the POSIX call of the same name, with the same
+arguments:
+
+=over 4
+
+=item eio_open (const char *path, int flags, mode_t mode, int pri, eio_cb cb, void *data)
+
+=item eio_utime (const char *path, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data)
+
+=item eio_truncate (const char *path, off_t offset, int pri, eio_cb cb, void *data)
+
+=item eio_chown (const char *path, uid_t uid, gid_t gid, int pri, eio_cb cb, void *data)
+
+=item eio_chmod (const char *path, mode_t mode, int pri, eio_cb cb, void *data)
+
+=item eio_mkdir (const char *path, mode_t mode, int pri, eio_cb cb, void *data)
+
+=item eio_rmdir (const char *path, int pri, eio_cb cb, void *data)
+
+=item eio_unlink (const char *path, int pri, eio_cb cb, void *data)
+
+=item eio_readlink (const char *path, int pri, eio_cb cb, void *data) /* result=ptr2 allocated dynamically */
+
+=item eio_stat (const char *path, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */
+
+=item eio_lstat (const char *path, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */
+
+=item eio_statvfs (const char *path, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */
+
+=item eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data)
+
+=item eio_link (const char *path, const char *new_path, int pri, eio_cb cb, void *data)
+
+=item eio_symlink (const char *path, const char *new_path, int pri, eio_cb cb, void *data)
+
+=item eio_rename (const char *path, const char *new_path, int pri, eio_cb cb, void *data)
+
+=item eio_msync (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data)
+
+=item eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data)
+
+=item eio_mlockall (int flags, int pri, eio_cb cb, void *data)
+
+=item eio_close (int fd, int pri, eio_cb cb, void *data)
+
+=item eio_sync (int pri, eio_cb cb, void *data)
+
+=item eio_fsync (int fd, int pri, eio_cb cb, void *data)
+
+=item eio_fdatasync (int fd, int pri, eio_cb cb, void *data)
+
+=item eio_futime (int fd, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data)
+
+=item eio_ftruncate (int fd, off_t offset, int pri, eio_cb cb, void *data)
+
+=item eio_fchmod (int fd, mode_t mode, int pri, eio_cb cb, void *data)
+
+=item eio_fchown (int fd, uid_t uid, gid_t gid, int pri, eio_cb cb, void *data)
+
+=item eio_dup2 (int fd, int fd2, int pri, eio_cb cb, void *data)
+
+These have the same semantics as the syscall of the same name, their
+return value is available as C<< req->result >> later.
+
+=item eio_read (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data)
+
+=item eio_write (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data)
+
+These two requests are called C<read> and C<write>, but actually wrap
+C<pread> and C<pwrite>. On systems that lack these calls (such as cygwin),
+libeio uses lseek/read_or_write/lseek and a mutex to serialise the
+requests, so all these requests run serially and do not disturb each
+other. However, they still disturb the file offset while they run, so it's
+not safe to call these functions concurrently with non-libeio functions on
+the same fd on these systems.
+
+Not surprisingly, pread and pwrite are not thread-safe on Darwin (OS/X),
+so it is advised not to submit multiple requests on the same fd on this
+horrible pile of garbage.
+
+=item eio_fstat (int fd, int pri, eio_cb cb, void *data)
+
+Stats a file - if C<< req->result >> indicates success, then you can
+access the C<struct stat>-like structure via C<< req->ptr2 >>:
+
+ EIO_STRUCT_STAT *statdata = (EIO_STRUCT_STAT *)req->ptr2;
+
+=item eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */
+
+Stats a filesystem - if C<< req->result >> indicates success, then you can
+access the C<struct statvfs>-like structure via C<< req->ptr2 >>:
+
+ EIO_STRUCT_STATVFS *statdata = (EIO_STRUCT_STATVFS *)req->ptr2;
+
+=back
+
+=head3 READING DIRECTORIES
+
+Reading directories sounds simple, but can be rather demanding, especially
+if you want to do stuff such as traversing a directory hierarchy or
+processing all files in a directory. Libeio can assist with these complex
+tasks with its C<eio_readdir> call.
+
+=over 4
+
+=item eio_readdir (const char *path, int flags, int pri, eio_cb cb, void *data)
+
+This is a very complex call. It basically reads through a whole directory
+(via the C<opendir>, C<readdir> and C<closedir> calls) and returns either
+the names or an array of C<struct eio_dirent>, depending on the C<flags>
+argument.
+
+The C<< req->result >> indicates either the number of files found, or
+C<-1> on error. On success, zero-terminated names can be found as C<< req->ptr2 >>,
+and C<struct eio_dirent>s, if requested by C<flags>, can be found via
+C<< req->ptr1 >>.
+
+Here is an example that prints all the names:
+
+ int i;
+ char *names = (char *)req->ptr2;
+
+ for (i = 0; i < req->result; ++i)
+ {
+ printf ("name #%d: %s\n", i, names);
+
+ /* move to next name */
+ names += strlen (names) + 1;
+ }
+
+Pseudo-entries such as F<.> and F<..> are never returned by C<eio_readdir>.
+
+C<flags> can be any combination of:
+
+=over 4
+
+=item EIO_READDIR_DENTS
+
+If this flag is specified, then, in addition to the names in C<ptr2>,
+also an array of C<struct eio_dirent> is returned, in C<ptr1>. A C<struct
+eio_dirent> looks like this:
+
+ struct eio_dirent
+ {
+ int nameofs; /* offset of null-terminated name string in (char *)req->ptr2 */
+ unsigned short namelen; /* size of filename without trailing 0 */
+ unsigned char type; /* one of EIO_DT_* */
+ signed char score; /* internal use */
+ ino_t inode; /* the inode number, if available, otherwise unspecified */
+ };
+
+The only members you normally would access are C<nameofs>, which is the
+byte-offset from C<ptr2> to the start of the name, C<namelen> and C<type>.
+
+C<type> can be one of:
+
+C<EIO_DT_UNKNOWN> - if the type is not known (very common) and you have to C<stat>
+the name yourself if you need to know,
+one of the "standard" POSIX file types (C<EIO_DT_REG>, C<EIO_DT_DIR>, C<EIO_DT_LNK>,
+C<EIO_DT_FIFO>, C<EIO_DT_SOCK>, C<EIO_DT_CHR>, C<EIO_DT_BLK>)
+or some OS-specific type (currently
+C<EIO_DT_MPC> - multiplexed char device (v7+coherent),
+C<EIO_DT_NAM> - xenix special named file,
+C<EIO_DT_MPB> - multiplexed block device (v7+coherent),
+C<EIO_DT_NWK> - HP-UX network special,
+C<EIO_DT_CMP> - VxFS compressed,
+C<EIO_DT_DOOR> - solaris door, or
+C<EIO_DT_WHT>).
+
+This example prints all names and their type:
+
+ int i;
+ struct eio_dirent *ents = (struct eio_dirent *)req->ptr1;
+ char *names = (char *)req->ptr2;
+
+ for (i = 0; i < req->result; ++i)
+ {
+ struct eio_dirent *ent = ents + i;
+ char *name = names + ent->nameofs;
+
+ printf ("name #%d: %s (type %d)\n", i, name, ent->type);
+ }
+
+=item EIO_READDIR_DIRS_FIRST
+
+When this flag is specified, then the names will be returned in an order
+where likely directories come first, in optimal C<stat> order. This is
+useful when you need to quickly find directories, or you want to find all
+directories while avoiding having to stat() each entry.
+
+If the system returns type information in readdir, then this is used
+to find directories directly. Otherwise, likely directories are names
+beginning with ".", or otherwise names with no dots, of which names with
+short names are tried first.
+
+=item EIO_READDIR_STAT_ORDER
+
+When this flag is specified, then the names will be returned in an order
+suitable for stat()'ing each one. That is, when you plan to stat()
+all files in the given directory, then the returned order will likely
+be fastest.
+
+If both this flag and C<EIO_READDIR_DIRS_FIRST> are specified, then
+the likely dirs come first, resulting in a less optimal stat order.
+
+=item EIO_READDIR_FOUND_UNKNOWN
+
+This flag should not be specified when calling C<eio_readdir>. Instead,
+it is being set by C<eio_readdir> (you can access the C<flags> via
+C<< req->int1 >>) when any of the C<type>'s found were C<EIO_DT_UNKNOWN>. The
+absence of this flag therefore indicates that all C<type>'s are known,
+which can be used to speed up some algorithms.
+
+A typical use case would be to identify all subdirectories within a
+directory - you would ask C<eio_readdir> for C<EIO_READDIR_DIRS_FIRST>. If
+then this flag is I<NOT> set, then all the entries at the beginning of the
+returned array of type C<EIO_DT_DIR> are the directories. Otherwise, you
+should start C<stat()>'ing the entries starting at the beginning of the
+array, stopping as soon as you found all directories (the count can be
+deduced by the link count of the directory).
+
+=back
+
+=back
+
+=head3 OS-SPECIFIC CALL WRAPPERS
+
+These wrap OS-specific calls (usually Linux ones), and might or might not
+be emulated on other operating systems. Calls that are not emulated will
+return C<-1> and set C<errno> to C<ENOSYS>.
+
+=over 4
+
+=item eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, int pri, eio_cb cb, void *data)
+
+Wraps the C<sendfile> syscall. The arguments follow the Linux version, but
+libeio supports and will use similar calls on FreeBSD, HP/UX, Solaris and
+Darwin.
+
+If the OS doesn't support some sendfile-like call, or the call fails,
+indicating missing support for the given file descriptor type (for example,
+Linux's sendfile might not support file to file copies), then libeio will
+emulate the call in userspace, so there are almost no limitations on its
+use.
+
+=item eio_readahead (int fd, off_t offset, size_t length, int pri, eio_cb cb, void *data)
+
+Calls C<readahead(2)>. If the syscall is missing, then the call is
+emulated by simply reading the data (currently in 64kiB chunks).
+
+=item eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data)
+
+Calls C<sync_file_range>. If the syscall is missing, then this is the same
+as calling C<fdatasync>.
+
+=back
+
+=head3 LIBEIO-SPECIFIC REQUESTS
+
+These requests are specific to libeio and do not correspond to any OS call.
+
+=over 4
+
+=item eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data)
+
+=item eio_custom (void (*execute)(eio_req *), int pri, eio_cb cb, void *data)
+
+Executes a custom request, i.e., a user-specified callback.
+
+The callback gets the C<eio_req *> as parameter and is expected to read
+and modify any request-specific members. Specifically, it should set C<<
+req->result >> to the result value, just like other requests.
+
+Here is an example that simply calls C<open>, like C<eio_open>, but it
+uses the C<data> member as filename and uses a hardcoded C<O_RDONLY>. If
+you want to pass more/other parameters, you either need to pass some
+struct or so via C<data> or provide your own wrapper using the low-level
+API.
+
+ static int
+ my_open_done (eio_req *req)
+ {
+ int fd = req->result;
+
+ return 0;
+ }
+
+ static void
+ my_open (eio_req *req)
+ {
+ req->result = open (req->data, O_RDONLY);
+ }
+
+ eio_custom (my_open, 0, my_open_done, "/etc/passwd");
+
+=item eio_busy (eio_tstamp delay, int pri, eio_cb cb, void *data)
+
+This is a request that takes C<delay> seconds to execute, but otherwise
+does nothing - it simply puts one of the worker threads to sleep for this
+long.
+
+This request can be used to artificially increase load, e.g. for debugging
+or benchmarking reasons.
+
+=item eio_nop (int pri, eio_cb cb, void *data)
+
+This request does nothing, except go through the whole request cycle. This
+can be used to measure latency or in some cases to simplify code, but is
+not really of much use.
+
+=back
+
+=head3 GROUPING AND LIMITING REQUESTS
#TODO
+/*****************************************************************************/
+/* groups */
+
+eio_req *eio_grp (eio_cb cb, void *data);
+void eio_grp_feed (eio_req *grp, void (*feed)(eio_req *req), int limit);
+void eio_grp_limit (eio_req *grp, int limit);
+void eio_grp_add (eio_req *grp, eio_req *req);
+void eio_grp_cancel (eio_req *grp); /* cancels all sub requests but not the group */
+
+
=back
@@ -233,6 +641,27 @@ C<eio_poll>).
#TODO
+
+=head1 ANATOMY AND LIFETIME OF AN EIO REQUEST
+
+A request is represented by a structure of type C<eio_req>. To initialise
+it, clear it to all zero bytes:
+
+ eio_req req;
+
+ memset (&req, 0, sizeof (req));
+
+A more common way to initialise a new C<eio_req> is to use C<calloc>:
+
+ eio_req *req = calloc (1, sizeof (*req));
+
+In either case, libeio neither allocates, initialises nor frees the
+C<eio_req> structure for you - it merely uses it.
+
+zero
+
+#TODO
+
=head1 EMBEDDING
Libeio can be embedded directly into programs. This functionality is not