From 71e6cec06527558680f5d779a00d469205fd4567 Mon Sep 17 00:00:00 2001 From: root Date: Sun, 5 Jun 2011 22:44:30 +0000 Subject: *** empty log message *** --- eio.pod | 435 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 432 insertions(+), 3 deletions(-) diff --git a/eio.pod b/eio.pod index 3a54c20..fed01d3 100644 --- a/eio.pod +++ b/eio.pod @@ -57,6 +57,11 @@ Calling C is fully supported by this module. It is implemented in these memory used by the worker threads. This gives you a fully empty libeio queue. +Note, however, since libeio does use threads, the above guarantee doesn't +cover your libc, for example, malloc and other libc functions are not +fork-safe, so there is very little you can do after a fork, and in fact, +the above might crash, and thus change. + =head1 INITIALISATION/INTEGRATION Before you can call any eio functions you first have to initialise the @@ -217,15 +222,418 @@ C). =back -=head1 ANATOMY OF AN EIO REQUEST +=head1 HIGH LEVEL REQUEST API -#TODO +Libeio has both a high-level API, which consists of calling a request +function with a callback to be called on completion, and a low-level API +where you fill out request structures and submit them. +This section describes the high-level API. -=head1 HIGH LEVEL REQUEST API +=head2 REQUEST SUBMISSION AND RESULT PROCESSING + +You submit a request by calling the relevant C function with the +required parameters, a callback of type C<eio_cb> +(called C<cb> below) and a freely usable C<void *data> argument. + +The return value will either be 0 on failure, or the submitted request on success. + +The callback will be called with an C<eio_req *> which contains the +results of the request. The members you can access inside that structure +vary from request to request, except for: + +=over 4 + +=item C<result> + +This contains the result value from the call (usually the same as the +syscall of the same name). + +=item C<errorno> + +This contains the value of C<errno> after the call. + +=item C<data> + +The C<data> member simply stores the value of the C<data> argument. 
+ +=back + +The return value of the callback is normally C<0>, which tells libeio to +continue normally. If a callback returns a nonzero value, libeio will +stop processing results (in C) and will return the value to its +caller. + +Memory areas passed to libeio must stay valid as long as a request +executes, with the exception of paths, which are being copied +internally. Any memory libeio itself allocates will be freed after the +finish callback has been called. If you want to manage all memory passed +to libeio yourself you can use the low-level API. + +For example, to open a file, you could do this: + + static int + file_open_done (eio_req *req) + { + if (req->result < 0) + { + /* open() returned -1 */ + errno = req->errorno; + perror ("open"); + } + else + { + int fd = req->result; + /* now we have the new fd in fd */ + } + + return 0; + } + + /* the first three arguments are passed to open(2) */ + /* the remaining are priority, callback and data */ + if (!eio_open ("/etc/passwd", O_RDONLY, 0, 0, file_open_done, 0)) + abort (); /* something went wrong, we will all die!!! */ + +Note that you additionally need to call C when the C +indicates that requests are ready to be processed. + +=head2 AVAILABLE REQUESTS + +The following request functions are available. I<All> of them return the +C<eio_req *> on success and C<0> on failure, and I<all> of them have the +same three trailing arguments: C<pri>, C<cb> and C<data>. The C<cb> is +mandatory, but in most cases, you pass in C<0> as C<pri> and C<0> or some +custom data value as C<data>. 
+ +=head3 POSIX API WRAPPERS + +These requests simply wrap the POSIX call of the same name, with the same +arguments: + +=over 4 + +=item eio_open (const char *path, int flags, mode_t mode, int pri, eio_cb cb, void *data) + +=item eio_utime (const char *path, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data) + +=item eio_truncate (const char *path, off_t offset, int pri, eio_cb cb, void *data) + +=item eio_chown (const char *path, uid_t uid, gid_t gid, int pri, eio_cb cb, void *data) + +=item eio_chmod (const char *path, mode_t mode, int pri, eio_cb cb, void *data) + +=item eio_mkdir (const char *path, mode_t mode, int pri, eio_cb cb, void *data) + +=item eio_rmdir (const char *path, int pri, eio_cb cb, void *data) + +=item eio_unlink (const char *path, int pri, eio_cb cb, void *data) + +=item eio_readlink (const char *path, int pri, eio_cb cb, void *data) /* result=ptr2 allocated dynamically */ + +=item eio_stat (const char *path, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */ + +=item eio_lstat (const char *path, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */ + +=item eio_statvfs (const char *path, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */ + +=item eio_mknod (const char *path, mode_t mode, dev_t dev, int pri, eio_cb cb, void *data) + +=item eio_link (const char *path, const char *new_path, int pri, eio_cb cb, void *data) + +=item eio_symlink (const char *path, const char *new_path, int pri, eio_cb cb, void *data) + +=item eio_rename (const char *path, const char *new_path, int pri, eio_cb cb, void *data) + +=item eio_msync (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) + +=item eio_mlock (void *addr, size_t length, int pri, eio_cb cb, void *data) + +=item eio_mlockall (int flags, int pri, eio_cb cb, void *data) + +=item eio_close (int fd, int pri, eio_cb cb, void *data) + +=item eio_sync (int pri, eio_cb cb, void *data) + +=item 
eio_fsync (int fd, int pri, eio_cb cb, void *data) + +=item eio_fdatasync (int fd, int pri, eio_cb cb, void *data) + +=item eio_futime (int fd, eio_tstamp atime, eio_tstamp mtime, int pri, eio_cb cb, void *data) + +=item eio_ftruncate (int fd, off_t offset, int pri, eio_cb cb, void *data) + +=item eio_fchmod (int fd, mode_t mode, int pri, eio_cb cb, void *data) + +=item eio_fchown (int fd, uid_t uid, gid_t gid, int pri, eio_cb cb, void *data) + +=item eio_dup2 (int fd, int fd2, int pri, eio_cb cb, void *data) + +These have the same semantics as the syscall of the same name, their +return value is available as C<< req->result >> later. + +=item eio_read (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) + +=item eio_write (int fd, void *buf, size_t length, off_t offset, int pri, eio_cb cb, void *data) + +These two requests are called C<eio_read> and C<eio_write>, but actually wrap +C<pread> and C<pwrite>. On systems that lack these calls (such as cygwin), +libeio uses lseek/read_or_write/lseek and a mutex to serialise the +requests, so all these requests run serially and do not disturb each +other. However, they still disturb the file offset while they run, so it's +not safe to call these functions concurrently with non-libeio functions on +the same fd on these systems. + +Not surprisingly, pread and pwrite are not thread-safe on Darwin (OS/X), +so it is advised not to submit multiple requests on the same fd on this +horrible pile of garbage. 
+ +=item eio_fstat (int fd, int pri, eio_cb cb, void *data) + +Stats a file - if C<< req->result >> indicates success, then you can +access the C<struct stat>-like structure via C<< req->ptr2 >>: + + EIO_STRUCT_STAT *statdata = (EIO_STRUCT_STAT *)req->ptr2; + +=item eio_fstatvfs (int fd, int pri, eio_cb cb, void *data) /* stat buffer=ptr2 allocated dynamically */ + +Stats a filesystem - if C<< req->result >> indicates success, then you can +access the C<struct statvfs>-like structure via C<< req->ptr2 >>: + + EIO_STRUCT_STATVFS *statdata = (EIO_STRUCT_STATVFS *)req->ptr2; + +=back + +=head3 READING DIRECTORIES + +Reading directories sounds simple, but can be rather demanding, especially +if you want to do stuff such as traversing a directory hierarchy or +processing all files in a directory. Libeio can assist with these complex tasks +with its C<eio_readdir> call. + +=over 4 + +=item eio_readdir (const char *path, int flags, int pri, eio_cb cb, void *data) + +This is a very complex call. It basically reads through a whole directory +(via the C<opendir>, C<readdir> and C<closedir> calls) and returns either +the names or an array of C<struct eio_dirent>, depending on the C<flags> +argument. + +The C<< req->result >> indicates either the number of files found, or +C<-1> on error. On success, zero-terminated names can be found as C<< req->ptr2 >>, +and C<struct eio_dirent>s, if requested by C<flags>, can be found via C<< +req->ptr1 >>. + +Here is an example that prints all the names: + + int i; + char *names = (char *)req->ptr2; + + for (i = 0; i < req->result; ++i) + { + printf ("name #%d: %s\n", i, names); + + /* move to next name */ + names += strlen (names) + 1; + } + +Pseudo-entries such as F<.> and F<..> are never returned by C<eio_readdir>. + +C<flags> can be any combination of: + +=over 4 + +=item EIO_READDIR_DENTS + +If this flag is specified, then, in addition to the names in C<ptr2>, +also an array of C<struct eio_dirent> is returned, in C<ptr1>. 
A C looks like this: + + struct eio_dirent + { + int nameofs; /* offset of null-terminated name string in (char *)req->ptr2 */ + unsigned short namelen; /* size of filename without trailing 0 */ + unsigned char type; /* one of EIO_DT_* */ + signed char score; /* internal use */ + ino_t inode; /* the inode number, if available, otherwise unspecified */ + }; + +The only members you normally would access are C, which is the +byte-offset from C to the start of the name, C and C. + +C can be one of: + +C - if the type is not known (very common) and you have to C +the name yourself if you need to know, +one of the "standard" POSIX file types (C, C, C, +C, C, C, C) +or some OS-specific type (currently +C - multiplexed char device (v7+coherent), +C - xenix special named file, +C - multiplexed block device (v7+coherent), +C - HP-UX network special, +C - VxFS compressed, +C - solaris door, or +C). + +This example prints all names and their type: + + int i; + struct eio_dirent *ents = (struct eio_dirent *)req->ptr1; + char *names = (char *)req->ptr2; + + for (i = 0; i < req->result; ++i) + { + struct eio_dirent *ent = ents + i; + char *name = names + ent->nameofs; + + printf ("name #%d: %s (type %d)\n", i, name, ent->type); + } + +=item EIO_READDIR_DIRS_FIRST + +When this flag is specified, then the names will be returned in an order +where likely directories come first, in optimal C order. This is +useful when you need to quickly find directories, or you want to find all +directories while avoiding to stat() each entry. + +If the system returns type information in readdir, then this is used +to find directories directly. Otherwise, likely directories are names +beginning with ".", or otherwise names with no dots, of which names with +short names are tried first. + +=item EIO_READDIR_STAT_ORDER + +When this flag is specified, then the names will be returned in an order +suitable for stat()'ing each one. 
That is, when you plan to stat() +all files in the given directory, then the returned order will likely +be fastest. + +If both this flag and C<EIO_READDIR_DIRS_FIRST> are specified, then +the likely dirs come first, resulting in a less optimal stat order. + +=item EIO_READDIR_FOUND_UNKNOWN + +This flag should not be specified when calling C<eio_readdir>. Instead, +it is being set by C<eio_readdir> (you can access the C<flags> via C<< +req->int1 >>), when any of the C<type>'s found were C<EIO_DT_UNKNOWN>. The +absence of this flag therefore indicates that all C<type>'s are known, +which can be used to speed up some algorithms. + +A typical use case would be to identify all subdirectories within a +directory - you would ask C<eio_readdir> for C<EIO_READDIR_DIRS_FIRST>. If +then this flag is I<NOT> set, then all the entries at the beginning of the +returned array of type C<EIO_DT_DIR> are the directories. Otherwise, you +should start C<stat()>'ing the entries starting at the beginning of the +array, stopping as soon as you found all directories (the count can be +deduced by the link count of the directory). + +=back + +=back + +=head3 OS-SPECIFIC CALL WRAPPERS + +These wrap OS-specific calls (usually Linux ones), and might or might not +be emulated on other operating systems. Calls that are not emulated will +return C<-1> and set C<errno> to C<ENOSYS>. + +=over 4 + +=item eio_sendfile (int out_fd, int in_fd, off_t in_offset, size_t length, int pri, eio_cb cb, void *data) + +Wraps the C<sendfile> syscall. The arguments follow the Linux version, but +libeio supports and will use similar calls on FreeBSD, HP/UX, Solaris and +Darwin. + +If the OS doesn't support some sendfile-like call, or the call fails, +indicating a lack of support for the given file descriptor type (for example, +Linux's sendfile might not support file to file copies), then libeio will +emulate the call in userspace, so there are almost no limitations on its +use. + +=item eio_readahead (int fd, off_t offset, size_t length, int pri, eio_cb cb, void *data) + +Calls C<readahead>. If the syscall is missing, then the call is +emulated by simply reading the data (currently in 64kiB chunks). 
+ +=item eio_sync_file_range (int fd, off_t offset, size_t nbytes, unsigned int flags, int pri, eio_cb cb, void *data) + +Calls C<sync_file_range>. If the syscall is missing, then this is the same +as calling C<fdatasync>. + +=back + +=head3 LIBEIO-SPECIFIC REQUESTS + +These requests are specific to libeio and do not correspond to any OS call. + +=over 4 + +=item eio_mtouch (void *addr, size_t length, int flags, int pri, eio_cb cb, void *data) + +=item eio_custom (void (*)(eio_req *) execute, int pri, eio_cb cb, void *data) + +Executes a custom request, i.e., a user-specified callback. + +The callback gets the C<eio_req *> as parameter and is expected to read +and modify any request-specific members. Specifically, it should set C<< +req->result >> to the result value, just like other requests. + +Here is an example that simply calls C<open>, like C<eio_open>, but it +uses the C<data> member as filename and uses a hardcoded C<O_RDONLY>. If +you want to pass more/other parameters, you either need to pass some +struct or so via C<data> or provide your own wrapper using the low-level +API. + + static int + my_open_done (eio_req *req) + { + int fd = req->result; + + return 0; + } + + static void + my_open (eio_req *req) + { + req->result = open (req->data, O_RDONLY); + } + + eio_custom (my_open, 0, my_open_done, "/etc/passwd"); + +=item eio_busy (eio_tstamp delay, int pri, eio_cb cb, void *data) + +This is a request that takes C<delay> seconds to execute, but otherwise +does nothing - it simply puts one of the worker threads to sleep for this +long. + +This request can be used to artificially increase load, e.g. for debugging +or benchmarking reasons. + +=item eio_nop (int pri, eio_cb cb, void *data) + +This request does nothing, except go through the whole request cycle. This +can be used to measure latency or in some cases to simplify code, but is +not really of much use. 
+ +=back + +=head3 GROUPING AND LIMITING REQUESTS #TODO +/*****************************************************************************/ +/* groups */ + +eio_req *eio_grp (eio_cb cb, void *data); +void eio_grp_feed (eio_req *grp, void (*feed)(eio_req *req), int limit); +void eio_grp_limit (eio_req *grp, int limit); +void eio_grp_add (eio_req *grp, eio_req *req); +void eio_grp_cancel (eio_req *grp); /* cancels all sub requests but not the group */ + + =back @@ -233,6 +641,27 @@ C). #TODO + +=head1 ANATOMY AND LIFETIME OF AN EIO REQUEST + +A request is represented by a structure of type C<eio_req>. To initialise +it, clear it to all zero bytes: + + eio_req req; + + memset (&req, 0, sizeof (req)); + +A more common way to initialise a new C<eio_req> is to use C<calloc>: + + eio_req *req = calloc (1, sizeof (*req)); + +In either case, libeio neither allocates, initialises nor frees the +C<eio_req> structure for you - it merely uses it. + +zero + +#TODO + =head1 EMBEDDING Libeio can be embedded directly into programs. This functionality is not -- cgit v1.2.3