.\" This file is in the public domain. .\" .\" For the _of and _v_/_l_ function name fragments, we want something .\" semantically like .Fn but without the (). I use .Fa, as the least .\" inaccurate approximation I've found. .\" .Dd December 21, 2013 .Dt LIBAIO 3 .Os NetBSD 1.4T .Sh NAME .Nm aio_poll_init , .Nm aio_poll_reinit , .Nm aio_add_poll , .Nm aio_change_poll_rtest , .Nm aio_change_poll_wtest , .Nm aio_change_poll_rfn , .Nm aio_change_poll_wfn , .Nm aio_change_poll_arg , .Nm aio_add_block , .Nm aio_call_once , .Nm aio_remove_poll , .Nm aio_remove_block , .Nm aio_pre_poll , .Nm aio_do_poll , .Nm aio_post_poll , .Nm aio_event_loop , .Nm aio_poll_init_of , .Nm aio_poll_done_of , .Nm aio_add_poll_of , .Nm aio_change_poll_rtest_of , .Nm aio_change_poll_wtest_of , .Nm aio_change_poll_rfn_of , .Nm aio_change_poll_wfn_of , .Nm aio_change_poll_arg_of , .Nm aio_add_block_of , .Nm aio_call_once_of , .Nm aio_remove_poll_of , .Nm aio_remove_block_of , .Nm aio_pre_poll_l_of , .Nm aio_do_poll_l_of , .Nm aio_post_poll_l_of , .Nm aio_pre_poll_v_of , .Nm aio_do_poll_v_of , .Nm aio_post_poll_v_of , .Nm aio_global_loop , .Nm aio_rwtest_always , .Nm aio_rwtest_never , .Nm aio_oq_init , .Nm aio_oq_queue_point , .Nm aio_oq_queue_copy , .Nm aio_oq_queue_free , .Nm aio_oq_queue_cb , .Nm aio_oq_queue_printf , .Nm aio_oq_queue_special , .Nm aio_oq_set_special_typecb , .Nm aio_oq_writev , .Nm aio_oq_custom_writev , .Nm aio_oq_dropdata , .Nm aio_oq_dropdata_cb , .Nm aio_oq_headlen , .Nm aio_oq_qlen , .Nm aio_oq_empty , .Nm aio_oq_nonempty , .Nm aio_oq_flush , .Nm aio_oq_read , .Nm aio_pq_init , .Nm aio_pq_deinit , .Nm aio_pq_queue_point , .Nm aio_pq_queue_copy , .Nm aio_pq_queue_free , .Nm aio_pq_queue_cb , .Nm aio_pq_queue_printf , .Nm aio_pq_boundary , .Nm aio_pq_writev_drop , .Nm aio_pq_qlen , .Nm aio_pq_empty , .Nm aio_pq_nonempty , .Nm aio_pq_flush , .Nm aio__panic .Nd pseudo-async I/O routines .Sh SYNOPSIS .Fd #include .br .Nm cc .Op Ar arguments .Fl laio .br .Ft void .Fn 
aio_poll_init "void" .Ft void .Fn aio_poll_reinit "void" .Ft int .Fn aio_add_poll "int" "int (*)(void *)" "int (*)(void *)" "void (*)(void *)" "void (*)(void *)" "void *" .Ft int .Fn aio_change_poll_rtest "int" "int (*)(void *)" .Ft int .Fn aio_change_poll_wtest "int" "int (*)(void *)" .Ft int .Fn aio_change_poll_rfn "int" "void (*)(void *)" .Ft int .Fn aio_change_poll_wfn "int" "void (*)(void *)" .Ft int .Fn aio_change_poll_arg "int" "void *" .Ft int .Fn aio_add_block "int (*)(void *)" "void *" .Ft int .Fn aio_call_once "void (*)(void *)" "void *" .Ft void .Fn aio_remove_poll "int" .Ft int .Fn aio_remove_block "int" .Ft int .Fn aio_pre_poll "void" .Ft int .Fn aio_do_poll "void" .Ft void .Fn aio_post_poll "void" .Ft int .Fn aio_event_loop "void" .Ft AIO_LOOP * .Fn aio_poll_init_of "void" .Ft void .Fn aio_poll_done_of "AIO_LOOP *" .Ft int .Fn aio_add_poll_of "AIO_LOOP *" "int" "int (*)(void *)" "int (*)(void *)" "void (*)(void *)" "void (*)(void *)" "void *" .Ft int .Fn aio_change_poll_rtest_of "AIO_LOOP *" "int" "int (*)(void *)" .Ft int .Fn aio_change_poll_wtest_of "AIO_LOOP *" "int" "int (*)(void *)" .Ft int .Fn aio_change_poll_rfn_of "AIO_LOOP *" "int" "void (*)(void *)" .Ft int .Fn aio_change_poll_wfn_of "AIO_LOOP *" "int" "void (*)(void *)" .Ft int .Fn aio_change_poll_arg_of "AIO_LOOP *" "int" "void *" .Ft int .Fn aio_add_block_of "AIO_LOOP *" "int (*)(void *)" "void *" .Ft int .Fn aio_call_once_of "AIO_LOOP *" "void (*)(void *)" "void *" .Ft void .Fn aio_remove_poll_of "AIO_LOOP *" "int" .Ft int .Fn aio_remove_block_of "AIO_LOOP *" "int" .Ft int .Fn aio_pre_poll_l_of "int" "AIO_LOOP *" "..." .Ft int .Fn aio_do_poll_l_of "int" "AIO_LOOP *" "..." .Ft int .Fn aio_post_poll_l_of "int" "AIO_LOOP *" "..." 
.Ft int .Fn aio_pre_poll_v_of "int" "AIO_LOOP **" .Ft int .Fn aio_do_poll_v_of "int" "AIO_LOOP **" .Ft void .Fn aio_post_poll_v_of "int" "AIO_LOOP **" .Ft "AIO_LOOP *" .Fn aio_global_loop "void" .Ft int .Fn aio_rwtest_always "void *" .Ft int .Fn aio_rwtest_never "void *" .br .Ft void .Fn aio_oq_init "AIO_OQ *" .Ft int .Fn aio_oq_queue_point "AIO_OQ *" "const void *" "int" .Ft int .Fn aio_oq_queue_copy "AIO_OQ *" "const void *" "int" .Ft int .Fn aio_oq_queue_free "AIO_OQ *" "void *" "int" .Ft int .Fn aio_oq_queue_cb "AIO_OQ *" "void *" "int" "void (*)(void *)" "void *" .Ft int .Fn aio_oq_queue_printf "AIO_OQ *" "const char *" "..." .Ft int .Fn aio_oq_queue_special "AIO_OQ *" "AIO_SPECIAL_TYPE" "void (*)(AIO_SPECIAL_OP, void *, int)" "void *" "int" .Ft void .Fn aio_oq_set_special_typecb "AIO_OQ *" "AIO_SPECIAL_TYPE (*)(void *, int)" .Ft int .Fn aio_oq_writev "AIO_OQ *" "int" "int" .Ft int .Fn aio_oq_custom_writev "AIO_OQ *" "int" "int" "int (*)(void *, const struct iovec *, int)" "void *" .Ft int .Fn aio_oq_dropdata "AIO_OQ *" "int" .Ft int .Fn aio_oq_dropdata_cb "AIO_OQ *" "int" "void (*)(const void *, int, void *)" "void *" .Ft unsigned int .Fn aio_oq_qlen "AIO_OQ *" .Ft int .Fn aio_oq_headlen "AIO_OQ *" .Ft int .Fn aio_oq_empty "AIO_OQ *" .Ft int .Fn aio_oq_nonempty "AIO_OQ *" .Ft void .Fn aio_oq_flush "AIO_OQ *" .Ft int .Fn aio_oq_read "AIO_OQ *" "void *" "int" .br .Ft int .Fn aio_pq_init "AIO_PQ *" "int" .Ft void .Fn aio_pq_deinit "AIO_PQ *" .Ft int .Fn aio_pq_queue_point "AIO_PQ *" "int" "const void *" "int" .Ft int .Fn aio_pq_queue_copy "AIO_PQ *" "int" "const void *" "int" .Ft int .Fn aio_pq_queue_free "AIO_PQ *" "int" "void *" "int" .Ft int .Fn aio_pq_queue_cb "AIO_PQ *" "int" "void *" "int" "void (*)(void *)" "void *" .Ft int .Fn aio_pq_queue_printf "AIO_PQ *" "int" "const char *" "..." 
.Ft int .Fn aio_pq_boundary "AIO_PQ *" "int" .Ft int .Fn aio_pq_writev_drop "AIO_PQ *" "int" "int" .Ft unsigned int .Fn aio_pq_qlen "AIO_PQ *" .Ft int .Fn aio_pq_empty "AIO_PQ *" .Ft int .Fn aio_pq_nonempty "AIO_PQ *" .Ft void .Fn aio_pq_flush "AIO_PQ *" "int" .br .Ft void .Fn aio__panic "const char *" "..." .Sh DESCRIPTION These functions are designed to make it easy to do pseudo-asynchronous I/O without real threads (which are very difficult to do right in C). .Pp There are three packages involved, one for managing a .Xr poll 2 Ns -driven main loop and two for handling data queued for output. They also share one interface, .Fn aio__panic . .Pp .Sh POLL LOOP These functions manage a callback-style .Xr poll 2 Ns -based main loop. The paradigm is that you tell the poll loop what file descriptors you are interested in and it calls functions you specify when .Xr poll 2 indicates that the corresponding I/O is possible. You can also specify functions to be called whenever the package is about to do a potentially blocking .Xr poll 2 call; these can be used to flush buffers, do background computation when there is nothing better to do, specify maximum timeout for the sleep, and the like. There are three manifest constants and a bunch of functions. .Pp There are actually two sets of these functions. For simple applications that want only one event loop, use the functions without the .Fa \&_of suffix; these operate on a single global event loop. For more complex applications, where one global event loop is inadequate, the functions with the .Fa \&_of suffix support creating, working with, and destroying multiple event loops. This document first describes the functions which use the global event loop, then the multiple-loop versions and how they differ from the global-loop versions. .Pp The first important function is .Fn aio_poll_init . This must be called before any of the other functions in this package are called. 
(It is safe to call this and then not use the other functions; doing so incurs no overhead beyond the handful of cycles .Fn aio_poll_init needs to initialize some variables.) .Pp There is also .Fn aio_poll_reinit , which clears out any existing internal data structures. It is designed for use after .Xr fork 2 Ns No ing , when the child process wants to use libaio separately. (If you're using .Fn aio_poll_init_of , just call .Fn aio_poll_done_of on each .Dv AIO_LOOP . ) Of course, closing file descriptors needs to be handled separately; the library never handles that. .Pp You then add file descriptors into the .Xr poll 2 pool with .Fn aio_add_poll . This takes the file descriptor in question as its first argument. The second and third arguments are read and write (respectively) interest functions; these are called to allow your code to decide whether it is interested in the descriptor's readability and writability. They return nonzero if the possibility of I/O in the corresponding direction on the descriptor is of interest and zero if not. The fourth and fifth arguments are read and write functions (respectively); these are called when the corresponding test function returns nonzero and then .Xr poll 2 indicates that the corresponding kind of I/O is possible. The sixth argument is a generic pointer which is passed to the test and I/O functions. This will normally be a pointer to some data structure in your code that the test and I/O functions use, but it does not have to be; if the test and I/O functions ignore their argument, for example, it can be anything. The test functions must not be nil (see .Fn aio_rwtest_always and .Fn aio_rwtest_never , below); the read (or write) function may be nil if the read (resp. write) test function always returns zero. (They can even both be, but then there is little point in making the call at all; under those circumstances it effectively is a crippled version of .Fn aio_add_block . 
) The return value is a poll ID which can be used to deregister the descriptor later with .Fn aio_remove_poll . .Pp After a poll is set up, you can change its functions or argument with .Fn aio_change_poll_rtest , .Fn aio_change_poll_wtest , .Fn aio_change_poll_rfn , .Fn aio_change_poll_wfn , and .Fn aio_change_poll_arg . Using one of these is equivalent to removing the poll entry and re-adding it with the changed function or pointer, except that it keeps the same ID. They take the ID and the new value. Normally, they return 0; on error (such as the library detecting an invalid argument), they return -1. The library may or may not detect erroneous calls; what happens in response to an undetected erroneous call is specifically undefined. .Pp .Fn aio_add_block adds a callback to be called when the poll loop is about to make a potentially blocking .Xr poll 2 call. The function and a generic pointer argument are passed in; the return value is a block ID which can be used later with .Fn aio_remove_block to deregister the function. Functions registered with .Fn aio_add_block are called when the poll loop is about to do a potentially blocking .Xr poll 2 call. The return value must be nonnegative or one of the negative manifest constants .Dv AIO_BLOCK_NIL or .Dv AIO_BLOCK_LOOP (defined in .Pa ) . If the return value is .Dv AIO_BLOCK_NIL , then the poll loop carries on as if the function had not been registered at all. This is suitable for cases where the block function checks and finds it has nothing to do. If the return value is .Dv AIO_BLOCK_LOOP , then the poll loop does not make the .Xr poll 2 call at all; rather, it arranges to short-circuit the rest of that iteration of the loop. This is suitable when the block function changes something which can affect the return value of a test function, or registers or deregisters poll or block functions. 
Finally, if the return value is nonnegative, it specifies a maximum timeout for the .Xr poll 2 , in the units of .Xr poll 2 Ap s last argument. (Even if no I/O is possible, the call may delay less than this time; the actual delay used is the minimum of that returned by all block functions, or .Dv INFTIM if all registered block functions return .Dv AIO_BLOCK_NIL . ) Negative return values other than .Dv AIO_BLOCK_NIL and .Dv AIO_BLOCK_LOOP from block functions are errors; the library's reaction to such errors is specifically undefined. .Pp .Fn aio_call_once is a convenience wrapper for the common idiom of using .Fn aio_add_block to call a function once at some future time. It registers a block function which just deregisters itself and then calls the provided function with the provided argument. It returns the negative value .Dv AIO_ERR if it can't allocate memory, or if adding the block function fails; on success, it returns zero (for which the library provides the convenience name .Dv AIO_OK ) . .Pp .Fn aio_remove_poll undoes the registration .Fn aio_add_poll performs for a file descriptor. Its argument must be the poll ID returned by .Fn aio_add_poll ; after this, that poll ID becomes invalid and must not be used again. .Pp .Fn aio_remove_block undoes the registration .Fn aio_add_block performs for a block function. Its argument must be the block ID returned by .Fn aio_add_block ; after this, that block ID becomes invalid and must not be used again. The return value is normally 0; on error (such as the library detecting an invalid argument), it can return -1. The library may or may not detect erroneous calls; what happens in response to an undetected erroneous call is specifically undefined. .Pp The main loop itself is broken into three parts. The first part is .Fn aio_pre_poll . This calls read and write test functions and prepares internal data structures for the call to .Xr poll 2 . It normally returns 0. 
It can fail (if .Xr malloc 3 fails), in which case it returns a negative number. The second part is .Fn aio_do_poll . This calls .Xr poll 2 , calling block functions if a call with a zero timeout indicates that no I/O is possible immediately. Depending on what the block functions return, it either calls .Xr poll 2 with a nonzero (possibly infinite) timeout or, if a block function returns .Dv AIO_BLOCK_LOOP , sets an internal variable so that .Fn aio_post_poll does nothing this iteration. The return value from .Fn aio_do_poll is that of the last .Xr poll 2 call made, or 0 if a block function returned .Dv AIO_BLOCK_LOOP . Finally, .Fn aio_post_poll checks to see if .Xr poll 2 indicated that any I/O is possible, calling read and write functions accordingly. .Pp As a convenience, there is also .Fn aio_event_loop , which is just an infinite loop, calling .Fn aio_pre_poll , .Fn aio_do_poll , and .Fn aio_post_poll . It returns when (and only when) .Fn aio_pre_poll returns failure, or .Fn aio_do_poll returns a negative value with .Va errno set to anything other than .Dv EINTR . It returns an errno value. .Pp .Fn aio_poll_init_of creates and initializes a new event loop, returning a pointer to an opaque type .Dv AIO_LOOP which must be passed to the other .Fa \&_of Ns No -suffixed functions. (It may return nil on error if .Xr malloc 3 fails.) When finished with the resulting event loop, pass it to .Fn aio_poll_done_of to tear it down and free up all relevant memory. (Implicit in this is deregistration of all poll and block functions registered with that .Dv AIO_LOOP . ) .Pp Most of the other .Fa \&_of functions are identical to their .Pf non- Ns Fa \&_of counterparts, except that they operate on the argument .Dv AIO_LOOP instead of the global loop. 
Exceptions to these are the main-loop functions .Fn aio_pre_poll_l_of , .Fn aio_do_poll_l_of , .Fn aio_post_poll_l_of , .Fn aio_pre_poll_v_of , .Fn aio_do_poll_v_of , and .Fn aio_post_poll_v_of , which instead take a list of .Dv AIO_LOOP pointers; the .Dv int argument specifies how many of them there are, with the list itself specified as multiple arguments for the .Fa \&_l_ variants and as a pointer-to-array for the .Fa \&_v_ variants (this difference is directly analogous to .Xr execl 3 and .Xr execv 3 ) . Note that .Fa aio_post_poll_l_of returns int even though .Fa aio_post_poll returns void; the former calls .Xr malloc 3 and thus can fail. .Pp When callbacks are called from within .Fn aio_pre_poll_l_of , .Fn aio_do_poll_l_of , .Fn aio_post_poll_l_of , .Fn aio_pre_poll_v_of , .Fn aio_do_poll_v_of , or .Fn aio_post_poll_v_of , the callback receives no explicit indication of which .Dv AIO_LOOP a given callback is being made for. .Pp It is an error to call .Fn aio_pre_poll_l_of , .Fn aio_do_poll_l_of , .Fn aio_post_poll_l_of , .Fn aio_pre_poll_v_of , .Fn aio_do_poll_v_of , or .Fn aio_post_poll_v_of , with a zero or negative loop count; it is also an error to call .Fn aio_do_poll_l_of , .Fn aio_post_poll_l_of , .Fn aio_do_poll_v_of , or .Fn aio_post_poll_v_of with a different list from the one passed to .Fn aio_pre_poll_l_of , or .Fn aio_pre_poll_v_of . The same pointers in a different order counts as a different list, but using the .Fa \&_l_ variant for one call and the .Fa \&_v_ variant for the other does not (provided the order in the array in the .Fa \&_v_ call is the same as the order in the arglist in the .Fa \&_l_ call). .Pp .Fn aio_global_loop returns an .Dv AIO_LOOP pointer for the global loop. It is a catastrophic error to pass this to .Fn aio_poll_done_of or otherwise attempt to free it, but it is otherwise a perfectly good loop and may, for example, be one of the loops passed to .Fn aio_pre_poll_l_of . 
.Pp .Fn aio_rwtest_always and .Fn aio_rwtest_never are convenience functions which always return nonzero and zero, respectively, suitable for passing as the read and/or write test functions when you know you always or never want to check for I/O. .Pp There are three manifest constants: .Dv AIO_NOID , which can be stored in an int and is guaranteed to be different from any poll ID returned by .Fn aio_add_poll (or .Fn aio_add_poll_of ) or block ID returned by .Fn aio_add_block .Pf ( Fn aio_add_block_of ) , .Dv AIO_ERR , a negative value which can be returned as an error indication by some functions, including any function which returns a poll or block ID, and .Dv AIO_OK , which is a convenience name defined as zero. .Pp The order in which block functions are called (when they are called) is explicitly undefined. Similarly, no order is defined among the calls to read and write test functions, nor for read and write I/O functions. However, test functions are called only from within .Fn aio_pre_poll .Pf ( Fn aio_pre_poll_of ) , block functions only from within .Fn aio_do_poll .Pf ( Fn aio_do_poll_of ) , and I/O functions only from within .Fn aio_post_poll .Pf ( Fn aio_post_poll_of ) , which implies well-defined orderings between any two functions from different sets. It is, however, promised that block functions are called in some order; that is, no two block function executions can overlap. Similar remarks apply to read/write test functions and I/O functions. .Pp Except for .Dv AIO_NOID and .Dv AIO_ERR , which are invalid for both, poll and block IDs are in separate namespaces. It is an error to pass a poll ID to .Fn aio_remove_block or a block ID to .Fn aio_remove_poll , and a poll ID may happen to equal a block ID. Similarly, each .Dv AIO_LOOP is a separate namespace for poll and block IDs (again, except for .Dv AIO_NOID and .Dv AIO_ERR ) . 
.Pp A single event loop (including the global one) is not thread-safe, but different event loops are completely independent; provided the underlying OS calls (eg, .Xr malloc 3 and .Xr writev 2 ) are thread-safe, distinct threads may use distinct event loops without additional synchronization. (But see .Sx BUGS , below.) .Pp For compatibility with previous versions of this library, .Dv AIO_PL_NOID is also defined; its semantics are identical to those of .Dv AIO_NOID , but .Dv AIO_NOID should be used for new code. .Sh OUTPUT QUEUES (OQ) These functions are designed to manage queues of data waiting to be written to file descriptors. There is one user-visible type, one manifest constant, and a bunch of functions. The type is a struct, for which there is a typedef, .Dv AIO_OQ , which represents a queue of data. (The name stands for Output Queue.) Its contents are visible, but this is only because C requires this (unless we were to use another level of indirection); the field names in the struct and their semantics are specifically not part of the interface spec. .Pp .Fn aio_oq_init must be called to initialize a newly-allocated .Dv AIO_OQ . Before this is done, the other calls' behaviours on that .Dv AIO_OQ are undefined. Note that .Dv aio_oq_init is actually a macro, in order to make the interface better able to tolerate certain internal changes. This is relevant mostly in that certain erroneous calls will generate compiler errors that talk about the macro rather than the function; it also means that any use of .Dv aio_oq_init that does not expand the macro (such as taking a pointer to it) will not work as it would if the description actually described the underlying function. See the source for full details. .Pp Data blocks are queued on an .Dv AIO_OQ with five functions. The most basic one is .Fn aio_oq_queue_point . This just queues a block of data to be written. Nothing in particular happens when the block is written. 
This is suitable when, for example, the data block is a string literal, or is part of some data structure that is known to outlive the queue of data. It normally returns 0, but if .Xr malloc 3 fails, it returns a negative number (and the block is not queued). .Pp Next is .Fn aio_oq_queue_copy , which is similar, except it makes a copy of the data block; the data argument need not remain valid past the point at which .Fn aio_oq_queue_copy returns. The copy is owned by the .Dv AIO_OQ and is not user-accessible; it will be freed automatically once it is no longer needed. It normally returns 0, but if .Xr malloc 3 fails, it returns a negative number (and the block is not queued). .Pp .Fn aio_oq_queue_free is designed for cases where you have a block of data to write and want to free it as soon as it's been written. It is like .Fn aio_oq_queue_copy except that, rather than making a copy of the data block, it takes over the block you pass in, keeping it around until it's been entirely written, at which point it is freed with .Xr free 3 . It normally returns 0, but if .Xr malloc 3 fails, it returns a negative number (and the block is not queued). .Pp .Fn aio_oq_queue_cb is for cases where you want to take some special action when the block is completely written (for example, if it needs freeing with something other than .Xr free 3 ) . Besides the data block, it takes a callback function and argument; when the block has been completely written or is otherwise being discarded from the queue, the callback is called with the argument. It is otherwise like .Fn aio_oq_queue_point . Note that there is no requirement that the callback argument and data block be the same, though they may be; for example, calling this with .Xr free 3 as the callback and the data block pointer as the callback arg is effectively the same as calling .Fn aio_oq_queue_free . 
Note that the callback may be called during the .Fn aio_oq_queue_cb call, if the block is not actually saved in the queue (for example, if the block length is zero). .Pp .Fn aio_oq_queue_printf generates the data to be written from a .Xr printf 3 Ns -style format and data. In other respects, it is like .Fn aio_oq_queue_free : it handles all the memory management involved. .Pp It is also possible to queue .Sq special blocks. These amount to non-data objects in the queue; they cause a callback to be called when they are handled. There are two kinds of special objects. .Pp .Sq Write special objects interrupt the sequence of writing. A single .Xr writev 2 will never include data from both before and after a write special object. If a write special object is at the head of the queue when .Fn aio_oq_writev is called, its callback is called (see below), it is dropped, and .Fn aio_oq_writev returns .Dv AIO_WRITEV_SPECIAL . These are queued with type .Dv AIO_SPECIAL_WRITE . .Pp .Sq Drop special objects do not interrupt writing. Instead, when .Fn aio_oq_dropdata is dropping data from a queue, any drop special objects mixed with, or immediately following, the data being dropped have their callbacks called (see below) and are dropped along with the data. These are queued with type .Dv AIO_SPECIAL_DROP . .Pp Special objects are queued with .Fn aio_oq_queue_special , which takes the .Dv AIO_OQ pointer, the special object type .Pf ( Dv AIO_SPECIAL_WRITE or .Dv AIO_SPECIAL_DROP ) , a callback function (see below), and a generic pointer and an integer, which are saved to be passed to the callback when it is called. It normally returns 0, but if .Xr malloc 3 fails, it returns a negative number (and the block is not queued). .Pp It is also possible to have a special object change its type dynamically. .Fn aio_oq_set_special_typecb takes an .Dv AIO_OQ pointer and a callback which returns a type, such as can be passed to .Fn aio_oq_queue_special . 
The callback gets passed the same two arguments passed as the last two arguments to .Fn aio_oq_queue_special and passed to its callback. If the type returned is not valid, you get whatever the implementation finds most convenient. .Pp .Fn aio_oq_writev writes data. It takes a pointer to the .Dv AIO_OQ , the file descriptor to write to, and a maximum number of bytes to write (or a negative number for no limit). Its return value is normally the return value from .Xr writev 2 . If .Fn aio_oq_writev returns early because it found a write special object at the head of the queue, it returns the negative value .Dv AIO_WRITEV_SPECIAL ; if .Xr writev 2 fails, .Fn aio_oq_writev returns the negative value .Dv AIO_WRITEV_ERROR , without disturbing .Dv errno . .Fn aio_oq_writev can return zero; for example, if the head of the queue is a drop special object but the next thing in the queue is a write special object, .Fn aio_oq_writev will return zero without calling .Xr writev 2 . Also, if .Xr writev 2 returns zero for any reason, .Fn aio_oq_writev will reflect that to its caller. .Pp If .Fn aio_oq_writev is called with a zero maximum-bytes argument, it will never write anything, but it will process a write special object if one is at the head of the queue. .Pp .Fn aio_oq_custom_writev is just like .Fn aio_oq_writev except that, instead of taking a file descriptor, it takes a callback and a pointer-to-void argument to it. The callback is called just like .Xr writev 2 except that the pointer-to-void is passed where .Xr writev 2 takes a file descriptor; its return semantics are identical to .Xr writev 2 Ap s (except that the return type is .Fa int instead of .Fa ssize_t ) . This allows use of an application-specific function (such as a wrapper around .Xr send 2 ) instead of .Xr writev 2 . .Pp .Fn aio_oq_dropdata is used to drop written data. When .Fn aio_oq_writev writes data, it returns the amount written; .Fn aio_oq_dropdata is, normally, then called to drop the data. 
It takes the .Dv AIO_OQ pointer and a byte count; it drops that many bytes at the head of the queue. The return value is 1 if the queue is empty after dropping the data or 0 if not. Any drop special objects mixed with, or immediately following, the data being dropped will have their callbacks called (see below) and then will be dropped along with the data. .Pp If multiple callbacks are to be called within a single .Fn aio_oq_dropdata call, they are called in the order the relevant objects appear in the queue. .Pp .Fn aio_oq_dropdata_cb is just like .Fn aio_oq_dropdata except that, for every block of data dropped, it calls a callback. The calls are made in the same order the data occur in the queue. Drop special objects have their callbacks called in sequence where they occur in the queue. (If the callback is nil, the data callbacks are not made; when called this way, .Fn aio_oq_dropdata_cb is equivalent to .Fn aio_oq_dropdata . ) One callback is made for each chunk of data dropped, even if it's just part of a queued block. .Pp There are also four simple status enquiry functions: .Fn aio_oq_qlen simply returns the number of bytes queued (special objects are counted as zero bytes); .Fn aio_oq_headlen returns the number of unwritten bytes in the entry at the head of the queue, or the negative value .Dv AIO_OQ_HL_SPECIAL if the entry at the head of the queue is a special entry, or the negative value .Dv AIO_OQ_HL_EMPTY if the queue is empty (ie, there is no .Sq "entry at the head of the queue" ) . .Fn aio_oq_empty returns true if the queue is empty and false if not; and .Fn aio_oq_nonempty returns false if the queue is empty and true if not. Because of special objects, .Fn aio_oq_empty and .Fn aio_oq_nonempty are not necessarily equivalent to calling .Fn aio_oq_qlen and checking whether the result is zero. (They are equivalent exactly when there are no special objects at the head of the queue.) 
.Pp .Fn aio_oq_flush flushes anything remaining in the queue and cleans up any remaining data structures. This includes calling the callback for blocks queued with .Fn aio_oq_queue_cb and calling the callbacks for any special objects encountered (for more on which see below). This must be called before freeing the .Dv AIO_OQ to avoid leaking memory. The .Dv AIO_OQ is left in a state equivalent to that immediately after calling .Fn aio_oq_init on it. .Pp Finally, .Fn aio_oq_read is designed for cases where you want to use an .Dv AIO_OQ as a queue of data but you aren't interested in writing it to a file descriptor. .Fn aio_oq_read takes an .Dv AIO_OQ pointer, a buffer pointer, and a length (called .Dv N here); it reads up to .Dv N bytes from the head of the queue into the buffer, removing them from the queue. (These are the bytes that would be written if an .Fn aio_oq_writev were done that wrote .Dv N bytes.) Write special objects work much as they do for .Fn aio_oq_writev ; they interrupt the data sequence, causing a distinctive return. Drop special objects are affected as if by a .Fn aio_oq_dropdata call that dropped .Dv N bytes: they have their callbacks called and are dropped. .Fn aio_oq_read normally returns the number of bytes read, or zero if the queue is empty (possibly excepting drop special objects) when called. If the head-of-queue is a write special object, its callback is called, it is dropped, and .Fn aio_oq_read returns .Dv AIO_READ_SPECIAL . .Pf ( Fn aio_oq_read is a convenience routine; it can be built using .Fn aio_oq_custom_writev . ) .Pp A special object's callback can be called either normally, meaning from within .Fn aio_oq_writev or .Fn aio_oq_read for write specials and from within .Fn aio_oq_dropdata for drop specials, or it can be called from within .Fn aio_oq_flush . In the normal case, the callback's first argument is .Dv AIO_SPECIAL_NORMAL ; calls from .Fn aio_oq_flush instead pass .Dv AIO_SPECIAL_FLUSH for the first argument. 
.Pp Whenever a callback is called, there is the potential for it to throw out, such as with .Xr longjmp 3 . This will not break the library (that is, queues will always be left in a consistent state), but it will mean certain other things may not function as expected. .Pp If the callback for a block queued with .Fn aio_oq_queue_cb throws out of .Fn aio_oq_queue_cb (because the block isn't being saved at all), it is as if the throw happened immediately after .Fn aio_oq_queue_cb returned. If it throws out of .Fn aio_oq_dropdata , any data that would normally have been dropped after the block whose callback threw out will remain in the queue. If it throws out of .Fn aio_oq_flush , again, anything after the block will remain in the queue. .Pp If a special callback throws out of .Fn aio_oq_writev , it must have been a write special object, and it is as if the throw happened immediately after .Fn aio_oq_writev returned. If a special callback throws out of .Fn aio_oq_dropdata , it must have been a drop special object, and anything that would normally have been dropped after the special object whose callback threw out will remain in the queue. If it throws out of .Fn aio_oq_flush , anything after the block will remain in the queue. .Pp In all cases, the block whose callback threw out will have been removed from the queue. If the queue is being used for writing data, as designed, leaving things in the queue can mean they get written twice. .Pp As a convenience, there is a manifest constant .Dv AIO_STRLEN which may be passed as the length field to any of the functions which queue data from a pointer-and-length; when this is done, the block pointer is passed to .Xr strlen 3 and the return value is used as the length. .Sh PRIORITY QUEUES (PQ) These functions are designed to manage prioritized queues of packets waiting to be written to file descriptors. 
The .Fa \&_PQ_ functions parallel the .Fa \&_OQ_ functions, except that, where an .Dv AIO_OQ represents a queue of bytes waiting to be written, an .Dv AIO_PQ represents multiple prioritized queues of packets waiting to be written. .Pp When there is only one queue, this is a distinction without a difference, but an .Dv AIO_PQ actually represents multiple queues. Each queue has a priority; when writing, data is taken from the highest-priority queue that has anything to send. Packet boundaries are important in that it is only at packet boundaries that interleaving happens; that is, once even so much as a single byte is written from a given queue, nothing is ever written from any other queue until a packet boundary is reached. .Pp There is one user-visible type, .Dv AIO_PQ , numerous functions, and two manifest constants .Pf ( Dv AIO_STRLEN , which is shared with the .Fa aio_oq_\&* functions, and .Dv AIO_ALL_PRIO , used by .Fn aio_pq_flush ) . .Pp .Fn aio_pq_init initializes a newly-allocated .Dv AIO_PQ . Before this is done, other calls' behaviours on that .Dv AIO_PQ are undefined. Like .Fn aio_oq_init (qv), .Fn aio_pq_init is actually a macro; see .Fn aio_oq_init for how this is relevant. .Fn aio_pq_init takes one argument besides the .Dv AIO_PQ pointer. This must be at least 1; it specifies the number of priority levels to be supported by the .Dv AIO_PQ . Priorities range from zero through one less than this value. (Specifying a large value here will impair performance. Specifying a value less than 1 will cause .Fn aio_pq_init to do nothing.) This normally returns 0; if .Xr malloc 3 fails, it returns a negative value. .Pp .Fn aio_pq_deinit frees all memory referred to by an .Dv AIO_PQ ; this includes all the effects of .Fn aio_pq_flush with .Dv AIO_ALL_PRIO . .Fn aio_pq_deinit must be called before freeing the .Dv AIO_PQ to avoid leaking memory. After this call, the .Dv AIO_PQ is not usable without calling .Fn aio_pq_init on it again. 
.Pp Data blocks are queued on an .Dv AIO_PQ with five functions. In each case, the first argument is the .Dv AIO_PQ pointer and the second is the priority. (If the priority is out of range, the data block is dropped immediately rather than being queued anywhere; processing happens as if it had got written during the queue call.) The most basic queue function is .Fn aio_pq_queue_point . This just queues a block of data to be written. Nothing in particular happens when the block is written. This is suitable for, for example, queueing data obtained from a string literal. It normally returns 0; if .Xr malloc 3 fails, it returns a negative value. .Pp .Fn aio_pq_queue_copy is just like .Fn aio_pq_queue_point except that it allocates a copy of the block of data, so the data block passed in does not need to remain valid past the point of .Fn aio_pq_queue_copy returning. The copy will be freed automatically when it is no longer needed. .Pp .Fn aio_pq_queue_free is like .Fn aio_pq_queue_copy except that, instead of making a copy, the queue takes over the data block passed in, .Xr free 3 Ns No ing it once it has been entirely written. This exists as an optimization for cases where you would otherwise have to call .Fn aio_pq_queue_copy and then immediately free the block passed in. .Pp .Fn aio_pq_queue_cb is for cases where something needs to be done once the data block is written, but it's more complex than just freeing it. It takes a callback and a void pointer; once the data block is written, or is otherwise being dequeued, the callback is called with the specified argument. Note that the callback argument does not have to equal the data block being written, though of course it may. .Pp .Fn aio_pq_queue_printf is like .Fn aio_pq_queue_copy except that, instead of being copied from a pointer-and-length passed in, the data block is generated from a .Xr printf 3 Ns No \&-style format and argument list.
.Pp It is particularly important to note that a packet is not available to be written until the whole packet has been queued. Packet boundaries are indicated with .Fn aio_pq_boundary . This indicates that the packet-in-progress for the relevant queue is now done; the packet is made available to be written. Zero-size packets (that is, two calls to .Fn aio_pq_boundary for a given .Dv AIO_PQ and priority with no other data queued for it in between) are permitted, but are semantically equivalent to just a single boundary. Note that each priority is completely independent; a packet in progress for one priority has absolutely no bearing on how the queue behaves for other priorities. .Fn aio_pq_boundary normally returns 0; if .Xr malloc 3 fails, it returns a negative value. .Pp .Fn aio_pq_writev_drop writes data, selecting it from the various queues, preferring queues with higher priority values over those with lower when it has the choice. Any data successfully written is automatically dropped from the relevant queue(s). If .Xr writev 2 succeeds (returns a nonnegative value), that value is returned by .Fn aio_pq_writev_drop (after dropping any relevant data). If .Xr writev 2 fails (returns a negative value), .Fn aio_pq_writev_drop returns the negative value .Dv AIO_WRITEV_ERROR , without affecting the .Fa errno value set by .Xr writev 2 , and does not drop any data. The first argument to .Fn aio_pq_writev_drop is of course the relevant .Dv AIO_PQ pointer; the second argument is the file descriptor to write to and the third is the maximum number of bytes to write, or a negative number for no limit. .Pp There are three simple status inquiry functions: .Fn aio_pq_qlen returns the number of bytes queued on a given .Dv AIO_PQ , summed across all priorities; .Fn aio_pq_empty returns true if the .Dv AIO_PQ has nothing available to be written and false otherwise; and .Fn aio_pq_nonempty returns false if the .Dv AIO_PQ has nothing available to be written and true otherwise.
(Note that, in particular, an unfinished partial packet is not counted in the queue length and does not make a queue nonempty.) .Pp Finally, .Fn aio_pq_flush flushes everything queued for a given priority, dequeueing all queued blocks (including freeing blocks queued with .Fn aio_pq_queue_copy , calling the callbacks for blocks queued with .Fn aio_pq_queue_cb , and the like); this also discards any partial packet in progress for that priority. There is a manifest constant, .Dv AIO_ALL_PRIO , which has a negative value and can be stored in an int; this may be passed as the priority to flush all priorities. Note that, unlike .Fn aio_oq_flush , .Fn aio_pq_flush is never suitable for use as a pre-free cleanup routine; for that, see .Fn aio_pq_deinit . .Pp .Dv AIO_STRLEN works the same way for the .Fa aio_pq_\&* functions that it does for the .Fa aio_oq_\&* functions. .Sh SHARED There is one function called by all three of the above subsystems. This is called .Fn aio__panic . It is called upon detecting an internal inconsistency or a blatantly incorrect call (passing certain invalid IDs to .Fn aio_remove_poll is an example). The argument pattern matches that of .Xr printf 3 , but note that the return type does not. If it returns, no promises are made about what happens. If it throws out, the library attempts to leave data structures in a reasonable state, but this is not always possible and is specifically not promised. .Pp If the calling program does not define .Fn aio__panic , the library provides a version that saves the panic message to an internal variable (for post-mortem debugger access) and calls .Xr abort 3 . .Sh BUGS Error handling could be improved. .Fn aio__panic provides rudimentary error-handling capability, but something better should be designed. Also, not all errors are noticed. .Pp .Fn aio_pq_writev_drop and .Fn aio_oq_writev are not actually reentrant or threadsafe.
This could and probably should be fixed, but fixing it would impose additional overhead, overhead which for most uses would be unnecessary. .Sh AUTHOR der Mouse, .Aq mouse@rodents.montreal.qc.ca .