PostgreSQL Source Code git master
aio.c File Reference
#include "postgres.h"
#include "lib/ilist.h"
#include "miscadmin.h"
#include "port/atomics.h"
#include "storage/aio.h"
#include "storage/aio_internal.h"
#include "storage/aio_subsys.h"
#include "utils/guc.h"
#include "utils/guc_hooks.h"
#include "utils/resowner.h"
#include "utils/wait_event_types.h"
Include dependency graph for aio.c:

Go to the source code of this file.

Macros

#define PGAIO_HS_TOSTR_CASE(sym)   case PGAIO_HS_##sym: return #sym
 

Functions

static void pgaio_io_update_state (PgAioHandle *ioh, PgAioHandleState new_state)
 
static void pgaio_io_reclaim (PgAioHandle *ioh)
 
static void pgaio_io_resowner_register (PgAioHandle *ioh)
 
static void pgaio_io_wait_for_free (void)
 
static PgAioHandle * pgaio_io_from_wref (PgAioWaitRef *iow, uint64 *ref_generation)
 
static const char * pgaio_io_state_get_name (PgAioHandleState s)
 
static void pgaio_io_wait (PgAioHandle *ioh, uint64 ref_generation)
 
PgAioHandle * pgaio_io_acquire (struct ResourceOwnerData *resowner, PgAioReturn *ret)
 
PgAioHandle * pgaio_io_acquire_nb (struct ResourceOwnerData *resowner, PgAioReturn *ret)
 
void pgaio_io_release (PgAioHandle *ioh)
 
void pgaio_io_release_resowner (dlist_node *ioh_node, bool on_error)
 
void pgaio_io_set_flag (PgAioHandle *ioh, PgAioHandleFlags flag)
 
int pgaio_io_get_id (PgAioHandle *ioh)
 
ProcNumber pgaio_io_get_owner (PgAioHandle *ioh)
 
void pgaio_io_get_wref (PgAioHandle *ioh, PgAioWaitRef *iow)
 
void pgaio_io_stage (PgAioHandle *ioh, PgAioOp op)
 
bool pgaio_io_needs_synchronous_execution (PgAioHandle *ioh)
 
void pgaio_io_prepare_submit (PgAioHandle *ioh)
 
void pgaio_io_process_completion (PgAioHandle *ioh, int result)
 
bool pgaio_io_was_recycled (PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
 
const char * pgaio_io_get_state_name (PgAioHandle *ioh)
 
const char * pgaio_result_status_string (PgAioResultStatus rs)
 
void pgaio_wref_clear (PgAioWaitRef *iow)
 
bool pgaio_wref_valid (PgAioWaitRef *iow)
 
int pgaio_wref_get_id (PgAioWaitRef *iow)
 
void pgaio_wref_wait (PgAioWaitRef *iow)
 
bool pgaio_wref_check_done (PgAioWaitRef *iow)
 
void pgaio_enter_batchmode (void)
 
void pgaio_exit_batchmode (void)
 
bool pgaio_have_staged (void)
 
void pgaio_submit_staged (void)
 
void pgaio_error_cleanup (void)
 
void AtEOXact_Aio (bool is_commit)
 
void pgaio_closing_fd (int fd)
 
void pgaio_shutdown (int code, Datum arg)
 
void assign_io_method (int newval, void *extra)
 
bool check_io_max_concurrency (int *newval, void **extra, GucSource source)
 

Variables

const struct config_enum_entry io_method_options []
 
int io_method = DEFAULT_IO_METHOD
 
int io_max_concurrency = -1
 
PgAioCtl * pgaio_ctl
 
PgAioBackend * pgaio_my_backend
 
static const IoMethodOps *const pgaio_method_ops_table []
 
const IoMethodOps * pgaio_method_ops
 

Macro Definition Documentation

◆ PGAIO_HS_TOSTR_CASE

#define PGAIO_HS_TOSTR_CASE (   sym)    case PGAIO_HS_##sym: return #sym

Function Documentation

◆ assign_io_method()

void assign_io_method ( int  newval,
void *  extra 
)

Definition at line 1231 of file aio.c.

1232{
1235
1237}
const IoMethodOps * pgaio_method_ops
Definition: aio.c:96
static const IoMethodOps *const pgaio_method_ops_table[]
Definition: aio.c:87
const struct config_enum_entry io_method_options[]
Definition: aio.c:67
#define lengthof(array)
Definition: c.h:759
#define newval
Assert(PointerIsAligned(start, uint64))

References Assert(), io_method_options, lengthof, newval, pgaio_method_ops, and pgaio_method_ops_table.

◆ AtEOXact_Aio()

void AtEOXact_Aio ( bool  is_commit)

Definition at line 1106 of file aio.c.

1107{
1108 /*
1109 * We should never be in batch mode at transactional boundaries. In case
1110 * an error was thrown while in batch mode, pgaio_error_cleanup() should
1111 * have exited batchmode.
1112 *
1113 * In case we are in batchmode somehow, make sure to submit all staged
1114 * IOs, other backends may need them to complete to continue.
1115 */
1117 {
1119 elog(WARNING, "open AIO batch at end of (sub-)transaction");
1120 }
1121
1122 /*
1123 * As we aren't in batchmode, there shouldn't be any unsubmitted IOs.
1124 */
1126}
PgAioBackend * pgaio_my_backend
Definition: aio.c:84
void pgaio_error_cleanup(void)
Definition: aio.c:1078
#define WARNING
Definition: elog.h:36
#define elog(elevel,...)
Definition: elog.h:225
uint16 num_staged_ios
Definition: aio_internal.h:208

References Assert(), elog, PgAioBackend::in_batchmode, PgAioBackend::num_staged_ios, pgaio_error_cleanup(), pgaio_my_backend, and WARNING.

Referenced by AbortSubTransaction(), AbortTransaction(), CommitTransaction(), pgaio_shutdown(), and PrepareTransaction().

◆ check_io_max_concurrency()

bool check_io_max_concurrency ( int *  newval,
void **  extra,
GucSource  source 
)

Definition at line 1240 of file aio.c.

1241{
1242 if (*newval == -1)
1243 {
1244 /*
1245 * Auto-tuning will be applied later during startup, as auto-tuning
1246 * depends on the value of various GUCs.
1247 */
1248 return true;
1249 }
1250 else if (*newval == 0)
1251 {
1252 GUC_check_errdetail("Only -1 or values bigger than 0 are valid.");
1253 return false;
1254 }
1255
1256 return true;
1257}
#define GUC_check_errdetail
Definition: guc.h:481

References GUC_check_errdetail, and newval.

◆ pgaio_closing_fd()

void pgaio_closing_fd ( int  fd)

Definition at line 1133 of file aio.c.

1134{
1135 /*
1136 * Might be called before AIO is initialized or in a subprocess that
1137 * doesn't use AIO.
1138 */
1139 if (!pgaio_my_backend)
1140 return;
1141
1142 /*
1143 * For now just submit all staged IOs - we could be more selective, but
1144 * it's probably not worth it.
1145 */
1147 {
1149 "submitting %d IOs before FD %d gets closed",
1152 }
1153
1154 /*
1155 * If requested by the IO method, wait for all IOs that use the
1156 * to-be-closed FD.
1157 */
1159 {
1160 /*
1161 * As waiting for one IO to complete may complete multiple IOs, we
1162 * can't just use a mutable list iterator. The maximum number of
1163 * in-flight IOs is fairly small, so just restart the loop after
1164 * waiting for an IO.
1165 */
1167 {
1168 dlist_iter iter;
1169 PgAioHandle *ioh = NULL;
1170
1172 {
1173 ioh = dclist_container(PgAioHandle, node, iter.cur);
1174
1175 if (pgaio_io_uses_fd(ioh, fd))
1176 break;
1177 else
1178 ioh = NULL;
1179 }
1180
1181 if (!ioh)
1182 break;
1183
1185 "waiting for IO before FD %d gets closed, %d in-flight IOs",
1187
1188 /* see comment in pgaio_io_wait_for_free() about raciness */
1189 pgaio_io_wait(ioh, ioh->generation);
1190 }
1191 }
1192}
void pgaio_submit_staged(void)
Definition: aio.c:1036
static void pgaio_io_wait(PgAioHandle *ioh, uint64 ref_generation)
Definition: aio.c:544
#define pgaio_debug(elevel, msg,...)
Definition: aio_internal.h:376
#define pgaio_debug_io(elevel, ioh, msg,...)
Definition: aio_internal.h:389
bool pgaio_io_uses_fd(PgAioHandle *ioh, int fd)
Definition: aio_io.c:197
#define DEBUG2
Definition: elog.h:29
#define dclist_container(type, membername, ptr)
Definition: ilist.h:947
static uint32 dclist_count(const dclist_head *head)
Definition: ilist.h:932
static bool dclist_is_empty(const dclist_head *head)
Definition: ilist.h:682
#define dclist_foreach(iter, lhead)
Definition: ilist.h:970
static int fd(const char *x, int i)
Definition: preproc-init.c:105
bool wait_on_fd_before_close
Definition: aio_internal.h:262
dclist_head in_flight_ios
Definition: aio_internal.h:219
uint64 generation
Definition: aio_internal.h:146
dlist_node * cur
Definition: ilist.h:179

References dlist_iter::cur, dclist_container, dclist_count(), dclist_foreach, dclist_is_empty(), DEBUG2, fd(), PgAioHandle::generation, PgAioBackend::in_flight_ios, PgAioBackend::num_staged_ios, pgaio_debug, pgaio_debug_io, pgaio_io_uses_fd(), pgaio_io_wait(), pgaio_method_ops, pgaio_my_backend, pgaio_submit_staged(), and IoMethodOps::wait_on_fd_before_close.

Referenced by CloseTransientFile(), FileClose(), FreeDesc(), and LruDelete().

◆ pgaio_enter_batchmode()

void pgaio_enter_batchmode ( void  )

Definition at line 994 of file aio.c.

995{
997 elog(ERROR, "starting batch while batch already in progress");
999}
#define ERROR
Definition: elog.h:39

References elog, ERROR, PgAioBackend::in_batchmode, and pgaio_my_backend.

Referenced by batch_start(), read_rel_block_ll(), and read_stream_look_ahead().

◆ pgaio_error_cleanup()

void pgaio_error_cleanup ( void  )

Definition at line 1078 of file aio.c.

1079{
1080 /*
1081 * It is possible that code errored out after pgaio_enter_batchmode() but
1082 * before pgaio_exit_batchmode() was called. In that case we need to
1083 * submit the IO now.
1084 */
1086 {
1088
1090 }
1091
1092 /*
1093 * As we aren't in batchmode, there shouldn't be any unsubmitted IOs.
1094 */
1096}

References Assert(), PgAioBackend::in_batchmode, PgAioBackend::num_staged_ios, pgaio_my_backend, and pgaio_submit_staged().

Referenced by AbortSubTransaction(), AbortTransaction(), AtEOXact_Aio(), AutoVacLauncherMain(), BackgroundWriterMain(), CheckpointerMain(), pgarch_archiveXlog(), WalSndErrorCleanup(), WalSummarizerMain(), and WalWriterMain().

◆ pgaio_exit_batchmode()

void pgaio_exit_batchmode ( void  )

◆ pgaio_have_staged()

bool pgaio_have_staged ( void  )

◆ pgaio_io_acquire()

PgAioHandle * pgaio_io_acquire ( struct ResourceOwnerData *  resowner,
PgAioReturn *  ret 
)

Definition at line 173 of file aio.c.

174{
175 PgAioHandle *h;
176
177 while (true)
178 {
179 h = pgaio_io_acquire_nb(resowner, ret);
180
181 if (h != NULL)
182 return h;
183
184 /*
185 * Evidently all handles by this backend are in use. Just wait for
186 * some to complete.
187 */
189 }
190}
static void pgaio_io_wait_for_free(void)
Definition: aio.c:710
PgAioHandle * pgaio_io_acquire_nb(struct ResourceOwnerData *resowner, PgAioReturn *ret)
Definition: aio.c:199

References pgaio_io_acquire_nb(), and pgaio_io_wait_for_free().

Referenced by AsyncReadBuffers(), handle_get(), handle_get_and_error(), handle_get_release(), handle_get_twice(), and read_rel_block_ll().

◆ pgaio_io_acquire_nb()

PgAioHandle * pgaio_io_acquire_nb ( struct ResourceOwnerData *  resowner,
PgAioReturn *  ret 
)

Definition at line 199 of file aio.c.

200{
202 {
205 }
206
208 elog(ERROR, "API violation: Only one IO can be handed out");
209
211 {
213 PgAioHandle *ioh = dclist_container(PgAioHandle, node, ion);
214
215 Assert(ioh->state == PGAIO_HS_IDLE);
217
220
221 if (resowner)
223
224 if (ret)
225 {
226 ioh->report_return = ret;
228 }
229
230 return ioh;
231 }
232
233 return NULL;
234}
static void pgaio_io_update_state(PgAioHandle *ioh, PgAioHandleState new_state)
Definition: aio.c:374
static void pgaio_io_resowner_register(PgAioHandle *ioh)
Definition: aio.c:390
@ PGAIO_HS_IDLE
Definition: aio_internal.h:46
@ PGAIO_HS_HANDED_OUT
Definition: aio_internal.h:53
#define PGAIO_SUBMIT_BATCH_SIZE
Definition: aio_internal.h:28
@ PGAIO_RS_UNKNOWN
Definition: aio_types.h:80
ProcNumber MyProcNumber
Definition: globals.c:91
static dlist_node * dclist_pop_head_node(dclist_head *head)
Definition: ilist.h:789
dclist_head idle_ios
Definition: aio_internal.h:191
PgAioHandle * handed_out_io
Definition: aio_internal.h:200
int32 owner_procno
Definition: aio_internal.h:125
PgAioReturn * report_return
Definition: aio_internal.h:171
PgAioHandleState state
Definition: aio_internal.h:99
uint32 status
Definition: aio_types.h:108
PgAioResult result
Definition: aio_types.h:132

References Assert(), dclist_container, dclist_is_empty(), dclist_pop_head_node(), elog, ERROR, PgAioBackend::handed_out_io, PgAioBackend::idle_ios, MyProcNumber, PgAioBackend::num_staged_ios, PgAioHandle::owner_procno, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, pgaio_io_resowner_register(), pgaio_io_update_state(), pgaio_my_backend, PGAIO_RS_UNKNOWN, PGAIO_SUBMIT_BATCH_SIZE, pgaio_submit_staged(), PgAioHandle::report_return, PgAioReturn::result, PgAioHandle::state, and PgAioResult::status.

Referenced by AsyncReadBuffers(), and pgaio_io_acquire().

◆ pgaio_io_from_wref()

static PgAioHandle * pgaio_io_from_wref ( PgAioWaitRef *  iow,
uint64 *  ref_generation 
)
static

Definition at line 809 of file aio.c.

810{
811 PgAioHandle *ioh;
812
814
815 ioh = &pgaio_ctl->io_handles[iow->aio_index];
816
817 *ref_generation = ((uint64) iow->generation_upper) << 32 |
818 iow->generation_lower;
819
820 Assert(*ref_generation != 0);
821
822 return ioh;
823}
PgAioCtl * pgaio_ctl
Definition: aio.c:81
uint64_t uint64
Definition: c.h:503
PgAioHandle * io_handles
Definition: aio_internal.h:246
uint32 io_handle_count
Definition: aio_internal.h:245
uint32 generation_upper
Definition: aio_types.h:45
uint32 aio_index
Definition: aio_types.h:35
uint32 generation_lower
Definition: aio_types.h:46

References PgAioWaitRef::aio_index, Assert(), PgAioWaitRef::generation_lower, PgAioWaitRef::generation_upper, PgAioCtl::io_handle_count, PgAioCtl::io_handles, and pgaio_ctl.

Referenced by pgaio_wref_check_done(), and pgaio_wref_wait().

◆ pgaio_io_get_id()

int pgaio_io_get_id ( PgAioHandle *  ioh)

◆ pgaio_io_get_owner()

ProcNumber pgaio_io_get_owner ( PgAioHandle *  ioh)

Definition at line 343 of file aio.c.

344{
345 return ioh->owner_procno;
346}

References PgAioHandle::owner_procno.

Referenced by buffer_readv_complete(), and smgr_aio_reopen().

◆ pgaio_io_get_state_name()

const char * pgaio_io_get_state_name ( PgAioHandle *  ioh)

Definition at line 846 of file aio.c.

847{
848 return pgaio_io_state_get_name(ioh->state);
849}
static const char * pgaio_io_state_get_name(PgAioHandleState s)
Definition: aio.c:826

References pgaio_io_state_get_name(), and PgAioHandle::state.

Referenced by pg_get_aios().

◆ pgaio_io_get_wref()

void pgaio_io_get_wref ( PgAioHandle *  ioh,
PgAioWaitRef *  iow 
)

◆ pgaio_io_needs_synchronous_execution()

bool pgaio_io_needs_synchronous_execution ( PgAioHandle *  ioh)

Definition at line 455 of file aio.c.

456{
457 /*
458 * If the caller said to execute the IO synchronously, do so.
459 *
460 * XXX: We could optimize the logic when to execute synchronously by first
461 * checking if there are other IOs in flight and only synchronously
462 * executing if not. Unclear whether that'll be sufficiently common to be
463 * worth worrying about.
464 */
465 if (ioh->flags & PGAIO_HF_SYNCHRONOUS)
466 return true;
467
468 /* Check if the IO method requires synchronous execution of IO */
471
472 return false;
473}
@ PGAIO_HF_SYNCHRONOUS
Definition: aio.h:70
bool(* needs_synchronous_execution)(PgAioHandle *ioh)
Definition: aio_internal.h:288

References PgAioHandle::flags, IoMethodOps::needs_synchronous_execution, PGAIO_HF_SYNCHRONOUS, and pgaio_method_ops.

Referenced by pgaio_io_stage().

◆ pgaio_io_prepare_submit()

void pgaio_io_prepare_submit ( PgAioHandle *  ioh)

Definition at line 482 of file aio.c.

483{
485
487}
@ PGAIO_HS_SUBMITTED
Definition: aio_internal.h:69
static void dclist_push_tail(dclist_head *head, dlist_node *node)
Definition: ilist.h:709
dlist_node node
Definition: aio_internal.h:140

References dclist_push_tail(), PgAioBackend::in_flight_ios, PgAioHandle::node, PGAIO_HS_SUBMITTED, pgaio_io_update_state(), and pgaio_my_backend.

Referenced by pgaio_io_stage(), and pgaio_worker_submit().

◆ pgaio_io_process_completion()

void pgaio_io_process_completion ( PgAioHandle *  ioh,
int  result 
)

Definition at line 500 of file aio.c.

501{
503
505
506 ioh->result = result;
507
509
510 pgaio_io_call_inj(ioh, "aio-process-completion-before-shared");
511
513
515
516 /* condition variable broadcast ensures state is visible before wakeup */
518
519 /* contains call to pgaio_io_call_complete_local() */
520 if (ioh->owner_procno == MyProcNumber)
521 pgaio_io_reclaim(ioh);
522}
static void pgaio_io_reclaim(PgAioHandle *ioh)
Definition: aio.c:629
void pgaio_io_call_complete_shared(PgAioHandle *ioh)
Definition: aio_callback.c:225
@ PGAIO_HS_COMPLETED_SHARED
Definition: aio_internal.h:82
@ PGAIO_HS_COMPLETED_IO
Definition: aio_internal.h:72
#define pgaio_io_call_inj(ioh, injection_point)
Definition: aio_internal.h:407
void ConditionVariableBroadcast(ConditionVariable *cv)
volatile uint32 CritSectionCount
Definition: globals.c:46
ConditionVariable cv
Definition: aio_internal.h:153

References Assert(), ConditionVariableBroadcast(), CritSectionCount, PgAioHandle::cv, MyProcNumber, PgAioHandle::owner_procno, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_SUBMITTED, pgaio_io_call_complete_shared(), pgaio_io_call_inj, pgaio_io_reclaim(), pgaio_io_update_state(), PgAioHandle::result, and PgAioHandle::state.

Referenced by IoWorkerMain(), and pgaio_io_perform_synchronously().

◆ pgaio_io_reclaim()

static void pgaio_io_reclaim ( PgAioHandle *  ioh)
static

Definition at line 629 of file aio.c.

630{
631 /* This is only ok if it's our IO */
633 Assert(ioh->state != PGAIO_HS_IDLE);
634
635 /*
636 * It's a bit ugly, but right now the easiest place to put the execution
637 * of local completion callbacks is this function, as we need to execute
638 * local callbacks just before reclaiming at multiple callsites.
639 */
641 {
642 PgAioResult local_result;
643
644 local_result = pgaio_io_call_complete_local(ioh);
646
647 if (ioh->report_return)
648 {
649 ioh->report_return->result = local_result;
651 }
652 }
653
655 "reclaiming: distilled_result: (status %s, id %u, error_data %d), raw_result: %d",
657 ioh->distilled_result.id,
659 ioh->result);
660
661 /* if the IO has been defined, it's on the in-flight list, remove */
662 if (ioh->state != PGAIO_HS_HANDED_OUT)
664
665 if (ioh->resowner)
666 {
668 ioh->resowner = NULL;
669 }
670
671 Assert(!ioh->resowner);
672
673 /*
674 * Update generation & state first, before resetting the IO's fields,
675 * otherwise a concurrent "viewer" could think the fields are valid, even
676 * though they are being reset. Increment the generation first, so that
677 * we can assert elsewhere that we never wait for an IDLE IO. While it's
678 * a bit weird for the state to go backwards for a generation, it's OK
679 * here, as there cannot be references to the "reborn" IO yet. Can't
680 * update both at once, so something has to give.
681 */
682 ioh->generation++;
684
685 /* ensure the state update is visible before we reset fields */
687
688 ioh->op = PGAIO_OP_INVALID;
690 ioh->flags = 0;
691 ioh->num_callbacks = 0;
692 ioh->handle_data_len = 0;
693 ioh->report_return = NULL;
694 ioh->result = 0;
696
697 /*
698 * We push the IO to the head of the idle IO list, that seems more cache
699 * efficient in cases where only a few IOs are used.
700 */
702}
const char * pgaio_result_status_string(PgAioResultStatus rs)
Definition: aio.c:852
@ PGAIO_TID_INVALID
Definition: aio.h:119
@ PGAIO_OP_INVALID
Definition: aio.h:90
PgAioResult pgaio_io_call_complete_local(PgAioHandle *ioh)
Definition: aio_callback.c:282
@ PGAIO_HS_COMPLETED_LOCAL
Definition: aio_internal.h:89
#define pg_write_barrier()
Definition: atomics.h:157
#define DEBUG4
Definition: elog.h:27
static void dclist_delete_from(dclist_head *head, dlist_node *node)
Definition: ilist.h:763
static void dclist_push_head(dclist_head *head, dlist_node *node)
Definition: ilist.h:693
void ResourceOwnerForgetAioHandle(ResourceOwner owner, struct dlist_node *ioh_node)
Definition: resowner.c:1110
PgAioTargetData target_data
Definition: aio_internal.h:181
struct ResourceOwnerData * resowner
Definition: aio_internal.h:142
PgAioResult distilled_result
Definition: aio_internal.h:156
uint8 handle_data_len
Definition: aio_internal.h:122
PgAioOp op
Definition: aio_internal.h:105
uint8 num_callbacks
Definition: aio_internal.h:110
dlist_node resowner_node
Definition: aio_internal.h:143
PgAioTargetID target
Definition: aio_internal.h:102
uint32 error_data
Definition: aio_types.h:111
uint32 id
Definition: aio_types.h:105
PgAioTargetData target_data
Definition: aio_types.h:133

References Assert(), dclist_delete_from(), dclist_push_head(), DEBUG4, PgAioHandle::distilled_result, PgAioResult::error_data, PgAioHandle::flags, PgAioHandle::generation, PgAioHandle::handle_data_len, PgAioResult::id, PgAioBackend::idle_ios, PgAioBackend::in_flight_ios, MyProcNumber, PgAioHandle::node, PgAioHandle::num_callbacks, PgAioHandle::op, PgAioHandle::owner_procno, pg_write_barrier, pgaio_debug_io, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, pgaio_io_call_complete_local(), pgaio_io_update_state(), pgaio_my_backend, PGAIO_OP_INVALID, pgaio_result_status_string(), PGAIO_RS_UNKNOWN, PGAIO_TID_INVALID, PgAioHandle::report_return, ResourceOwnerForgetAioHandle(), PgAioHandle::resowner, PgAioHandle::resowner_node, PgAioHandle::result, PgAioReturn::result, PgAioHandle::state, PgAioResult::status, PgAioHandle::target, PgAioHandle::target_data, and PgAioReturn::target_data.

Referenced by pgaio_io_process_completion(), pgaio_io_release(), pgaio_io_release_resowner(), pgaio_io_wait(), pgaio_io_wait_for_free(), and pgaio_wref_check_done().

◆ pgaio_io_release()

void pgaio_io_release ( PgAioHandle *  ioh)

Definition at line 242 of file aio.c.

243{
245 {
247 Assert(ioh->resowner);
248
250 pgaio_io_reclaim(ioh);
251 }
252 else
253 {
254 elog(ERROR, "release in unexpected state");
255 }
256}

References Assert(), elog, ERROR, PgAioBackend::handed_out_io, PGAIO_HS_HANDED_OUT, pgaio_io_reclaim(), pgaio_my_backend, PgAioHandle::resowner, and PgAioHandle::state.

Referenced by AsyncReadBuffers(), handle_get_release(), and handle_release_last().

◆ pgaio_io_release_resowner()

void pgaio_io_release_resowner ( dlist_node *  ioh_node,
bool  on_error 
)

Definition at line 262 of file aio.c.

263{
264 PgAioHandle *ioh = dlist_container(PgAioHandle, resowner_node, ioh_node);
265
266 Assert(ioh->resowner);
267
269 ioh->resowner = NULL;
270
271 switch (ioh->state)
272 {
273 case PGAIO_HS_IDLE:
274 elog(ERROR, "unexpected");
275 break;
278
280 {
282 if (!on_error)
283 elog(WARNING, "leaked AIO handle");
284 }
285
286 pgaio_io_reclaim(ioh);
287 break;
288 case PGAIO_HS_DEFINED:
289 case PGAIO_HS_STAGED:
290 if (!on_error)
291 elog(WARNING, "AIO handle was not submitted");
293 break;
298 /* this is expected to happen */
299 break;
300 }
301
302 /*
303 * Need to unregister the reporting of the IO's result, the memory it's
304 * referencing likely has gone away.
305 */
306 if (ioh->report_return)
307 ioh->report_return = NULL;
308}
#define dlist_container(type, membername, ptr)
Definition: ilist.h:593

References Assert(), dlist_container, elog, ERROR, PgAioBackend::handed_out_io, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, PGAIO_HS_STAGED, PGAIO_HS_SUBMITTED, pgaio_io_reclaim(), pgaio_my_backend, pgaio_submit_staged(), PgAioHandle::report_return, ResourceOwnerForgetAioHandle(), PgAioHandle::resowner, PgAioHandle::resowner_node, PgAioHandle::state, and WARNING.

Referenced by ResourceOwnerReleaseInternal().

◆ pgaio_io_resowner_register()

static void pgaio_io_resowner_register ( PgAioHandle *  ioh)
static

Definition at line 390 of file aio.c.

391{
392 Assert(!ioh->resowner);
394
397}
ResourceOwner CurrentResourceOwner
Definition: resowner.c:173
void ResourceOwnerRememberAioHandle(ResourceOwner owner, struct dlist_node *ioh_node)
Definition: resowner.c:1104

References Assert(), CurrentResourceOwner, ResourceOwnerRememberAioHandle(), PgAioHandle::resowner, and PgAioHandle::resowner_node.

Referenced by pgaio_io_acquire_nb().

◆ pgaio_io_set_flag()

void pgaio_io_set_flag ( PgAioHandle *  ioh,
PgAioHandleFlags  flag 
)

Definition at line 318 of file aio.c.

319{
321
322 ioh->flags |= flag;
323}
char * flag(int b)
Definition: test-ctype.c:33

References Assert(), flag(), PgAioHandle::flags, PGAIO_HS_HANDED_OUT, and PgAioHandle::state.

Referenced by AsyncReadBuffers(), mdstartreadv(), and read_rel_block_ll().

◆ pgaio_io_stage()

void pgaio_io_stage ( PgAioHandle *  ioh,
PgAioOp  op 
)

Definition at line 405 of file aio.c.

406{
407 bool needs_synchronous;
408
412
413 ioh->op = op;
414 ioh->result = 0;
415
417
418 /* allow a new IO to be staged */
420
422
424
425 /*
426 * Synchronous execution has to be executed, well, synchronously, so check
427 * that first.
428 */
429 needs_synchronous = pgaio_io_needs_synchronous_execution(ioh);
430
432 "staged (synchronous: %d, in_batch: %d)",
433 needs_synchronous, pgaio_my_backend->in_batchmode);
434
435 if (!needs_synchronous)
436 {
439
440 /*
441 * Unless code explicitly opted into batching IOs, submit the IO
442 * immediately.
443 */
446 }
447 else
448 {
451 }
452}
bool pgaio_io_needs_synchronous_execution(PgAioHandle *ioh)
Definition: aio.c:455
void pgaio_io_prepare_submit(PgAioHandle *ioh)
Definition: aio.c:482
void pgaio_io_call_stage(PgAioHandle *ioh)
Definition: aio_callback.c:199
void pgaio_io_perform_synchronously(PgAioHandle *ioh)
Definition: aio_io.c:116
bool pgaio_io_has_target(PgAioHandle *ioh)
Definition: aio_target.c:40
#define DEBUG3
Definition: elog.h:28
PgAioHandle * staged_ios[PGAIO_SUBMIT_BATCH_SIZE]
Definition: aio_internal.h:209

References Assert(), DEBUG3, PgAioBackend::handed_out_io, PgAioBackend::in_batchmode, PgAioBackend::num_staged_ios, PgAioHandle::op, pgaio_debug_io, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_STAGED, pgaio_io_call_stage(), pgaio_io_has_target(), pgaio_io_needs_synchronous_execution(), pgaio_io_perform_synchronously(), pgaio_io_prepare_submit(), pgaio_io_update_state(), pgaio_my_backend, PGAIO_SUBMIT_BATCH_SIZE, pgaio_submit_staged(), PgAioHandle::result, PgAioBackend::staged_ios, and PgAioHandle::state.

Referenced by pgaio_io_start_readv(), and pgaio_io_start_writev().

◆ pgaio_io_state_get_name()

static const char * pgaio_io_state_get_name ( PgAioHandleState  s)
static

Definition at line 826 of file aio.c.

827{
828#define PGAIO_HS_TOSTR_CASE(sym) case PGAIO_HS_##sym: return #sym
829 switch (s)
830 {
832 PGAIO_HS_TOSTR_CASE(HANDED_OUT);
833 PGAIO_HS_TOSTR_CASE(DEFINED);
834 PGAIO_HS_TOSTR_CASE(STAGED);
835 PGAIO_HS_TOSTR_CASE(SUBMITTED);
836 PGAIO_HS_TOSTR_CASE(COMPLETED_IO);
837 PGAIO_HS_TOSTR_CASE(COMPLETED_SHARED);
838 PGAIO_HS_TOSTR_CASE(COMPLETED_LOCAL);
839 }
840#undef PGAIO_HS_TOSTR_CASE
841
842 return NULL; /* silence compiler */
843}
#define PGAIO_HS_TOSTR_CASE(sym)

References PGAIO_HS_TOSTR_CASE.

Referenced by pgaio_io_get_state_name(), and pgaio_io_update_state().

◆ pgaio_io_update_state()

static void pgaio_io_update_state ( PgAioHandle *  ioh,
PgAioHandleState  new_state 
)
inline static

Definition at line 374 of file aio.c.

375{
377 "updating state to %s",
378 pgaio_io_state_get_name(new_state));
379
380 /*
381 * Ensure the changes signified by the new state are visible before the
382 * new state becomes visible.
383 */
385
386 ioh->state = new_state;
387}
#define DEBUG5
Definition: elog.h:26

References DEBUG5, pg_write_barrier, pgaio_debug_io, pgaio_io_state_get_name(), and PgAioHandle::state.

Referenced by pgaio_io_acquire_nb(), pgaio_io_prepare_submit(), pgaio_io_process_completion(), pgaio_io_reclaim(), and pgaio_io_stage().

◆ pgaio_io_wait()

static void pgaio_io_wait ( PgAioHandle *  ioh,
uint64  ref_generation 
)
static

Definition at line 544 of file aio.c.

545{
547 bool am_owner;
548
549 am_owner = ioh->owner_procno == MyProcNumber;
550
551 if (pgaio_io_was_recycled(ioh, ref_generation, &state))
552 return;
553
554 if (am_owner)
555 {
560 {
561 elog(PANIC, "waiting for own IO in wrong state: %d",
562 state);
563 }
564 }
565
566 while (true)
567 {
568 if (pgaio_io_was_recycled(ioh, ref_generation, &state))
569 return;
570
571 switch (state)
572 {
573 case PGAIO_HS_IDLE:
575 elog(ERROR, "IO in wrong state: %d", state);
576 break;
577
579
580 /*
581 * If we need to wait via the IO method, do so now. Don't
582 * check via the IO method if the issuing backend is executing
583 * the IO synchronously.
584 */
586 {
587 pgaio_method_ops->wait_one(ioh, ref_generation);
588 continue;
589 }
590 /* fallthrough */
591
592 /* waiting for owner to submit */
593 case PGAIO_HS_DEFINED:
594 case PGAIO_HS_STAGED:
595 /* waiting for reaper to complete */
596 /* fallthrough */
598 /* shouldn't be able to hit this otherwise */
600 /* ensure we're going to get woken up */
602
603 while (!pgaio_io_was_recycled(ioh, ref_generation, &state))
604 {
607 break;
608 ConditionVariableSleep(&ioh->cv, WAIT_EVENT_AIO_IO_COMPLETION);
609 }
610
612 break;
613
616 /* see above */
617 if (am_owner)
618 pgaio_io_reclaim(ioh);
619 return;
620 }
621 }
622}
bool pgaio_io_was_recycled(PgAioHandle *ioh, uint64 ref_generation, PgAioHandleState *state)
Definition: aio.c:531
PgAioHandleState
Definition: aio_internal.h:44
bool ConditionVariableCancelSleep(void)
void ConditionVariablePrepareToSleep(ConditionVariable *cv)
void ConditionVariableSleep(ConditionVariable *cv, uint32 wait_event_info)
#define PANIC
Definition: elog.h:42
bool IsUnderPostmaster
Definition: globals.c:121
void(* wait_one)(PgAioHandle *ioh, uint64 ref_generation)
Definition: aio_internal.h:323
Definition: regguts.h:323

References Assert(), ConditionVariableCancelSleep(), ConditionVariablePrepareToSleep(), ConditionVariableSleep(), PgAioHandle::cv, elog, ERROR, PgAioHandle::flags, IsUnderPostmaster, MyProcNumber, PgAioHandle::owner_procno, PANIC, PGAIO_HF_SYNCHRONOUS, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, PGAIO_HS_STAGED, PGAIO_HS_SUBMITTED, pgaio_io_reclaim(), pgaio_io_was_recycled(), pgaio_method_ops, and IoMethodOps::wait_one.

Referenced by pgaio_closing_fd(), pgaio_io_wait_for_free(), pgaio_shutdown(), and pgaio_wref_wait().

◆ pgaio_io_wait_for_free()

static void pgaio_io_wait_for_free ( void  )
static

Definition at line 710 of file aio.c.

711{
712 int reclaimed = 0;
713
714 pgaio_debug(DEBUG2, "waiting for free IO with %d pending, %d in-flight, %d idle IOs",
718
719 /*
720 * First check if any of our IOs actually have completed - when using
721 * worker, that'll often be the case. We could do so as part of the loop
722 * below, but that'd potentially lead us to wait for some IO submitted
723 * before.
724 */
725 for (int i = 0; i < io_max_concurrency; i++)
726 {
728
730 {
731 pgaio_io_reclaim(ioh);
732 reclaimed++;
733 }
734 }
735
736 if (reclaimed > 0)
737 return;
738
739 /*
740 * If we have any unsubmitted IOs, submit them now. We'll start waiting in
741 * a second, so it's better they're in flight. This also addresses the
742 * edge-case that all IOs are unsubmitted.
743 */
746
749 errmsg_internal("no free IOs despite no in-flight IOs"),
750 errdetail_internal("%d pending, %d in-flight, %d idle IOs",
754
755 /*
756 * Wait for the oldest in-flight IO to complete.
757 *
758 * XXX: Reusing the general IO wait is suboptimal, we don't need to wait
759 * for that specific IO to complete, we just need *any* IO to complete.
760 */
761 {
764
765 switch (ioh->state)
766 {
767 /* should not be in in-flight list */
768 case PGAIO_HS_IDLE:
769 case PGAIO_HS_DEFINED:
771 case PGAIO_HS_STAGED:
773 elog(ERROR, "shouldn't get here with io:%d in state %d",
774 pgaio_io_get_id(ioh), ioh->state);
775 break;
776
780 "waiting for free io with %d in flight",
782
783 /*
784 * In a more general case this would be racy, because the
785 * generation could increase after we read ioh->state above.
786 * But we are only looking at IOs by the current backend and
787 * the IO can only be recycled by this backend.
788 */
789 pgaio_io_wait(ioh, ioh->generation);
790 break;
791
793 /* it's possible that another backend just finished this IO */
794 pgaio_io_reclaim(ioh);
795 break;
796 }
797
799 elog(PANIC, "no idle IO after waiting for IO to terminate");
800 return;
801 }
802}
int pgaio_io_get_id(PgAioHandle *ioh)
Definition: aio.c:330
int io_max_concurrency
Definition: aio.c:78
int errmsg_internal(const char *fmt,...)
Definition: elog.c:1158
int errdetail_internal(const char *fmt,...)
Definition: elog.c:1231
#define ereport(elevel,...)
Definition: elog.h:149
#define dclist_head_element(type, membername, lhead)
Definition: ilist.h:955
int i
Definition: isn.c:77
uint32 io_handle_off
Definition: aio_internal.h:188

References dclist_count(), dclist_head_element, dclist_is_empty(), DEBUG2, elog, ereport, errdetail_internal(), errmsg_internal(), ERROR, PgAioHandle::generation, i, PgAioBackend::idle_ios, PgAioBackend::in_flight_ios, PgAioBackend::io_handle_off, PgAioCtl::io_handles, io_max_concurrency, PgAioBackend::num_staged_ios, PANIC, pgaio_ctl, pgaio_debug, pgaio_debug_io, PGAIO_HS_COMPLETED_IO, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_DEFINED, PGAIO_HS_HANDED_OUT, PGAIO_HS_IDLE, PGAIO_HS_STAGED, PGAIO_HS_SUBMITTED, pgaio_io_get_id(), pgaio_io_reclaim(), pgaio_io_wait(), pgaio_my_backend, pgaio_submit_staged(), and PgAioHandle::state.

Referenced by pgaio_io_acquire().

◆ pgaio_io_was_recycled()

bool pgaio_io_was_recycled ( PgAioHandle * ioh,
uint64  ref_generation,
PgAioHandleState * state 
)

Definition at line 531 of file aio.c.

/*
 * Report whether the handle's generation still matches the one the caller
 * holds.
 *
 * Stores the handle's current state into *state and returns true when
 * ioh->generation differs from ref_generation, false when they are equal.
 *
 * NOTE(review): source line 534 is not shown in this listing. The symbol
 * list for this function names pg_read_barrier, so a read barrier presumably
 * sits between the state read and the generation check -- confirm in aio.c.
 */
532{
533 *state = ioh->state;
535
536 return ioh->generation != ref_generation;
537}
#define pg_read_barrier()
Definition: atomics.h:156

References PgAioHandle::generation, pg_read_barrier, and PgAioHandle::state.

Referenced by pgaio_io_wait(), and pgaio_wref_check_done().

◆ pgaio_result_status_string()

const char * pgaio_result_status_string ( PgAioResultStatus  rs)

Definition at line 852 of file aio.c.

/*
 * Return a constant string naming the given result status.
 *
 * Every PGAIO_RS_* value handled by the switch maps to its name without the
 * PGAIO_RS_ prefix. A value that matches no case falls out of the switch
 * and yields NULL.
 */
853{
854 switch (rs)
855 {
856 case PGAIO_RS_UNKNOWN:
857 return "UNKNOWN";
858 case PGAIO_RS_OK:
859 return "OK";
860 case PGAIO_RS_WARNING:
861 return "WARNING";
862 case PGAIO_RS_PARTIAL:
863 return "PARTIAL";
864 case PGAIO_RS_ERROR:
865 return "ERROR";
866 }
867
868 return NULL; /* silence compiler */
869}
@ PGAIO_RS_OK
Definition: aio_types.h:81
@ PGAIO_RS_PARTIAL
Definition: aio_types.h:82
@ PGAIO_RS_ERROR
Definition: aio_types.h:84
@ PGAIO_RS_WARNING
Definition: aio_types.h:83

References PGAIO_RS_ERROR, PGAIO_RS_OK, PGAIO_RS_PARTIAL, PGAIO_RS_UNKNOWN, and PGAIO_RS_WARNING.

Referenced by pg_get_aios(), pgaio_io_call_complete_local(), pgaio_io_call_complete_shared(), and pgaio_io_reclaim().

◆ pgaio_shutdown()

void pgaio_shutdown ( int  code,
Datum  arg 
)

Definition at line 1198 of file aio.c.

/*
 * Shutdown-time AIO cleanup for this backend.
 *
 * Calls AtEOXact_Aio(), passing true only when code is 0, then waits on
 * in-flight IOs through pgaio_io_wait(), and finally resets
 * pgaio_my_backend to NULL. arg is not used in the lines shown here.
 *
 * NOTE(review): this listing omits source lines 1200-1201, 1215, 1217, 1219
 * and 1221, including the statement that opens the block at line 1216 and
 * the declaration of ioh. The symbol list for this function names Assert,
 * dclist_is_empty, dclist_head_element, dclist_count and pgaio_debug_io,
 * which presumably appear on those lines -- confirm in aio.c.
 */
1199{
1202
1203 /* first clean up resources as we would at a transaction boundary */
1204 AtEOXact_Aio(code == 0);
1205
1206 /*
1207 * Before exiting, make sure that all IOs are finished. That has two main
1208 * purposes:
1209 *
1210 * - Some kernel-level AIO mechanisms don't deal well with the issuer of
1211 * an AIO exiting before IO completed
1212 *
1213 * - It'd be confusing to see partially finished IOs in stats views etc
1214 */
1216 {
1218
1220 "waiting for IO to complete during shutdown, %d in-flight IOs",
1222
1223 /* see comment in pgaio_io_wait_for_free() about raciness */
1224 pgaio_io_wait(ioh, ioh->generation);
1225 }
1226
/* the backend-local AIO state pointer is cleared last */
1227 pgaio_my_backend = NULL;
1228}
void AtEOXact_Aio(bool is_commit)
Definition: aio.c:1106

References Assert(), AtEOXact_Aio(), dclist_count(), dclist_head_element, dclist_is_empty(), DEBUG2, PgAioHandle::generation, PgAioBackend::handed_out_io, PgAioBackend::in_flight_ios, pgaio_debug_io, pgaio_io_wait(), and pgaio_my_backend.

Referenced by pgaio_init_backend().

◆ pgaio_submit_staged()

void pgaio_submit_staged ( void  )

Definition at line 1036 of file aio.c.

/*
 * Submit the IOs this backend has staged.
 *
 * In the lines shown, the function has an early return near the top, adds
 * did_submit to total_submitted, asserts that the two are equal, and passes
 * total_submitted to a "submitted %d IOs" debug message.
 *
 * NOTE(review): this listing omits source lines 1041, 1045, 1047-1048, 1050,
 * 1056 and 1058. The symbol list for this function names
 * PgAioBackend::num_staged_ios, PgAioBackend::staged_ios, START_CRIT_SECTION,
 * END_CRIT_SECTION, IoMethodOps::submit and pgaio_debug. Presumably the
 * early return tests num_staged_ios and did_submit comes from the submit
 * callback invoked inside a critical section -- confirm in aio.c.
 */
1037{
1038 int total_submitted = 0;
1039 int did_submit;
1040
1042 return;
1043
1044
1046
1049
1051
1052 total_submitted += did_submit;
1053
/*
 * In the visible code total_submitted starts at 0 and is increased exactly
 * once, so it has to equal did_submit here.
 */
1054 Assert(total_submitted == did_submit);
1055
1057
1059 "aio: submitted %d IOs",
1060 total_submitted);
1061}
#define START_CRIT_SECTION()
Definition: miscadmin.h:150
#define END_CRIT_SECTION()
Definition: miscadmin.h:152
int(* submit)(uint16 num_staged_ios, PgAioHandle **staged_ios)
Definition: aio_internal.h:302

References Assert(), DEBUG4, END_CRIT_SECTION, PgAioBackend::num_staged_ios, pgaio_debug, pgaio_method_ops, pgaio_my_backend, PgAioBackend::staged_ios, START_CRIT_SECTION, and IoMethodOps::submit.

Referenced by AsyncReadBuffers(), pgaio_closing_fd(), pgaio_error_cleanup(), pgaio_exit_batchmode(), pgaio_io_acquire_nb(), pgaio_io_release_resowner(), pgaio_io_stage(), pgaio_io_wait_for_free(), and ReadBuffersCanStartIO().

◆ pgaio_wref_check_done()

bool pgaio_wref_check_done ( PgAioWaitRef * iow)

Definition at line 923 of file aio.c.

/*
 * Check whether the IO behind a wait reference is already done.
 *
 * Returns true when the handle's generation no longer matches the one stored
 * in the reference, when the handle's state is PGAIO_HS_IDLE, or when the
 * condition guarding the block at line 942 holds. In the last case the
 * handle is reclaimed first if this backend owns it. Returns false otherwise.
 *
 * NOTE(review): this listing omits source lines 926 and 940-941. The
 * declaration of state presumably sits on line 926. The symbol list names
 * PGAIO_HS_COMPLETED_SHARED and PGAIO_HS_COMPLETED_LOCAL, so the elided
 * condition presumably tests state against those two values -- confirm in
 * aio.c.
 */
924{
925 uint64 ref_generation;
927 bool am_owner;
928 PgAioHandle *ioh;
929
/* resolve the reference to its handle and the generation it refers to */
930 ioh = pgaio_io_from_wref(iow, &ref_generation);
931
/* also fills in state with the handle's current state */
932 if (pgaio_io_was_recycled(ioh, ref_generation, &state))
933 return true;
934
935 if (state == PGAIO_HS_IDLE)
936 return true;
937
/* only the owning backend calls pgaio_io_reclaim() below */
938 am_owner = ioh->owner_procno == MyProcNumber;
939
942 {
943 if (am_owner)
944 pgaio_io_reclaim(ioh);
945 return true;
946 }
947
948 /*
949 * XXX: It likely would be worth checking in with the io method, to give
950 * the IO method a chance to check if there are completion events queued.
951 */
952
953 return false;
954}
static PgAioHandle * pgaio_io_from_wref(PgAioWaitRef *iow, uint64 *ref_generation)
Definition: aio.c:809

References MyProcNumber, PgAioHandle::owner_procno, PGAIO_HS_COMPLETED_LOCAL, PGAIO_HS_COMPLETED_SHARED, PGAIO_HS_IDLE, pgaio_io_from_wref(), pgaio_io_reclaim(), and pgaio_io_was_recycled().

Referenced by WaitReadBuffers().

◆ pgaio_wref_clear()

void pgaio_wref_clear ( PgAioWaitRef * iow)

◆ pgaio_wref_get_id()

int pgaio_wref_get_id ( PgAioWaitRef * iow)

Definition at line 898 of file aio.c.

/*
 * Return the aio_index stored in the wait reference.
 *
 * NOTE(review): source line 900 is not shown in this listing. The symbol
 * list for this function names Assert and pgaio_wref_valid, so the reference
 * is presumably asserted to be valid before it is read -- confirm in aio.c.
 */
899{
901 return iow->aio_index;
902}
bool pgaio_wref_valid(PgAioWaitRef *iow)
Definition: aio.c:889

References PgAioWaitRef::aio_index, Assert(), and pgaio_wref_valid().

◆ pgaio_wref_valid()

bool pgaio_wref_valid ( PgAioWaitRef * iow)

◆ pgaio_wref_wait()

void pgaio_wref_wait ( PgAioWaitRef * iow)

Definition at line 909 of file aio.c.

/*
 * Wait on the IO identified by a wait reference.
 *
 * Resolves iow to its handle and the generation it refers to with
 * pgaio_io_from_wref(), then passes both to pgaio_io_wait().
 */
910{
911 uint64 ref_generation;
912 PgAioHandle *ioh;
913
914 ioh = pgaio_io_from_wref(iow, &ref_generation);
915
916 pgaio_io_wait(ioh, ref_generation);
917}

References pgaio_io_from_wref(), and pgaio_io_wait().

Referenced by InvalidateLocalBuffer(), read_rel_block_ll(), StartLocalBufferIO(), WaitIO(), and WaitReadBuffers().

Variable Documentation

◆ io_max_concurrency

int io_max_concurrency = -1

◆ io_method

int io_method = DEFAULT_IO_METHOD

◆ io_method_options

const struct config_enum_entry io_method_options[]
Initial value:
= {
/* Each entry pairs an option name string with an IOMETHOD_* constant. */
{"sync", IOMETHOD_SYNC, false},
{"worker", IOMETHOD_WORKER, false},
/* NOTE(review): NULL-named entry, presumably the end-of-list marker -- confirm. */
{NULL, 0, false}
}
@ IOMETHOD_WORKER
Definition: aio.h:35
@ IOMETHOD_SYNC
Definition: aio.h:34

Definition at line 67 of file aio.c.

Referenced by assign_io_method().

◆ pgaio_ctl

◆ pgaio_method_ops

◆ pgaio_method_ops_table

const IoMethodOps* const pgaio_method_ops_table[]
static
Initial value:
= {
/*
 * NOTE(review): the initializer entries are not shown in this listing. The
 * symbols listed right after it, pgaio_sync_ops and pgaio_worker_ops, are
 * presumably the table's members -- confirm in aio.c.
 */
}
const IoMethodOps pgaio_sync_ops
Definition: method_sync.c:28
const IoMethodOps pgaio_worker_ops
Definition: method_worker.c:83

Definition at line 87 of file aio.c.

Referenced by assign_io_method().

◆ pgaio_my_backend