From a58ed908898b706190a4d5d86824ce9268c95b50 Mon Sep 17 00:00:00 2001 From: Jun Date: Mon, 3 Jul 2017 13:27:10 -0400 Subject: [PATCH] [TDB-28-25]try_again_after_handling_write_error checks only for EINTR and ENOSPC & ubuntu-yakkety-64bit fails for PerconaFT-param tests TDB-25: ubuntu-yakkety-64bit fails for PerconaFT-param tests Updated CMakeList to not link againt jemalloc.a TDB-28 : try_again_after_handling_write_error checks only for EINTR and ENOSPC - Added some structured error logging during node, ft header/descriptor serialization and writing to the log file. - The error logging contains the file name, the calling function name and a bit of background information such as if it is a node serialization, whether the root cause is checkpoint or eviction. --- ft/ft-ops.h | 14 ++- ft/ft.cc | 16 ++- ft/logger/logger.cc | 73 ++++++++--- ft/logger/logger.h | 18 +++ ft/serialize/block_table.cc | 21 ++-- ft/serialize/block_table.h | 47 ++++++- ft/serialize/ft-serialize.cc | 47 ++++--- ft/serialize/ft-serialize.h | 3 +- ft/serialize/ft_node-serialize.cc | 52 +++++--- ft/tests/test1308a.cc | 4 +- ftcxx/tests/CMakeLists.txt | 6 +- portability/file.cc | 194 ++++++++++++++++++----------- portability/tests/test-pwrite4g.cc | 2 +- portability/toku_portability.h | 16 ++- 14 files changed, 364 insertions(+), 149 deletions(-) diff --git a/ft/ft-ops.h b/ft/ft-ops.h index df8ffe287..3e9d07fe0 100644 --- a/ft/ft-ops.h +++ b/ft/ft-ops.h @@ -263,11 +263,19 @@ void toku_ft_layer_destroy(void); void toku_ft_serialize_layer_init(void); void toku_ft_serialize_layer_destroy(void); -void toku_maybe_truncate_file (int fd, uint64_t size_used, uint64_t expected_size, uint64_t *new_size); +void toku_maybe_truncate_file(int fd, + uint64_t size_used, + uint64_t expected_size, + uint64_t *new_size); // Effect: truncate file if overallocated by at least 32MiB -void toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int64_t *new_size); -// Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size +void toku_maybe_preallocate_in_file(int fd, + int64_t size, + int64_t expected_size, + int64_t *new_size, + const char *dbg_context); +// Effect: make the file bigger by either doubling it or growing by 16MiB +// whichever is less, until it is at least size // Return 0 on success, otherwise an error number. int toku_ft_get_fragmentation(FT_HANDLE ft_h, TOKU_DB_FRAGMENTATION report) __attribute__ ((warn_unused_result)); diff --git a/ft/ft.cc b/ft/ft.cc index 700e532d5..7a82d3eda 100644 --- a/ft/ft.cc +++ b/ft/ft.cc @@ -866,16 +866,22 @@ toku_ft_update_descriptor(FT ft, DESCRIPTOR desc) // from the ft's cachefile. we do this so serialize code can // update a descriptor before the ft is fully opened and has // a valid cachefile. -void -toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { +void toku_ft_update_descriptor_with_fd(FT ft, DESCRIPTOR desc, int fd) { // the checksum is four bytes, so that's where the magic number comes from // make space for the new descriptor and write it out to disk DISKOFF offset, size; size = toku_serialize_descriptor_size(desc) + 4; - ft->blocktable.realloc_descriptor_on_disk(size, &offset, ft, fd); - toku_serialize_descriptor_contents_to_fd(fd, desc, offset); - // cleanup the old descriptor and set the in-memory descriptor to the new one + const char *dbg_context = construct_dbg_context_for_write_others( + toku_cachefile_fname_in_env(ft->cf), + __func__, + "updating the descriptor for an ft"); + ft->blocktable.realloc_descriptor_on_disk( + size, &offset, ft, fd, dbg_context); + toku_serialize_descriptor_contents_to_fd(fd, desc, offset, dbg_context); + destruct_dbg_context_for_write(dbg_context); + // cleanup the old descriptor and set the in-memory descriptor to the new + // one toku_destroy_dbt(&ft->descriptor.dbt); toku_clone_dbt(&ft->descriptor.dbt, desc->dbt); } diff --git a/ft/logger/logger.cc b/ft/logger/logger.cc index eacec9cb8..62612fc7f 100644 --- a/ft/logger/logger.cc +++ b/ft/logger/logger.cc @@ -449,22 +449,44 @@ swap_inbuf_outbuf (TOKULOGGER logger) assert(logger->inbuf.n_in_buf == 0); } -static void -write_outbuf_to_logfile (TOKULOGGER logger, LSN *fsynced_lsn) -// Effect: Write the contents of outbuf to logfile. Don't necessarily fsync (but it might, in which case fynced_lsn is updated). -// If the logfile gets too big, open the next one (that's the case where an fsync might happen). -// Entry and exit: Holds permission to modify output (and doesn't let it go, so it's ok to also hold the inlock). +static void write_outbuf_to_logfile(TOKULOGGER logger, LSN *fsynced_lsn) +// Effect: Write the contents of outbuf to logfile. Don't necessarily fsync +// (but it might, in which case fynced_lsn is updated). +// If the logfile gets too big, open the next one (that's the case where an +// fsync might happen). +// Entry and exit: Holds permission to modify output (and doesn't let it go, so +// it's ok to also hold the inlock). { - if (logger->outbuf.n_in_buf>0) { + if (logger->outbuf.n_in_buf > 0) { // Write the outbuf to disk, take accounting measurements + + int fnamelen = strlen(logger->directory) + 50; + char fname[fnamelen]; + snprintf(fname, + fnamelen, + "%s/log%012lld.tokulog%d", + logger->directory, + logger->next_log_file_number, + TOKU_LOG_VERSION); + + const char *dbg_context = construct_dbg_context_for_logging( + fname, __func__, "flushing redo logs to the log file"); tokutime_t io_t0 = toku_time_now(); - toku_os_full_write(logger->fd, logger->outbuf.buf, logger->outbuf.n_in_buf); + toku_os_full_write(logger->fd, + logger->outbuf.buf, + logger->outbuf.n_in_buf, + dbg_context); tokutime_t io_t1 = toku_time_now(); + destruct_dbg_context(dbg_context); + logger->num_writes_to_disk++; logger->bytes_written_to_disk += logger->outbuf.n_in_buf; logger->time_spent_writing_to_disk += (io_t1 - io_t0); - assert(logger->outbuf.max_lsn_in_buf.lsn > logger->written_lsn.lsn); // since there is something in the buffer, its LSN must be bigger than what's previously written. + assert(logger->outbuf.max_lsn_in_buf.lsn > + logger->written_lsn.lsn); // since there is something in the + // buffer, its LSN must be bigger than + // what's previously written. logger->written_lsn = logger->outbuf.max_lsn_in_buf; logger->n_in_file += logger->outbuf.n_in_buf; logger->outbuf.n_in_buf = 0; @@ -672,30 +694,43 @@ void toku_logger_free_logfiles(char **logfiles, int n_logfiles) { toku_free(logfiles); } -static int open_logfile (TOKULOGGER logger) +static int open_logfile(TOKULOGGER logger) // Entry and Exit: This thread has permission to modify the output. { - int fnamelen = strlen(logger->directory)+50; + int fnamelen = strlen(logger->directory) + 50; char fname[fnamelen]; - snprintf(fname, fnamelen, "%s/log%012lld.tokulog%d", logger->directory, logger->next_log_file_number, TOKU_LOG_VERSION); + snprintf(fname, + fnamelen, + "%s/log%012lld.tokulog%d", + logger->directory, + logger->next_log_file_number, + TOKU_LOG_VERSION); long long index = logger->next_log_file_number; if (logger->write_log_files) { - logger->fd = open(fname, O_CREAT+O_WRONLY+O_TRUNC+O_EXCL+O_BINARY, S_IRUSR+S_IWUSR); - if (logger->fd==-1) { + logger->fd = open(fname, + O_CREAT + O_WRONLY + O_TRUNC + O_EXCL + O_BINARY, + S_IRUSR + S_IWUSR); + if (logger->fd == -1) { return get_error_errno(); } fsync_logdir(logger); logger->next_log_file_number++; } else { - logger->fd = open(DEV_NULL_FILE, O_WRONLY+O_BINARY); - if (logger->fd==-1) { + logger->fd = open(DEV_NULL_FILE, O_WRONLY + O_BINARY); + if (logger->fd == -1) { return get_error_errno(); } } - toku_os_full_write(logger->fd, "tokulogg", 8); - int version_l = toku_htonl(log_format_version); //version MUST be in network byte order regardless of disk order - toku_os_full_write(logger->fd, &version_l, 4); - if ( logger->write_log_files ) { + const char *dbg_context = construct_dbg_context_for_logging( + fname, __func__, "initing the logfile upon opening"); + toku_os_full_write(logger->fd, "tokulogg", 8, dbg_context); + int version_l = toku_htonl(log_format_version); // version MUST be in + // network byte order + // regardless of disk order + toku_os_full_write(logger->fd, &version_l, 4, dbg_context); + destruct_dbg_context(dbg_context); + + if (logger->write_log_files) { TOKULOGFILEINFO XMALLOC(lf_info); lf_info->index = index; lf_info->maxlsn = logger->written_lsn; diff --git a/ft/logger/logger.h b/ft/logger/logger.h index d9595d710..5166b34ab 100644 --- a/ft/logger/logger.h +++ b/ft/logger/logger.h @@ -272,3 +272,21 @@ static inline void rbuf_ma_FILENUMS(struct rbuf *rb, memarena *ma, FILENUMS *fil rbuf_ma_FILENUM(rb, ma, &(filenums->filenums[i])); } } + +inline const char *construct_dbg_context_for_logging(char *fname, + const char *func_name, + const char *others) { + char *dbg_context = + (char *)toku_malloc(sizeof("Called by %s: writing to file %s for %s") + + sizeof(__func__) + PATH_MAX + sizeof(others)); + sprintf(dbg_context, + "Called by %s: writing to file %s for %s", + func_name, + fname, + others); + return (const char *)dbg_context; +} + +inline void destruct_dbg_context(const char *buffer) { + toku_free((void *)buffer); +} diff --git a/ft/serialize/block_table.cc b/ft/serialize/block_table.cc index d2532134d..1be97c96c 100644 --- a/ft/serialize/block_table.cc +++ b/ft/serialize/block_table.cc @@ -478,7 +478,8 @@ void block_table::_realloc_on_disk_internal(BLOCKNUM b, void block_table::_ensure_safe_write_unlocked(int fd, DISKOFF block_size, - DISKOFF block_offset) { + DISKOFF block_offset, + const char *dbg_context) { // Requires: holding _mutex uint64_t size_needed = block_size + block_offset; if (size_needed > _safe_file_size) { @@ -489,7 +490,7 @@ void block_table::_ensure_safe_write_unlocked(int fd, int64_t size_after; toku_maybe_preallocate_in_file( - fd, size_needed, _safe_file_size, &size_after); + fd, size_needed, _safe_file_size, &size_after, dbg_context); _mutex_lock(); _safe_file_size = size_after; @@ -509,7 +510,11 @@ void block_table::realloc_on_disk(BLOCKNUM b, _verify_valid_freeable_blocknum(t, b); _realloc_on_disk_internal(b, size, offset, ft, for_checkpoint); - _ensure_safe_write_unlocked(fd, size, *offset); + char *fname = toku_cachefile_fname_in_env(ft->cf); + const char *dbg_context = + construct_dbg_context_for_write_node(fname, for_checkpoint, __func__); + _ensure_safe_write_unlocked(fd, size, *offset, dbg_context); + destruct_dbg_context_for_write(dbg_context); _mutex_unlock(); } @@ -553,7 +558,8 @@ void block_table::_alloc_inprogress_translation_on_disk_unlocked() { void block_table::serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, - int64_t *size) { + int64_t *size, + const char *dbg_context) { _mutex_lock(); struct translation *t = &_inprogress; @@ -599,7 +605,7 @@ void block_table::serialize_translation_to_wbuf(int fd, *size = size_translation; invariant((*address) % 512 == 0); - _ensure_safe_write_unlocked(fd, size_aligned, *address); + _ensure_safe_write_unlocked(fd, size_aligned, *address, dbg_context); _mutex_unlock(); } @@ -1018,10 +1024,11 @@ void block_table::_realloc_descriptor_on_disk_unlocked(DISKOFF size, void block_table::realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, FT ft, - int fd) { + int fd, + const char *dbg_context) { _mutex_lock(); _realloc_descriptor_on_disk_unlocked(size, offset, ft); - _ensure_safe_write_unlocked(fd, size, *offset); + _ensure_safe_write_unlocked(fd, size, *offset, dbg_context); _mutex_unlock(); } diff --git a/ft/serialize/block_table.h b/ft/serialize/block_table.h index dd732d4f3..46230c8dc 100644 --- a/ft/serialize/block_table.h +++ b/ft/serialize/block_table.h @@ -136,7 +136,8 @@ class block_table { void realloc_descriptor_on_disk(DISKOFF size, DISKOFF *offset, struct ft *ft, - int fd); + int fd, + const char *dbg_context); void get_descriptor_offset_size(DISKOFF *offset, DISKOFF *size); // External verfication @@ -148,7 +149,8 @@ class block_table { void serialize_translation_to_wbuf(int fd, struct wbuf *w, int64_t *address, - int64_t *size); + int64_t *size, + const char *dbg_context); // DEBUG ONLY (ftdump included), tests included void blocknum_dump_translation(BLOCKNUM b); @@ -259,7 +261,8 @@ class block_table { void _maybe_truncate_file(int fd, uint64_t size_needed_before); void _ensure_safe_write_unlocked(int fd, DISKOFF block_size, - DISKOFF block_offset); + DISKOFF block_offset, + const char *dbg_context); // Verification bool _is_valid_blocknum(struct translation *t, BLOCKNUM b); @@ -338,3 +341,41 @@ static inline void rbuf_ma_BLOCKNUM(struct rbuf *rb, BLOCKNUM *blocknum) { *blocknum = rbuf_blocknum(rb); } + +inline const char *construct_dbg_context_for_write_node(const char *fname, + bool for_checkpoint, + const char *func_name) { + char *dbg_context = (char *)toku_malloc( + sizeof("Called by %s: writing to file %s for checkpoint") + + sizeof(func_name) + PATH_MAX); + + if (for_checkpoint) + sprintf(dbg_context, + "Called by %s: writing to file %s for checkpoint", + func_name, + fname); + else + sprintf(dbg_context, + "Called by %s; writing to file %s for eviction", + func_name, + fname); + + return (const char *)dbg_context; +} + +inline const char *construct_dbg_context_for_write_others(const char *fname, + const char *func_name, + const char *others) { + char *dbg_context = + (char *)toku_malloc(sizeof("Called by %s: writing to file %s for %s") + + sizeof(func_name) + PATH_MAX + sizeof(others)); + sprintf(dbg_context, + "Called by %s: writing to file %s for %s", + func_name, + fname, + others); + return (const char *)dbg_context; +} +inline void destruct_dbg_context_for_write(const char *buffer) { + toku_free((void *)buffer); +} diff --git a/ft/serialize/ft-serialize.cc b/ft/serialize/ft-serialize.cc index fcab9fc67..3ba4c4c94 100644 --- a/ft/serialize/ft-serialize.cc +++ b/ft/serialize/ft-serialize.cc @@ -67,28 +67,32 @@ void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc wbuf_bytes(wb, desc->dbt.data, desc->dbt.size); } -//Descriptor is written to disk during toku_ft_handle_open iff we have a new (or changed) -//descriptor. -//Descriptors are NOT written during the header checkpoint process. -void -toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, DISKOFF offset) { +// Descriptor is written to disk during toku_ft_handle_open iff we have a new +// (or changed) +// descriptor. +// Descriptors are NOT written during the header checkpoint process. +void toku_serialize_descriptor_contents_to_fd(int fd, + DESCRIPTOR desc, + DISKOFF offset, + const char *dbg_context) { // make the checksum - int64_t size = toku_serialize_descriptor_size(desc)+4; //4 for checksum + int64_t size = toku_serialize_descriptor_size(desc) + 4; // 4 for checksum int64_t size_aligned = roundup_to_multiple(512, size); struct wbuf w; char *XMALLOC_N_ALIGNED(512, size_aligned, aligned_buf); - for (int64_t i=size; iserialize_translation_to_wbuf( - fd, &w_translation, &address_translation, &size_translation); + const char *dbg_context = + construct_dbg_context_for_write_others(toku_cachefile_fname_in_env(cf), + __func__, + "serializing ft for checkpoint"); + bt->serialize_translation_to_wbuf(fd, + &w_translation, + &address_translation, + &size_translation, + dbg_context); invariant(size_translation == w_translation.ndone); // the number of bytes available in the buffer is 0 mod 512, and those last @@ -880,7 +891,8 @@ void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { toku_os_full_pwrite(fd, w_translation.buf, roundup_to_multiple(512, size_translation), - address_translation); + address_translation, + dbg_context); // Everything but the header MUST be on disk before header starts. // Otherwise we will think the header is good and some blocks might not @@ -900,7 +912,12 @@ void toku_serialize_ft_to(int fd, FT_HEADER h, block_table *bt, CACHEFILE cf) { main_offset = (h->checkpoint_count & 0x1) ? 0 : BlockAllocator::BLOCK_ALLOCATOR_HEADER_RESERVE; - toku_os_full_pwrite(fd, w_main.buf, size_main_aligned, main_offset); + toku_os_full_pwrite(fd, + w_main.buf, + size_main_aligned, + main_offset, + dbg_context); + destruct_dbg_context_for_write(dbg_context); toku_free(w_main.buf); toku_free(w_translation.buf); } diff --git a/ft/serialize/ft-serialize.h b/ft/serialize/ft-serialize.h index 144e18856..2aebf4c78 100644 --- a/ft/serialize/ft-serialize.h +++ b/ft/serialize/ft-serialize.h @@ -52,7 +52,8 @@ void toku_serialize_ft_to_wbuf(struct wbuf *wbuf, DISKOFF translation_size_on_disk); void toku_serialize_descriptor_contents_to_fd(int fd, DESCRIPTOR desc, - DISKOFF offset); + DISKOFF offset, + const char *dbg_context); void toku_serialize_descriptor_contents_to_wbuf(struct wbuf *wb, DESCRIPTOR desc); int toku_deserialize_ft_from(int fd, diff --git a/ft/serialize/ft_node-serialize.cc b/ft/serialize/ft_node-serialize.cc index 55899905b..1abb6cfd2 100644 --- a/ft/serialize/ft_node-serialize.cc +++ b/ft/serialize/ft_node-serialize.cc @@ -148,45 +148,59 @@ min64(int64_t a, int64_t b) { return b; } -void -toku_maybe_preallocate_in_file (int fd, int64_t size, int64_t expected_size, int64_t *new_size) -// Effect: make the file bigger by either doubling it or growing by 16MiB whichever is less, until it is at least size +void toku_maybe_preallocate_in_file(int fd, + int64_t size, + int64_t expected_size, + int64_t *new_size, + const char *dbg_context) +// Effect: make the file bigger by either doubling it or growing by 16MiB +// whichever is less, until it is at least size // Return 0 on success, otherwise an error number. { int64_t file_size = 0; - //TODO(yoni): Allow variable stripe_width (perhaps from ft) for larger raids + // TODO(yoni): Allow variable stripe_width (perhaps from ft) for larger + // raids const uint64_t stripe_width = 4096; { int r = toku_os_get_file_size(fd, &file_size); - if (r != 0) { // debug #2463 + if (r != 0) { // debug #2463 int the_errno = get_maybe_error_errno(); - fprintf(stderr, "%s:%d fd=%d size=%" PRIu64 " r=%d errno=%d\n", __FUNCTION__, __LINE__, fd, size, r, the_errno); fflush(stderr); + fprintf(stderr, + "%s:%d fd=%d size=%" PRIu64 " r=%d errno=%d\n", + __FUNCTION__, + __LINE__, + fd, + size, + r, + the_errno); + fflush(stderr); } lazy_assert_zero(r); } invariant(file_size >= 0); invariant(expected_size == file_size); // We want to double the size of the file, or add 16MiB, whichever is less. - // We emulate calling this function repeatedly until it satisfies the request. + // We emulate calling this function repeatedly until it satisfies the + // request. int64_t to_write = 0; if (file_size == 0) { // Prevent infinite loop by starting with stripe_width as a base case. to_write = stripe_width; } while (file_size + to_write < size) { - to_write += alignup64(min64(file_size + to_write, FILE_CHANGE_INCREMENT), stripe_width); + to_write += alignup64( + min64(file_size + to_write, FILE_CHANGE_INCREMENT), stripe_width); } if (to_write > 0) { - assert(to_write%512==0); + assert(to_write % 512 == 0); toku::scoped_malloc_aligned wbuf_aligned(to_write, 512); char *wbuf = reinterpret_cast(wbuf_aligned.get()); memset(wbuf, 0, to_write); toku_off_t start_write = alignup64(file_size, stripe_width); invariant(start_write >= file_size); - toku_os_full_pwrite(fd, wbuf, to_write, start_write); + toku_os_full_pwrite(fd, wbuf, to_write, start_write, dbg_context); *new_size = start_write + to_write; - } - else { + } else { *new_size = file_size; } } @@ -818,10 +832,13 @@ int toku_serialize_ftnode_to(int fd, ft->blocktable.realloc_on_disk( blocknum, n_to_write, &offset, ft, fd, for_checkpoint); + char *fname = toku_cachefile_fname_in_env(ft->cf); + const char *dbg_context = + construct_dbg_context_for_write_node(fname, for_checkpoint, __func__); tokutime_t t0 = toku_time_now(); - toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); + toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset, dbg_context); tokutime_t t1 = toku_time_now(); - + destruct_dbg_context_for_write(dbg_context); tokutime_t io_time = t1 - t0; toku_ft_status_update_flush_reason( node, n_uncompressed_bytes, n_to_write, io_time, for_checkpoint); @@ -2904,7 +2921,12 @@ int toku_serialize_rollback_log_to(int fd, ft->blocktable.realloc_on_disk( blocknum, n_to_write, &offset, ft, fd, for_checkpoint); - toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset); + char *fname = toku_cachefile_fname_in_env(ft->cf); + const char *dbg_context = + construct_dbg_context_for_write_node(fname, for_checkpoint, __func__); + toku_os_full_pwrite(fd, compressed_buf, n_to_write, offset, dbg_context); + destruct_dbg_context_for_write(dbg_context); + toku_free(compressed_buf); if (!is_serialized) { toku_static_serialized_rollback_log_destroy(&serialized_local); diff --git a/ft/tests/test1308a.cc b/ft/tests/test1308a.cc index 8b25df6e9..b51a8f3ed 100644 --- a/ft/tests/test1308a.cc +++ b/ft/tests/test1308a.cc @@ -64,7 +64,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ memset(buf, 0, sizeof(buf)); uint64_t i; for (i=0; i<(1LL<<32); i+=BUFSIZE) { - toku_os_full_write(fd, buf, BUFSIZE); + toku_os_full_write(fd, buf, BUFSIZE, "test1308a"); } } int64_t file_size; @@ -74,7 +74,7 @@ test_main (int argc __attribute__((__unused__)), const char *argv[] __attribute_ } { int64_t size_after; - toku_maybe_preallocate_in_file(fd, 1000, file_size, &size_after); + toku_maybe_preallocate_in_file(fd, 1000, file_size, &size_after, "test1308a"); assert(size_after == file_size); } int64_t file_size2; diff --git a/ftcxx/tests/CMakeLists.txt b/ftcxx/tests/CMakeLists.txt index 6f9146ce5..c07ef5774 100644 --- a/ftcxx/tests/CMakeLists.txt +++ b/ftcxx/tests/CMakeLists.txt @@ -32,9 +32,9 @@ if (BUILD_TESTING) add_executable(${_testname} ${test}) if (with_jemalloc AND JEMALLOC_STATIC_LIBRARY) if (APPLE) - target_link_libraries(${_testname} -Wl,-force_load ${JEMALLOC_STATIC_LIBRARY}) + target_link_libraries(${_testname} -Wl,-force_load) else () - target_link_libraries(${_testname} -Wl,--whole-archive ${JEMALLOC_STATIC_LIBRARY} -Wl,--no-whole-archive) + target_link_libraries(${_testname} -Wl,--whole-archive -Wl,--no-whole-archive) endif () endif () target_link_libraries(${_testname} ${impl}) @@ -44,4 +44,4 @@ if (BUILD_TESTING) endforeach () endforeach () endforeach () -endif (BUILD_TESTING) \ No newline at end of file +endif (BUILD_TESTING) diff --git a/portability/file.cc b/portability/file.cc index 0e3efc1a1..4e436fabc 100644 --- a/portability/file.cc +++ b/portability/file.cc @@ -69,67 +69,114 @@ void toku_fs_get_write_info(time_t *enospc_last_time, uint64_t *enospc_current, *enospc_total = toku_write_enospc_total; } -//Print any necessary errors -//Return whether we should try the write again. -static void -try_again_after_handling_write_error(int fd, size_t len, ssize_t r_write) { +#define dump_state_of_write_error() \ + fprintf(stderr, \ + "%s:%d:try_again_after_handling_write_error - " \ + "write failed with an error: " \ + "%s \n" \ + "file descriptor[%d] " \ + "length[%" PRIu64 \ + "] " \ + "errno[%d] \n" \ + "%s \n", \ + __FILE__, \ + __LINE__, \ + err_msg, \ + fd, \ + len, \ + errno_write, \ + dbg_context); + +// Print any necessary errors +// Return whether we should try the write again. +static void try_again_after_handling_write_error(int fd, + size_t len, + ssize_t r_write, + const char *dbg_context) { int try_again = 0; assert(r_write < 0); int errno_write = get_error_errno(); switch (errno_write) { - case EINTR: { //The call was interrupted by a signal before any data was written; see signal(7). - char err_msg[sizeof("Write of [] bytes to fd=[] interrupted. Retrying.") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars - snprintf(err_msg, sizeof(err_msg), "Write of [%" PRIu64 "] bytes to fd=[%d] interrupted. Retrying.", (uint64_t)len, fd); - perror(err_msg); - fflush(stderr); - try_again = 1; - break; - } - case ENOSPC: { - if (toku_assert_on_write_enospc) { - char err_msg[sizeof("Failed write of [] bytes to fd=[].") + 20+10]; //64 bit is 20 chars, 32 bit is 10 chars - snprintf(err_msg, sizeof(err_msg), "Failed write of [%" PRIu64 "] bytes to fd=[%d].", (uint64_t)len, fd); - perror(err_msg); - fflush(stderr); - int out_of_disk_space = 1; - assert(!out_of_disk_space); //Give an error message that might be useful if this is the only one that survives. - } else { - toku_sync_fetch_and_add(&toku_write_enospc_total, 1); - toku_sync_fetch_and_add(&toku_write_enospc_current, 1); - - time_t tnow = time(0); - toku_write_enospc_last_time = tnow; - if (toku_write_enospc_last_report == 0 || tnow - toku_write_enospc_last_report >= 60) { - toku_write_enospc_last_report = tnow; - - const int tstr_length = 26; - char tstr[tstr_length]; - time_t t = time(0); - ctime_r(&t, tstr); - - const int MY_MAX_PATH = 256; - char fname[MY_MAX_PATH], symname[MY_MAX_PATH+1]; - sprintf(fname, "/proc/%d/fd/%d", getpid(), fd); - ssize_t n = readlink(fname, symname, MY_MAX_PATH); - - if ((int)n == -1) - fprintf(stderr, "%.24s PerconaFT No space when writing %" PRIu64 " bytes to fd=%d ", tstr, (uint64_t) len, fd); - else { - tstr[n] = 0; // readlink doesn't append a NUL to the end of the buffer. - fprintf(stderr, "%.24s PerconaFT No space when writing %" PRIu64 " bytes to %*s ", tstr, (uint64_t) len, (int) n, symname); - } - fprintf(stderr, "retry in %d second%s\n", toku_write_enospc_sleep, toku_write_enospc_sleep > 1 ? "s" : ""); - fflush(stderr); - } - sleep(toku_write_enospc_sleep); + case EINTR: { // The call was interrupted by a signal before any data + // was written; see signal(7). + char err_msg[] = "Write to the file interrupted. Retrying."; + dump_state_of_write_error(); try_again = 1; - toku_sync_fetch_and_sub(&toku_write_enospc_current, 1); break; } - } - default: - break; + case ENOSPC: { + if (toku_assert_on_write_enospc) { + char err_msg[] = + "Failed write to the file due to no space. Will not retry"; + dump_state_of_write_error(); + int out_of_disk_space = 1; + assert( + !out_of_disk_space); // Give an error message that might be + // useful if this is the only one that + // survives. + } else { + toku_sync_fetch_and_add(&toku_write_enospc_total, 1); + toku_sync_fetch_and_add(&toku_write_enospc_current, 1); + + time_t tnow = time(0); + toku_write_enospc_last_time = tnow; + if (toku_write_enospc_last_report == 0 || + tnow - toku_write_enospc_last_report >= 60) { + toku_write_enospc_last_report = tnow; + + const int tstr_length = 26; + char tstr[tstr_length]; + time_t t = time(0); + ctime_r(&t, tstr); + + const int MY_MAX_PATH = 256; + char fname[MY_MAX_PATH], symname[MY_MAX_PATH + 1]; + sprintf(fname, "/proc/%d/fd/%d", getpid(), fd); + ssize_t n = readlink(fname, symname, MY_MAX_PATH); + + if ((int)n == -1) { + char + err_msg[sizeof("PerconaFT No space when writing to " + "the file, retry in seconds") + + 25 + 10 + 2]; + snprintf(err_msg, + sizeof(err_msg), + "%.24s PerconaFT No space when writing to the " + "file, retry in %d seconds%s", + tstr, + toku_write_enospc_sleep, + toku_write_enospc_sleep > 1 ? "s" : ""); + dump_state_of_write_error(); + } else { + tstr[n] = + 0; // readlink doesn't append a NUL to the end of + // the buffer. + char err_msg[sizeof( + "PerconaFT No space when writing to , " + "retry in seconds") + + 25 + sizeof(symname) + 10 + 2]; + snprintf(err_msg, + sizeof(err_msg), + "%.24s PerconaFT No space when writing to %s, " + "retry in %d seconds%s", + tstr, + symname, + toku_write_enospc_sleep, + toku_write_enospc_sleep > 1 ? "s" : ""); + dump_state_of_write_error(); + } + } + sleep(toku_write_enospc_sleep); + try_again = 1; + toku_sync_fetch_and_sub(&toku_write_enospc_current, 1); + break; + } + } + default: + char err_msg[] = "Write to the file failed due to other reasons:"; + dump_state_of_write_error(); + break; } assert(try_again); errno = errno_write; @@ -186,9 +233,11 @@ void toku_set_func_pread (ssize_t (*pread_fun)(int, void *, size_t, off_t)) { t_pread = pread_fun; } -void -toku_os_full_write (int fd, const void *buf, size_t len) { - const char *bp = (const char *) buf; +void toku_os_full_write(int fd, + const void *buf, + size_t len, + const char *dbg_context) { + const char *bp = (const char *)buf; while (len > 0) { ssize_t r; if (t_full_write) { @@ -197,11 +246,10 @@ toku_os_full_write (int fd, const void *buf, size_t len) { r = write(fd, bp, len); } if (r > 0) { - len -= r; - bp += r; - } - else { - try_again_after_handling_write_error(fd, len, r); + len -= r; + bp += r; + } else { + try_again_after_handling_write_error(fd, len, r, dbg_context); } } assert(len == 0); @@ -228,11 +276,14 @@ toku_os_write (int fd, const void *buf, size_t len) { return result; } -void -toku_os_full_pwrite (int fd, const void *buf, size_t len, toku_off_t off) { - assert(0==((long long)buf)%512); - assert((len%512 == 0) && (off%512)==0); // to make pwrite work. - const char *bp = (const char *) buf; +void toku_os_full_pwrite(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *dbg_context) { + assert(0 == ((long long)buf) % 512); + assert((len % 512 == 0) && (off % 512) == 0); // to make pwrite work. + const char *bp = (const char *)buf; while (len > 0) { ssize_t r; if (t_full_pwrite) { @@ -241,12 +292,11 @@ toku_os_full_pwrite (int fd, const void *buf, size_t len, toku_off_t off) { r = pwrite(fd, bp, len, off); } if (r > 0) { - len -= r; - bp += r; - off += r; - } - else { - try_again_after_handling_write_error(fd, len, r); + len -= r; + bp += r; + off += r; + } else { + try_again_after_handling_write_error(fd, len, r, dbg_context); } } assert(len == 0); diff --git a/portability/tests/test-pwrite4g.cc b/portability/tests/test-pwrite4g.cc index 9bf50b9b6..8a2b98aa3 100644 --- a/portability/tests/test-pwrite4g.cc +++ b/portability/tests/test-pwrite4g.cc @@ -62,7 +62,7 @@ int test_main(int UU(argc), char *const UU(argv[])) { memset(buf, 0, 512); strcpy(buf, "hello"); int64_t offset = (1LL<<32) + 512; - toku_os_full_pwrite(fd, buf, 512, offset); + toku_os_full_pwrite(fd, buf, 512, offset, "test-pwrite4g"); char newbuf[512]; r = pread(fd, newbuf, sizeof newbuf, 100); assert(r==sizeof newbuf); diff --git a/portability/toku_portability.h b/portability/toku_portability.h index f127b0fe1..6985f4343 100644 --- a/portability/toku_portability.h +++ b/portability/toku_portability.h @@ -231,9 +231,19 @@ size_t os_malloc_usable_size(const void *p) __attribute__((__visibility__("defau // Effect: Return an estimate of the usable size inside a pointer. If this function is not defined the memory.cc will // look for the jemalloc, libc, or darwin versions of the function for computing memory footprint. -// full_pwrite and full_write performs a pwrite, and checks errors. It doesn't return unless all the data was written. */ -void toku_os_full_pwrite (int fd, const void *buf, size_t len, toku_off_t off) __attribute__((__visibility__("default"))); -void toku_os_full_write (int fd, const void *buf, size_t len) __attribute__((__visibility__("default"))); +// full_pwrite and full_write performs a pwrite, and checks errors. It doesn't +// return unless all the data was written. */ +void toku_os_full_pwrite(int fd, + const void *buf, + size_t len, + toku_off_t off, + const char *dbg_context) + __attribute__((__visibility__("default"))); +void toku_os_full_write(int fd, + const void *buf, + size_t len, + const char *dbg_context) + __attribute__((__visibility__("default"))); // os_write returns 0 on success, otherwise an errno. ssize_t toku_os_pwrite (int fd, const void *buf, size_t len, toku_off_t off) __attribute__((__visibility__("default")));