diff -r eaa1577ff396 include/mysql/plugin.h --- a/include/mysql/plugin.h Thu Sep 04 12:16:10 2008 -0700 +++ b/include/mysql/plugin.h Thu Sep 04 12:17:55 2008 -0700 @@ -684,6 +684,14 @@ /* Increments the row counter, see THD::row_count */ void thd_inc_row_count(MYSQL_THD thd); +void increment_thd_innodb_stats(MYSQL_THD thd, + long io_reads, + long long io_read, + long io_reads_wait_timer, + long lock_que_wait_timer, + long que_wait_timer, + long page_access); + /** Create a temporary file. diff -r eaa1577ff396 patch_info/innodb_stat.info --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/patch_info/innodb_stat.info Thu Sep 04 12:17:55 2008 -0700 @@ -0,0 +1,6 @@ +File=innodb_stat.patch +Name=Extended statistics of InnoDB in slow.log +Version=1.0 +Author=Percona +License=GPL +Comment= diff -r eaa1577ff396 sql/log.cc --- a/sql/log.cc Thu Sep 04 12:16:10 2008 -0700 +++ b/sql/log.cc Thu Sep 04 12:17:55 2008 -0700 @@ -981,6 +981,7 @@ if (!query_length) { thd->sent_row_count= thd->examined_row_count= 0; + thd->innodb_was_used= FALSE; thd->query_plan_flags= QPLAN_NONE; thd->query_plan_fsort_passes= 0; } @@ -2264,7 +2265,30 @@ ((thd->query_plan_flags & QPLAN_FILESORT) ? "Yes" : "No"), ((thd->query_plan_flags & QPLAN_FILESORT_DISK) ? "Yes" : "No"), thd->query_plan_fsort_passes) == (uint) -1) - tmp_errno=errno; + tmp_errno=errno; + if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && thd->innodb_was_used) + { + char buff[5][20]; + sprintf(buff[2], "%.6f", thd->innodb_io_reads_wait_timer / 1000000.0); + sprintf(buff[3], "%.6f", thd->innodb_lock_que_wait_timer / 1000000.0); + sprintf(buff[4], "%.6f", thd->innodb_innodb_que_wait_timer / 1000000.0); + if (my_b_printf(&log_file, + "# InnoDB_IO_r_ops: %lu InnoDB_IO_r_bytes: %lu InnoDB_IO_r_wait: %s\n" \ + "# InnoDB_rec_lock_wait: %s InnoDB_queue_wait: %s\n" \ + "# InnoDB_pages_distinct: %lu\n", + (ulong) thd->innodb_io_reads, + (ulong) thd->innodb_io_read, + buff[2], buff[3], buff[4], + (ulong) thd->innodb_page_access) == (uint) -1) + tmp_errno=errno; + } + else + { + if ((thd->variables.log_slow_verbosity & SLOG_V_INNODB) && + my_b_printf(&log_file,"# No InnoDB statistics available for this query\n") == (uint) -1) + tmp_errno=errno; + } + if (thd->db && strcmp(thd->db, db)) { // Database changed if (my_b_printf(&log_file,"use %s;\n",thd->db) == (uint) -1) diff -r eaa1577ff396 sql/mysql_priv.h --- a/sql/mysql_priv.h Thu Sep 04 12:16:10 2008 -0700 +++ b/sql/mysql_priv.h Thu Sep 04 12:17:55 2008 -0700 @@ -605,6 +605,7 @@ }; #define SLOG_V_QUERY_PLAN 1 << 1 +#define SLOG_V_INNODB 1 << 2 /* ... */ #define SLOG_V_INVALID 1 << 31 #define SLOG_V_NONE 1 << 0 @@ -614,6 +615,7 @@ /* Basic flags */ { SLOG_V_QUERY_PLAN, "query_plan" }, + { SLOG_V_INNODB, "innodb" }, /* End of baisc flags */ @@ -622,7 +624,7 @@ /* Complex flags */ { SLOG_V_QUERY_PLAN, "standard" }, - { SLOG_V_INVALID, "full" }, + { SLOG_V_QUERY_PLAN|SLOG_V_INNODB, "full" }, /* End of complex flags */ diff -r eaa1577ff396 sql/sql_class.cc --- a/sql/sql_class.cc Thu Sep 04 12:16:10 2008 -0700 +++ b/sql/sql_class.cc Thu Sep 04 12:17:55 2008 -0700 @@ -304,6 +304,23 @@ void thd_inc_row_count(THD *thd) { thd->row_count++; +} + +extern "C" +void increment_thd_innodb_stats(THD* thd,long io_reads, + long long io_read, + long io_reads_wait_timer, + long lock_que_wait_timer, + long que_wait_timer, + long page_access) +{ + thd->innodb_was_used = TRUE; + thd->innodb_io_reads += io_reads; + thd->innodb_io_read += io_read; + thd->innodb_io_reads_wait_timer += io_reads_wait_timer; + thd->innodb_lock_que_wait_timer += lock_que_wait_timer; + thd->innodb_innodb_que_wait_timer += que_wait_timer; + thd->innodb_page_access += page_access; } /* @@ -2949,6 +2966,12 @@ first_successful_insert_id_in_prev_stmt; backup->first_successful_insert_id_in_cur_stmt= first_successful_insert_id_in_cur_stmt; + backup->innodb_io_reads= innodb_io_reads; + backup->innodb_io_read= innodb_io_read; + backup->innodb_io_reads_wait_timer= innodb_io_reads_wait_timer; + backup->innodb_lock_que_wait_timer= innodb_lock_que_wait_timer; + backup->innodb_innodb_que_wait_timer= innodb_innodb_que_wait_timer; + backup->innodb_page_access= innodb_page_access; if ((!lex->requires_prelocking() || is_update_query(lex->sql_command)) && !current_stmt_binlog_row_based) @@ -2968,6 +2991,12 @@ cuted_fields= 0; transaction.savepoints= 0; first_successful_insert_id_in_cur_stmt= 0; + innodb_io_reads= 0; + innodb_io_read= 0; + innodb_io_reads_wait_timer= 0; + innodb_lock_que_wait_timer= 0; + innodb_innodb_que_wait_timer= 0; + innodb_page_access= 0; } @@ -3028,8 +3057,13 @@ */ examined_row_count+= backup->examined_row_count; cuted_fields+= backup->cuted_fields; + innodb_io_reads+= backup->innodb_io_reads; + innodb_io_read+= backup->innodb_io_read; + innodb_io_reads_wait_timer+= backup->innodb_io_reads_wait_timer; + innodb_lock_que_wait_timer+= backup->innodb_lock_que_wait_timer; + innodb_innodb_que_wait_timer+= backup->innodb_innodb_que_wait_timer; + innodb_page_access+= backup->innodb_page_access; } - /** Mark transaction to rollback and mark error as fatal to a sub-statement. diff -r eaa1577ff396 sql/sql_class.h --- a/sql/sql_class.h Thu Sep 04 12:16:10 2008 -0700 +++ b/sql/sql_class.h Thu Sep 04 12:17:55 2008 -0700 @@ -400,7 +400,12 @@ ulong log_slow_rate_limit; ulong log_slow_filter; ulong log_slow_verbosity; - + ulong innodb_io_reads; + ulonglong innodb_io_read; + ulong innodb_io_reads_wait_timer; + ulong innodb_lock_que_wait_timer; + ulong innodb_innodb_que_wait_timer; + ulong innodb_page_access; }; @@ -985,6 +990,14 @@ uint in_sub_stmt; bool enable_slow_log; bool last_insert_id_used; + + ulong innodb_io_reads; + ulonglong innodb_io_read; + ulong innodb_io_reads_wait_timer; + ulong innodb_lock_que_wait_timer; + ulong innodb_innodb_que_wait_timer; + ulong innodb_page_access; + SAVEPOINT *savepoints; }; @@ -1328,6 +1341,14 @@ Delayed_insert *di; bool write_to_slow_log; + + bool innodb_was_used; + ulong innodb_io_reads; + ulonglong innodb_io_read; + ulong innodb_io_reads_wait_timer; + ulong innodb_lock_que_wait_timer; + ulong innodb_innodb_que_wait_timer; + ulong innodb_page_access; ulong query_plan_flags; ulong query_plan_fsort_passes; diff -r eaa1577ff396 sql/sql_parse.cc --- a/sql/sql_parse.cc Thu Sep 04 12:16:10 2008 -0700 +++ b/sql/sql_parse.cc Thu Sep 04 12:17:55 2008 -0700 @@ -5662,6 +5662,14 @@ thd->rand_used= 0; thd->sent_row_count= thd->examined_row_count= 0; + thd->innodb_was_used= FALSE; + thd->innodb_io_reads= 0; + thd->innodb_io_read= 0; + thd->innodb_io_reads_wait_timer= 0; + thd->innodb_lock_que_wait_timer= 0; + thd->innodb_innodb_que_wait_timer= 0; + thd->innodb_page_access= 0; + thd->query_plan_flags= QPLAN_NONE; thd->query_plan_fsort_passes= 0; diff -r eaa1577ff396 storage/innobase/buf/buf0buf.c --- a/storage/innobase/buf/buf0buf.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/buf/buf0buf.c Thu Sep 04 12:17:55 2008 -0700 @@ -39,6 +39,7 @@ #include "trx0undo.h" #include "srv0srv.h" #include "page0zip.h" +#include "thr0loc.h" /* IMPLEMENTATION OF THE BUFFER POOL @@ -1530,6 +1531,33 @@ } #endif /* UNIV_DEBUG_FILE_ACCESSES */ + +inline void _increment_page_get_statistics(buf_page_t* bpage, trx_t* trx) +{ + ulint bpage_hash; + ulint bpage_hash_byte; + byte bpage_hash_offset; + + ut_ad(bpage); + + if (!trx || !trx->distinct_page_access_hash) + return; + + bpage_hash = ut_hash_ulint((bpage->space << 20) + bpage->space + + bpage->offset, DPAH_SIZE << 3); + bpage_hash_byte = bpage_hash >> 3; + bpage_hash_offset = (byte) bpage_hash & 0x07; + if (bpage_hash_byte < 0 || bpage_hash_byte >= DPAH_SIZE) + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %u !!!\n", bpage_hash_byte, bpage_hash_offset); + if (bpage_hash_offset < 0 || bpage_hash_offset > 7) + fprintf(stderr, "!!! block_hash_byte = %lu block_hash_offset = %u !!!\n", bpage_hash_byte, bpage_hash_offset); + if ((trx->distinct_page_access_hash[bpage_hash_byte] & ((byte) 0x01 << bpage_hash_offset)) == 0) + trx->distinct_page_access++; + trx->distinct_page_access_hash[bpage_hash_byte] |= (byte) 0x01 << bpage_hash_offset; + return; +} + + /************************************************************************ Get read access to a compressed page (usually of type FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). @@ -1550,10 +1578,16 @@ buf_page_t* bpage; mutex_t* block_mutex; ibool must_read; + trx_t* trx; + ulint sec; + ulint ms; + ib_int64_t start_time; + ib_int64_t finish_time; #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside()); #endif + trx = thr_local_get_trx(os_thread_get_curr_id()); buf_pool->n_page_gets++; for (;;) { @@ -1568,7 +1602,7 @@ buf_pool_mutex_exit(); - buf_read_page(space, zip_size, offset); + buf_read_page(space, zip_size, offset, trx); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 37 || buf_validate()); @@ -1634,6 +1668,12 @@ /* Let us wait until the read operation completes */ + if (trx) + { + ut_usectime(&sec, &ms); + start_time = (ib_int64_t)sec * 1000000 + ms; + } + for (;;) { enum buf_io_fix io_fix; @@ -1648,12 +1688,21 @@ break; } } + if (trx) + { + ut_usectime(&sec, &ms); + finish_time = (ib_int64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } } #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(buf_page_get_space(bpage), buf_page_get_page_no(bpage)) == 0); #endif + + _increment_page_get_statistics(bpage, trx); + return(bpage); } @@ -1775,7 +1824,6 @@ return(FALSE); } - /************************************************************************ This is the general function used to get access to a database page. */ UNIV_INTERN @@ -1799,6 +1847,11 @@ ibool accessed; ulint fix_type; ibool must_read; + trx_t* trx; + ulint sec; + ulint ms; + ib_int64_t start_time; + ib_int64_t finish_time; ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) @@ -1811,6 +1864,7 @@ #ifndef UNIV_LOG_DEBUG ut_ad(!ibuf_inside() || ibuf_page(space, zip_size, offset)); #endif + trx = thr_local_get_trx(os_thread_get_curr_id()); buf_pool->n_page_gets++; loop: block = guess; @@ -1852,7 +1906,7 @@ return(NULL); } - buf_read_page(space, zip_size, offset); + buf_read_page(space, zip_size, offset, trx); #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG ut_a(++buf_dbg_counter % 37 || buf_validate()); @@ -2083,6 +2137,11 @@ /* Let us wait until the read operation completes */ + if (trx) + { + ut_usectime(&sec, &ms); + start_time = (ib_int64_t)sec * 1000000 + ms; + } for (;;) { enum buf_io_fix io_fix; @@ -2096,6 +2155,12 @@ } else { break; } + } + if (trx) + { + ut_usectime(&sec, &ms); + finish_time = (ib_int64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); } } @@ -2117,13 +2182,16 @@ /* In the case of a first access, try to apply linear read-ahead */ - buf_read_ahead_linear(space, zip_size, offset); + buf_read_ahead_linear(space, zip_size, offset, trx); } #ifdef UNIV_IBUF_COUNT_DEBUG ut_a(ibuf_count_get(buf_block_get_space(block), buf_block_get_page_no(block)) == 0); #endif + + _increment_page_get_statistics(&block->page, trx); + return(block); } @@ -2146,6 +2214,7 @@ ibool accessed; ibool success; ulint fix_type; + trx_t* trx; ut_ad(mtr && block); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); @@ -2220,13 +2289,16 @@ #ifdef UNIV_DEBUG_FILE_ACCESSES ut_a(block->page.file_page_was_freed == FALSE); #endif + + trx = thr_local_get_trx(os_thread_get_curr_id()); + if (UNIV_UNLIKELY(!accessed)) { /* In the case of a first access, try to apply linear read-ahead */ buf_read_ahead_linear(buf_block_get_space(block), buf_block_get_zip_size(block), - buf_block_get_page_no(block)); + buf_block_get_page_no(block), trx); } #ifdef UNIV_IBUF_COUNT_DEBUG @@ -2234,6 +2306,8 @@ buf_block_get_page_no(block)) == 0); #endif buf_pool->n_page_gets++; + + _increment_page_get_statistics(&block->page, trx); return(TRUE); } @@ -2256,6 +2330,7 @@ { ibool success; ulint fix_type; + trx_t* trx; ut_ad(mtr); ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); diff -r eaa1577ff396 storage/innobase/buf/buf0rea.c --- a/storage/innobase/buf/buf0rea.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/buf/buf0rea.c Thu Sep 04 12:17:55 2008 -0700 @@ -72,7 +72,8 @@ treat the tablespace as dropped; this is a timestamp we use to stop dangling page reads from a tablespace which we have DISCARDed + IMPORTed back */ - ulint offset) /* in: page number */ + ulint offset, /* in: page number */ + trx_t* trx) { buf_page_t* bpage; ulint wake_later; @@ -133,15 +134,15 @@ ut_ad(buf_page_in_file(bpage)); if (zip_size) { - *err = fil_io(OS_FILE_READ | wake_later, + *err = _fil_io(OS_FILE_READ | wake_later, sync, space, zip_size, offset, 0, zip_size, - bpage->zip.data, bpage); + bpage->zip.data, bpage, trx); } else { ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); - *err = fil_io(OS_FILE_READ | wake_later, + *err = _fil_io(OS_FILE_READ | wake_later, sync, space, 0, offset, 0, UNIV_PAGE_SIZE, - ((buf_block_t*) bpage)->frame, bpage); + ((buf_block_t*) bpage)->frame, bpage, trx); } ut_a(*err == DB_SUCCESS); @@ -174,8 +175,9 @@ read even if we return a value > 0! */ ulint space, /* in: space id */ ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset) /* in: page number of a page which the current thread + ulint offset, /* in: page number of a page which the current thread wants to access */ + trx_t* trx) { ib_int64_t tablespace_version; ulint recent_blocks = 0; @@ -278,7 +280,7 @@ &err, FALSE, ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, space, zip_size, FALSE, - tablespace_version, i); + tablespace_version, i, trx); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -325,7 +327,8 @@ be > 1 if read-ahead occurred */ ulint space, /* in: space id */ ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset) /* in: page number */ + ulint offset, /* in: page number */ + trx_t* trx) { ib_int64_t tablespace_version; ulint count; @@ -334,14 +337,14 @@ tablespace_version = fil_space_get_version(space); - count = buf_read_ahead_random(space, zip_size, offset); + count = buf_read_ahead_random(space, zip_size, offset, trx); /* We do the i/o in the synchronous aio mode to save thread switches: hence TRUE */ count2 = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, zip_size, FALSE, - tablespace_version, offset); + tablespace_version, offset, trx); srv_buf_pool_reads+= count2; if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); @@ -392,8 +395,9 @@ /* out: number of page read requests issued */ ulint space, /* in: space id */ ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset) /* in: page number of a page; NOTE: the current thread + ulint offset, /* in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ + trx_t* trx) { ib_int64_t tablespace_version; buf_page_t* bpage; @@ -590,7 +594,7 @@ count += buf_read_page_low( &err, FALSE, ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, zip_size, FALSE, tablespace_version, i); + space, zip_size, FALSE, tablespace_version, i, trx); if (err == DB_TABLESPACE_DELETED) { ut_print_timestamp(stderr); fprintf(stderr, @@ -680,7 +684,7 @@ buf_read_page_low(&err, sync && (i + 1 == n_stored), BUF_READ_ANY_PAGE, space_ids[i], zip_size, TRUE, space_versions[i], - page_nos[i]); + page_nos[i], NULL); if (UNIV_UNLIKELY(err == DB_TABLESPACE_DELETED)) { tablespace_deleted: @@ -769,12 +773,12 @@ if ((i + 1 == n_stored) && sync) { buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space, zip_size, TRUE, tablespace_version, - page_nos[i]); + page_nos[i], NULL); } else { buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE | OS_AIO_SIMULATED_WAKE_LATER, space, zip_size, TRUE, - tablespace_version, page_nos[i]); + tablespace_version, page_nos[i], NULL); } } diff -r eaa1577ff396 storage/innobase/fil/fil0fil.c --- a/storage/innobase/fil/fil0fil.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/fil/fil0fil.c Thu Sep 04 12:17:55 2008 -0700 @@ -3923,7 +3923,7 @@ node->name, node->handle, buf, offset_low, offset_high, page_size * n_pages, - NULL, NULL); + NULL, NULL, NULL); #endif if (success) { node->size += n_pages; @@ -4251,7 +4251,7 @@ Reads or writes data. This operation is asynchronous (aio). */ UNIV_INTERN ulint -fil_io( +_fil_io( /*===*/ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace @@ -4279,8 +4279,9 @@ void* buf, /* in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message) /* in: message for aio handler if non-sync + void* message, /* in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx) { fil_system_t* system = fil_system; ulint mode; @@ -4447,7 +4448,7 @@ #else /* Queue the aio request */ ret = os_aio(type, mode | wake_later, node->name, node->handle, buf, - offset_low, offset_high, len, node, message); + offset_low, offset_high, len, node, message, trx); #endif ut_a(ret); diff -r eaa1577ff396 storage/innobase/handler/ha_innodb.cc --- a/storage/innobase/handler/ha_innodb.cc Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/handler/ha_innodb.cc Thu Sep 04 12:17:55 2008 -0700 @@ -7476,6 +7476,22 @@ statement has ended */ if (trx->n_mysql_tables_in_use == 0) { + increment_thd_innodb_stats(thd, trx->io_reads, + trx->io_read, + trx->io_reads_wait_timer, + trx->lock_que_wait_timer, + trx->innodb_que_wait_timer, + trx->distinct_page_access + ); + + trx->io_reads = 0; + trx->io_read = 0; + trx->io_reads_wait_timer = 0; + trx->lock_que_wait_timer = 0; + trx->innodb_que_wait_timer = 0; + trx->distinct_page_access = 0; + if (trx->distinct_page_access_hash) + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); trx->mysql_n_tables_locked = 0; prebuilt->used_in_HANDLER = FALSE; diff -r eaa1577ff396 storage/innobase/include/buf0rea.h --- a/storage/innobase/include/buf0rea.h Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/include/buf0rea.h Thu Sep 04 12:17:55 2008 -0700 @@ -10,6 +10,7 @@ #define buf0rea_h #include "univ.i" +#include "trx0types.h" #include "buf0types.h" /************************************************************************ @@ -26,7 +27,9 @@ be > 1 if read-ahead occurred */ ulint space, /* in: space id */ ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset);/* in: page number */ + ulint offset, /* in: page number */ + trx_t* trx); + /************************************************************************ Applies linear read-ahead if in the buf_pool the page is a border page of a linear read-ahead area and all the pages in the area have been accessed. @@ -57,8 +60,10 @@ /* out: number of page read requests issued */ ulint space, /* in: space id */ ulint zip_size,/* in: compressed page size in bytes, or 0 */ - ulint offset);/* in: page number of a page; NOTE: the current thread + ulint offset, /* in: page number of a page; NOTE: the current thread must want access to this page (see NOTE 3 above) */ + trx_t* trx); + /************************************************************************ Issues read requests for pages which the ibuf module wants to read in, in order to contract the insert buffer tree. Technically, this function is like diff -r eaa1577ff396 storage/innobase/include/fil0fil.h --- a/storage/innobase/include/fil0fil.h Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/include/fil0fil.h Thu Sep 04 12:17:55 2008 -0700 @@ -595,9 +595,13 @@ ulint id); /* in: space id */ /************************************************************************ Reads or writes data. This operation is asynchronous (aio). */ + +#define fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message) \ + _fil_io(type, sync, space_id, zip_size, block_offset, byte_offset, len, buf, message, NULL) + UNIV_INTERN ulint -fil_io( +_fil_io( /*===*/ /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do i/o on a tablespace @@ -625,8 +629,10 @@ void* buf, /* in/out: buffer where to store read data or from where to write; in aio this must be appropriately aligned */ - void* message); /* in: message for aio handler if non-sync + void* message, /* in: message for aio handler if non-sync aio used, else ignored */ + trx_t* trx); + /************************************************************************** Waits for an aio operation to complete. This function is used to write the handler for completed requests. The aio array of pending requests is divided diff -r eaa1577ff396 storage/innobase/include/os0file.h --- a/storage/innobase/include/os0file.h Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/include/os0file.h Thu Sep 04 12:17:55 2008 -0700 @@ -10,6 +10,8 @@ #define os0file_h #include "univ.i" + +#include "trx0types.h" #ifndef __WIN__ #include @@ -436,9 +438,13 @@ printed of all errors */ /*********************************************************************** Requests a synchronous read operation. */ + +#define os_file_read(file, buf, offset, offset_high, n) \ + _os_file_read(file, buf, offset, offset_high, n, NULL) + UNIV_INTERN ibool -os_file_read( +_os_file_read( /*=========*/ /* out: TRUE if request was successful, FALSE if fail */ @@ -448,7 +454,8 @@ offset where to read */ ulint offset_high,/* in: most significant 32 bits of offset */ - ulint n); /* in: number of bytes to read */ + ulint n, /* in: number of bytes to read */ + trx_t* trx); /*********************************************************************** Rewind file to its start, read at most size - 1 bytes from it to str, and NUL-terminate str. All errors are silently ignored. This function is @@ -600,7 +607,8 @@ can be used to identify a completed aio operation); if mode is OS_AIO_SYNC, these are ignored */ - void* message2); + void* message2, + trx_t* trx); /**************************************************************************** Wakes up all async i/o threads so that they know to exit themselves in shutdown. */ diff -r eaa1577ff396 storage/innobase/include/thr0loc.h --- a/storage/innobase/include/thr0loc.h Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/include/thr0loc.h Thu Sep 04 12:17:55 2008 -0700 @@ -15,6 +15,7 @@ #include "univ.i" #include "os0thread.h" +#include "trx0trx.h" /******************************************************************** Initializes the thread local storage module. */ @@ -36,6 +37,14 @@ /*===========*/ os_thread_id_t id); /* in: thread id */ /*********************************************************************** +Gets trx */ + +trx_t* +thr_local_get_trx( +/*==================*/ + /* out: trx for mysql */ + os_thread_id_t id); /* in: thread id of the thread */ +/*********************************************************************** Gets the slot number in the thread table of a thread. */ UNIV_INTERN ulint @@ -43,6 +52,14 @@ /*==================*/ /* out: slot number */ os_thread_id_t id); /* in: thread id of the thread */ +/*********************************************************************** +Sets in the local storage the slot number in the thread table of a thread. */ + +void +thr_local_set_trx( +/*==================*/ + os_thread_id_t id, /* in: thread id of the thread */ + trx_t* trx); /* in: slot number */ /*********************************************************************** Sets in the local storage the slot number in the thread table of a thread. */ UNIV_INTERN diff -r eaa1577ff396 storage/innobase/include/trx0trx.h --- a/storage/innobase/include/trx0trx.h Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/include/trx0trx.h Thu Sep 04 12:17:55 2008 -0700 @@ -732,6 +732,17 @@ /*------------------------------*/ char detailed_error[256]; /* detailed error message for last error, or empty. */ + /*------------------------------*/ + os_thread_id_t trx_thread_id; + ulint io_reads; + ib_uint64_t io_read; + ulint io_reads_wait_timer; + ib_uint64_t lock_que_wait_ustarted; + ulint lock_que_wait_timer; + ulint innodb_que_wait_timer; + ulint distinct_page_access; +#define DPAH_SIZE 8192 + byte* distinct_page_access_hash; }; #define TRX_MAX_N_THREADS 32 /* maximum number of diff -r eaa1577ff396 storage/innobase/lock/lock0lock.c --- a/storage/innobase/lock/lock0lock.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/lock/lock0lock.c Thu Sep 04 12:17:55 2008 -0700 @@ -1738,6 +1738,8 @@ { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -1796,6 +1798,8 @@ trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_int64_t)sec * 1000000 + ms; ut_a(que_thr_stop(thr)); @@ -3610,6 +3614,8 @@ { lock_t* lock; trx_t* trx; + ulint sec; + ulint ms; ut_ad(mutex_own(&kernel_mutex)); @@ -3663,6 +3669,9 @@ return(DB_SUCCESS); } + trx->wait_started = time(NULL); + ut_usectime(&sec, &ms); + trx->lock_que_wait_ustarted = (ib_int64_t)sec * 1000000 + ms; trx->que_state = TRX_QUE_LOCK_WAIT; trx->was_chosen_as_deadlock_victim = FALSE; trx->wait_started = time(NULL); @@ -4351,7 +4360,7 @@ ulint i; mtr_t mtr; trx_t* trx; - + fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); /* First print info on non-active transactions */ diff -r eaa1577ff396 storage/innobase/os/os0file.c --- a/storage/innobase/os/os0file.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/os/os0file.c Thu Sep 04 12:17:55 2008 -0700 @@ -14,6 +14,7 @@ #include "srv0start.h" #include "fil0fil.h" #include "buf0buf.h" +#include "trx0sys.h" #if defined(UNIV_HOTBACKUP) && defined(__WIN__) /* Add includes for the _stat() call to compile on Windows */ @@ -100,6 +101,7 @@ struct aiocb control; /* Posix control block for aio request */ #endif + trx_t* trx; }; /* The aio array structure */ @@ -1947,9 +1949,13 @@ #ifndef __WIN__ /*********************************************************************** Does a synchronous read operation in Posix. */ + +#define os_file_pread(file, buf, n, offset, offset_high) \ + _os_file_pread(file, buf, n, offset, offset_high, NULL); + static ssize_t -os_file_pread( +_os_file_pread( /*==========*/ /* out: number of bytes read, -1 if error */ os_file_t file, /* in: handle to a file */ @@ -1957,12 +1963,17 @@ ulint n, /* in: number of bytes to read */ ulint offset, /* in: least significant 32 bits of file offset from where to read */ - ulint offset_high) /* in: most significant 32 bits of - offset */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + trx_t* trx) { off_t offs; ssize_t n_bytes; - + ulint sec; + ulint ms; + ib_int64_t start_time; + ib_int64_t finish_time; + ut_a((offset & 0xFFFFFFFFUL) == offset); /* If off_t is > 4 bytes in size, then we assume we can pass a @@ -1981,7 +1992,13 @@ } os_n_file_reads++; - + if (trx) + { + trx->io_reads++; + trx->io_read += n; + ut_usectime(&sec, &ms); + start_time = (ib_int64_t)sec * 1000000 + ms; + } #if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) os_mutex_enter(os_file_count_mutex); os_file_n_pending_preads++; @@ -1994,6 +2011,13 @@ os_file_n_pending_preads--; os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + + if (trx) + { + ut_usectime(&sec, &ms); + finish_time = (ib_int64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } return(n_bytes); #else @@ -2025,6 +2049,13 @@ os_n_pending_reads--; os_mutex_exit(os_file_count_mutex); + if (trx) + { + ut_usectime(&sec, &ms); + finish_time = (ib_int64_t)sec * 1000000 + ms; + trx->io_reads_wait_timer += (ulint)(finish_time - start_time); + } + return(ret); } #endif @@ -2148,7 +2179,7 @@ Requests a synchronous positioned read operation. */ UNIV_INTERN ibool -os_file_read( +_os_file_read( /*=========*/ /* out: TRUE if request was successful, FALSE if fail */ @@ -2158,7 +2189,8 @@ offset where to read */ ulint offset_high, /* in: most significant 32 bits of offset */ - ulint n) /* in: number of bytes to read */ + ulint n, /* in: number of bytes to read */ + trx_t* trx) { #ifdef __WIN__ BOOL ret; @@ -2173,7 +2205,6 @@ os_n_file_reads++; os_bytes_read_since_printout += n; - try_again: ut_ad(file); ut_ad(buf); @@ -2222,7 +2253,7 @@ os_bytes_read_since_printout += n; try_again: - ret = os_file_pread(file, buf, n, offset, offset_high); + ret = _os_file_pread(file, buf, n, offset, offset_high, trx); if ((ulint)ret == n) { @@ -3204,7 +3235,8 @@ offset */ ulint offset_high, /* in: most significant 32 bits of offset */ - ulint len) /* in: length of the block to read or write */ + ulint len, /* in: length of the block to read or write */ + trx_t* trx) { os_aio_slot_t* slot; #ifdef WIN_ASYNC_IO @@ -3457,7 +3489,8 @@ can be used to identify a completed aio operation); if mode is OS_AIO_SYNC, these are ignored */ - void* message2) + void* message2, + trx_t* trx) { os_aio_array_t* array; os_aio_slot_t* slot; @@ -3496,8 +3529,8 @@ wait in the Windows case. */ if (type == OS_FILE_READ) { - return(os_file_read(file, buf, offset, - offset_high, n)); + return(_os_file_read(file, buf, offset, + offset_high, n, trx)); } ut_a(type == OS_FILE_WRITE); @@ -3530,8 +3563,13 @@ ut_error; } + if (trx && type == OS_FILE_READ) + { + trx->io_reads++; + trx->io_read += n; + } slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, offset_high, n); + name, buf, offset, offset_high, n, trx); if (type == OS_FILE_READ) { if (os_aio_use_native_aio) { #ifdef WIN_ASYNC_IO @@ -4068,7 +4106,7 @@ ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, consecutive_ios[i]->len); - offs += consecutive_ios[i]->len; + offs += consecutive_ios[i]->len; } } @@ -4080,9 +4118,8 @@ /* Mark the i/os done in slots */ - for (i = 0; i < n_consecutive; i++) { + for (i = 0; i < n_consecutive; i++) consecutive_ios[i]->io_already_done = TRUE; - } /* We return the messages for the first slot now, and if there were several slots, the messages will be returned with subsequent calls diff -r eaa1577ff396 storage/innobase/srv/srv0srv.c --- a/storage/innobase/srv/srv0srv.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/srv/srv0srv.c Thu Sep 04 12:17:55 2008 -0700 @@ -965,6 +965,10 @@ ibool has_slept = FALSE; srv_conc_slot_t* slot = NULL; ulint i; + ib_int64_t start_time = 0L; + ib_int64_t finish_time = 0L; + ulint sec; + ulint ms; if (trx->mysql_thd != NULL && thd_is_replication_slave_thread(trx->mysql_thd)) { @@ -1032,6 +1036,7 @@ switches. */ if (SRV_THREAD_SLEEP_DELAY > 0) { os_thread_sleep(SRV_THREAD_SLEEP_DELAY); + trx->innodb_que_wait_timer += SRV_THREAD_SLEEP_DELAY; } trx->op_info = ""; @@ -1087,11 +1092,18 @@ /* Go to wait for the event; when a thread leaves InnoDB it will release this thread */ + ut_usectime(&sec, &ms); + start_time = (ib_int64_t)sec * 1000000 + ms; + trx->op_info = "waiting in InnoDB queue"; os_event_wait(slot->event); trx->op_info = ""; + + ut_usectime(&sec, &ms); + finish_time = (ib_int64_t)sec * 1000000 + ms; + trx->innodb_que_wait_timer += (ulint)(finish_time - start_time); os_fast_mutex_lock(&srv_conc_mutex); diff -r eaa1577ff396 storage/innobase/thr/thr0loc.c --- a/storage/innobase/thr/thr0loc.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/thr/thr0loc.c Thu Sep 04 12:17:55 2008 -0700 @@ -45,6 +45,7 @@ for this thread */ ibool in_ibuf;/* TRUE if the the thread is doing an ibuf operation */ + trx_t* trx; hash_node_t hash; /* hash chain node */ ulint magic_n; }; @@ -111,6 +112,29 @@ } /*********************************************************************** +Gets trx */ + +trx_t* +thr_local_get_trx( +/*==================*/ + /* out: trx for mysql */ + os_thread_id_t id) /* in: thread id of the thread */ +{ + trx_t* trx; + thr_local_t* local; + + mutex_enter(&thr_local_mutex); + + local = thr_local_get(id); + + trx = local->trx; + + mutex_exit(&thr_local_mutex); + + return(trx); +} + +/*********************************************************************** Sets the slot number in the thread table of a thread. */ UNIV_INTERN void @@ -126,6 +150,26 @@ local = thr_local_get(id); local->slot_no = slot_no; + + mutex_exit(&thr_local_mutex); +} + +/*********************************************************************** +Sets trx */ + +void +thr_local_set_trx( +/*==================*/ + os_thread_id_t id, /* in: thread id of the thread */ + trx_t* trx) /* in: trx */ +{ + thr_local_t* local; + + mutex_enter(&thr_local_mutex); + + local = thr_local_get(id); + + local->trx = trx; mutex_exit(&thr_local_mutex); } @@ -170,6 +214,7 @@ local->magic_n = THR_LOCAL_MAGIC_N; local->in_ibuf = FALSE; + local->trx = NULL; mutex_enter(&thr_local_mutex); diff -r eaa1577ff396 storage/innobase/trx/trx0trx.c --- a/storage/innobase/trx/trx0trx.c Thu Sep 04 12:16:10 2008 -0700 +++ b/storage/innobase/trx/trx0trx.c Thu Sep 04 12:17:55 2008 -0700 @@ -158,6 +158,16 @@ trx->global_read_view_heap = mem_heap_create(256); trx->global_read_view = NULL; trx->read_view = NULL; + + trx->io_reads = 0; + trx->io_read = 0; + trx->io_reads_wait_timer = 0; + trx->lock_que_wait_timer = 0; + trx->innodb_que_wait_timer = 0; + trx->distinct_page_access = 0; + trx->distinct_page_access_hash = NULL; + trx->trx_thread_id = os_thread_get_curr_id(); + thr_local_set_trx(trx->trx_thread_id, NULL); /* Set X/Open XA transaction identification to NULL */ memset(&trx->xid, 0, sizeof(trx->xid)); @@ -193,6 +203,10 @@ trx->mysql_thread_id = os_thread_get_curr_id(); trx->mysql_process_no = os_proc_get_number(); + + trx->distinct_page_access_hash = mem_alloc(DPAH_SIZE); + memset(trx->distinct_page_access_hash, 0, DPAH_SIZE); + thr_local_set_trx(trx->mysql_thread_id, trx); return(trx); } @@ -306,6 +320,8 @@ ut_a(trx->read_view == NULL); + thr_local_free(trx->trx_thread_id); + mem_free(trx); } @@ -317,6 +333,12 @@ /*===============*/ trx_t* trx) /* in, own: trx object */ { + if (trx->distinct_page_access_hash) + { + mem_free(trx->distinct_page_access_hash); + trx->distinct_page_access_hash= NULL; + } + mutex_enter(&kernel_mutex); UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); @@ -1008,6 +1030,9 @@ trx_t* trx) /* in: transaction */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_int64_t now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1022,6 +1047,9 @@ thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + ut_usectime(&sec, &ms); + now = (ib_int64_t)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); trx->que_state = TRX_QUE_RUNNING; } @@ -1035,6 +1063,9 @@ trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */ { que_thr_t* thr; + ulint sec; + ulint ms; + ib_int64_t now; ut_ad(mutex_own(&kernel_mutex)); ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); @@ -1049,6 +1080,9 @@ thr = UT_LIST_GET_FIRST(trx->wait_thrs); } + ut_usectime(&sec, &ms); + now = (ib_int64_t)sec * 1000000 + ms; + trx->lock_que_wait_timer += (ulint)(now - trx->lock_que_wait_ustarted); trx->que_state = TRX_QUE_RUNNING; }