Add new mutex page inspired by: http://www.percona.com/blog/2015/01/06/getting-mutex...
author    Simon J Mudd <sjmudd@pobox.com>
          Wed, 7 Jan 2015 22:44:19 +0000 (23:44 +0100)
committer Simon J Mudd <sjmudd@pobox.com>
          Wed, 7 Jan 2015 22:44:19 +0000 (23:44 +0100)
BUGS
README.md
main.go
p_s/events_waits_summary_global_by_event_name/events_waits_summary_global_by_event_name.go [new file with mode: 0644]
p_s/events_waits_summary_global_by_event_name/events_waits_summary_global_by_event_name_row.go [new file with mode: 0644]
p_s/events_waits_summary_global_by_event_name/readme.txt [new file with mode: 0644]
state/state.go
version/version.go

diff --git a/BUGS b/BUGS
index 8ef7fba..eb9a34a 100644
--- a/BUGS
+++ b/BUGS
@@ -29,3 +29,15 @@ xxxxxx.xxxxxxxx                   15.71 s   1.6%| 99.6%   0.4%
 
 with a very long latency. Related to doing backups. Need to check the cause
 of the 5124.10h ++ issue. Probably a subtraction problem of some sort.
+
+4. Mutex page needs to configure P_S and restore the configuration on exit.
+
+That is: set the appropriate settings, such as
+
+UPDATE setup_instruments
+SET enabled = 'YES', timed = 'YES'
+WHERE NAME LIKE 'wait/synch/mutex/innodb/%';
+
+However, on exit, it should restore the settings to their previous
+values.  Currently, if you have not configured p_s appropriately, you
+won't get any useful information out of this page.
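
A minimal sketch of one way to do this (not what the code does yet):
snapshot the current values of the relevant setup_instruments rows before
enabling them, then replay the snapshot on exit. The join-based restore
assumes multi-table UPDATE is permitted against setup_instruments; if it is
not, the saved rows can be replayed one UPDATE at a time.

-- save the current settings (the temporary table lives for the session)
CREATE TEMPORARY TABLE saved_setup_instruments AS
SELECT NAME, ENABLED, TIMED
FROM performance_schema.setup_instruments
WHERE NAME LIKE 'wait/synch/mutex/innodb/%';

-- enable the InnoDB mutex instruments while pstop runs
UPDATE performance_schema.setup_instruments
SET ENABLED = 'YES', TIMED = 'YES'
WHERE NAME LIKE 'wait/synch/mutex/innodb/%';

-- on exit, restore whatever was there before
UPDATE performance_schema.setup_instruments si
JOIN saved_setup_instruments s USING (NAME)
SET si.ENABLED = s.ENABLED, si.TIMED = s.TIMED;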
diff --git a/README.md b/README.md
index 68e7f82..097450a 100644
--- a/README.md
+++ b/README.md
@@ -57,6 +57,7 @@ some info but if the queries are very short then the integer runtime
 in seconds makes the output far less interesting. Total idle time is also
 shown as this gives an indication of perhaps overly long idle queries,
 and the sum of the values here if there's a pile up may be interesting.
+* Mutex mode: show InnoDB mutexes ordered by latency (currently requires correct P_S setup).
 
 You can change the polling interval and switch between modes (see below).
 
@@ -70,7 +71,7 @@ The following keys allow you to navigate around the different pstop displays or
 * q - quit
 * t - toggle between showing the statistics since resetting pstop started or you explicitly reset them (with 'z') [REL] or showing the statistics as collected from MySQL [ABS].
 * z - reset statistics. That is counters you see are relative to when you "reset" statistics.
-* <tab> - change display modes between: latency, ops, file I/O, lock modes and user modes.
+* <tab> - change display modes between: latency, ops, file I/O, lock, user and mutex modes.
 * left arrow - change to previous screen
 * right arrow - change to next screen
 
diff --git a/main.go b/main.go
index c7de87b..0197e83 100644
--- a/main.go
+++ b/main.go
@@ -116,6 +116,7 @@ func usage() {
 // be found.
 func validate_mysql_version(dbh *sql.DB) error {
        var tables = [...]string{
+               "performance_schema.events_waits_summary_global_by_event_name",
                "performance_schema.file_summary_by_instance",
                "performance_schema.table_io_waits_summary_by_table",
                "performance_schema.table_lock_waits_summary_by_table",
diff --git a/p_s/events_waits_summary_global_by_event_name/events_waits_summary_global_by_event_name.go b/p_s/events_waits_summary_global_by_event_name/events_waits_summary_global_by_event_name.go
new file mode 100644
index 0000000..7560cdb
--- /dev/null
@@ -0,0 +1,141 @@
+// p_s - library routines for pstop.
+//
+// This file contains the library routines for managing the
+// events_waits_summary_global_by_event_name table.
+package events_waits_summary_global_by_event_name
+
+import (
+       "database/sql"
+       "fmt"
+       "time"
+
+       "github.com/sjmudd/pstop/lib"
+       "github.com/sjmudd/pstop/p_s"
+)
+
+// a table of rows
+type Table_events_waits_summary_global_by_event_name struct {
+       p_s.RelativeStats
+       p_s.InitialTime
+       want_latency bool
+       initial      events_waits_summary_global_by_event_name_rows // initial data for relative values
+       current      events_waits_summary_global_by_event_name_rows // last loaded values
+       results      events_waits_summary_global_by_event_name_rows // results (maybe with subtraction)
+       totals       events_waits_summary_global_by_event_name_row  // totals of results
+}
+
+func (t *Table_events_waits_summary_global_by_event_name) SetWantsLatency(want_latency bool) {
+       t.want_latency = want_latency
+}
+
+func (t Table_events_waits_summary_global_by_event_name) WantsLatency() bool {
+       return t.want_latency
+}
+
+// Collect() collects data from the db, updating initial
+// values if needed, and then subtracting initial values if we want
+// relative values, after which it stores totals.
+func (t *Table_events_waits_summary_global_by_event_name) Collect(dbh *sql.DB) {
+       start := time.Now()
+       // lib.Logger.Println("Table_events_waits_summary_global_by_event_name.Collect() BEGIN")
+       t.current = select_ewsgben_rows(dbh)
+       lib.Logger.Println("t.current collected", len(t.current), "row(s) from SELECT")
+
+       if len(t.initial) == 0 && len(t.current) > 0 {
+               lib.Logger.Println("t.initial: copying from t.current (initial setup)")
+               t.initial = make(events_waits_summary_global_by_event_name_rows, len(t.current))
+               copy(t.initial, t.current)
+       }
+
+       // check for reload initial characteristics
+       if t.initial.needs_refresh(t.current) {
+               lib.Logger.Println("t.initial: copying from t.current (data needs refreshing)")
+               t.initial = make(events_waits_summary_global_by_event_name_rows, len(t.current))
+               copy(t.initial, t.current)
+       }
+
+       t.make_results()
+
+       // lib.Logger.Println( "t.initial:", t.initial )
+       // lib.Logger.Println( "t.current:", t.current )
+       lib.Logger.Println("t.initial.totals():", t.initial.totals())
+       lib.Logger.Println("t.current.totals():", t.current.totals())
+       // lib.Logger.Println("t.results:", t.results)
+       // lib.Logger.Println("t.totals:", t.totals)
+       lib.Logger.Println("Table_events_waits_summary_global_by_event_name.Collect() END, took:", time.Duration(time.Since(start)).String())
+}
+
+func (t *Table_events_waits_summary_global_by_event_name) make_results() {
+       // lib.Logger.Println( "- t.results set from t.current" )
+       t.results = make(events_waits_summary_global_by_event_name_rows, len(t.current))
+       copy(t.results, t.current)
+       if t.WantRelativeStats() {
+               // lib.Logger.Println( "- subtracting t.initial from t.results as WantRelativeStats()" )
+               t.results.subtract(t.initial)
+       }
+
+       // lib.Logger.Println( "- sorting t.results" )
+       t.results.Sort()
+       // lib.Logger.Println( "- collecting t.totals from t.results" )
+       t.totals = t.results.totals()
+}
+
+// reset the statistics to current values
+func (t *Table_events_waits_summary_global_by_event_name) SyncReferenceValues() {
+       // lib.Logger.Println( "Table_events_waits_summary_global_by_event_name.SyncReferenceValues() BEGIN" )
+
+       t.SetNow()
+       t.initial = make(events_waits_summary_global_by_event_name_rows, len(t.current))
+       copy(t.initial, t.current)
+
+       t.make_results()
+
+       // lib.Logger.Println( "Table_events_waits_summary_global_by_event_name.SyncReferenceValues() END" )
+}
+
+func (t Table_events_waits_summary_global_by_event_name) EmptyRowContent() string {
+       return t.emptyRowContent()
+}
+
+func (t *Table_events_waits_summary_global_by_event_name) Headings() string {
+       var r events_waits_summary_global_by_event_name_row
+
+       return r.headings()
+}
+
+func (t Table_events_waits_summary_global_by_event_name) RowContent(max_rows int) []string {
+       rows := make([]string, 0, max_rows)
+
+       for i := range t.results {
+               if i < max_rows {
+                       rows = append(rows, t.results[i].row_content(t.totals))
+               }
+       }
+
+       return rows
+}
+
+func (t Table_events_waits_summary_global_by_event_name) emptyRowContent() string {
+       var r events_waits_summary_global_by_event_name_row
+
+       return r.row_content(r)
+}
+
+func (t Table_events_waits_summary_global_by_event_name) TotalRowContent() string {
+       return t.totals.row_content(t.totals)
+}
+
+func (t Table_events_waits_summary_global_by_event_name) Description() string {
+       count := t.count_rows()
+       return fmt.Sprintf("Mutex Latency (events_waits_summary_global_by_event_name) %d rows", count)
+}
+
+func (t Table_events_waits_summary_global_by_event_name) count_rows() int {
+       var count int
+       for row := range t.results {
+               if t.results[row].SUM_TIMER_WAIT > 0 {
+                       count++
+               }
+       }
+       return count
+}
diff --git a/p_s/events_waits_summary_global_by_event_name/events_waits_summary_global_by_event_name_row.go b/p_s/events_waits_summary_global_by_event_name/events_waits_summary_global_by_event_name_row.go
new file mode 100644
index 0000000..86b6bb6
--- /dev/null
@@ -0,0 +1,178 @@
+// This file contains the library routines for managing the
+// events_waits_summary_global_by_event_name table.
+package events_waits_summary_global_by_event_name
+
+import (
+       "database/sql"
+       "fmt"
+       "log"
+       "sort"
+       "strings"
+
+       "github.com/sjmudd/pstop/lib"
+)
+
+// a row from performance_schema.events_waits_summary_global_by_event_name
+// Note: upper case names to match the performance_schema column names.
+// This type is _not_ meant to be exported.
+type events_waits_summary_global_by_event_name_row struct {
+       EVENT_NAME     string
+       SUM_TIMER_WAIT uint64
+       COUNT_STAR     uint64
+}
+type events_waits_summary_global_by_event_name_rows []events_waits_summary_global_by_event_name_row
+
+// trim off the leading 'wait/synch/mutex/innodb/'
+func (r *events_waits_summary_global_by_event_name_row) name() string {
+       var n string
+       if r.EVENT_NAME == "Totals" {
+               n += r.EVENT_NAME
+       } else if len(r.EVENT_NAME) >= 24 {
+               n += r.EVENT_NAME[24:]
+       }
+       return n
+}
+
+func (r *events_waits_summary_global_by_event_name_row) pretty_name() string {
+       s := r.name()
+       if len(s) > 30 {
+               s = s[:29]
+       }
+       return s
+}
+
+func (r *events_waits_summary_global_by_event_name_row) headings() string {
+       return fmt.Sprintf("%-30s %10s %6s", "Mutex Name", "Latency", "%")
+}
+
+// generate a printable result
+func (r *events_waits_summary_global_by_event_name_row) row_content(totals events_waits_summary_global_by_event_name_row) string {
+       name := r.pretty_name()
+       if r.COUNT_STAR == 0 && name != "Totals" {
+               name = ""
+       }
+
+       return fmt.Sprintf("%-30s %10s %6s",
+               name,
+               lib.FormatTime(r.SUM_TIMER_WAIT),
+               lib.FormatPct(lib.MyDivide(r.SUM_TIMER_WAIT, totals.SUM_TIMER_WAIT)))
+}
+
+func (this *events_waits_summary_global_by_event_name_row) add(other events_waits_summary_global_by_event_name_row) {
+       this.SUM_TIMER_WAIT += other.SUM_TIMER_WAIT
+       this.COUNT_STAR += other.COUNT_STAR
+}
+
+// subtract the countable values in one row from another
+func (this *events_waits_summary_global_by_event_name_row) subtract(other events_waits_summary_global_by_event_name_row) {
+       // check for issues here (we have a bug) and log it
+       // - this situation should not happen so there's a logic bug somewhere else
+       if this.SUM_TIMER_WAIT >= other.SUM_TIMER_WAIT {
+               this.SUM_TIMER_WAIT -= other.SUM_TIMER_WAIT
+               this.COUNT_STAR -= other.COUNT_STAR
+       } else {
+               lib.Logger.Println("WARNING: events_waits_summary_global_by_event_name_row.subtract() - subtraction problem! (not subtracting)")
+               lib.Logger.Println("this=", this)
+               lib.Logger.Println("other=", other)
+       }
+}
+
+func (t events_waits_summary_global_by_event_name_rows) totals() events_waits_summary_global_by_event_name_row {
+       var totals events_waits_summary_global_by_event_name_row
+       totals.EVENT_NAME = "Totals"
+
+       for i := range t {
+               totals.add(t[i])
+       }
+
+       return totals
+}
+
+func select_ewsgben_rows(dbh *sql.DB) events_waits_summary_global_by_event_name_rows {
+       var t events_waits_summary_global_by_event_name_rows
+
+       // collect only the InnoDB mutex rows which have seen some activity
+       sql := "SELECT EVENT_NAME, SUM_TIMER_WAIT, COUNT_STAR FROM events_waits_summary_global_by_event_name WHERE SUM_TIMER_WAIT > 0 AND EVENT_NAME LIKE 'wait/synch/mutex/innodb/%'"
+
+       rows, err := dbh.Query(sql)
+       if err != nil {
+               log.Fatal(err)
+       }
+       defer rows.Close()
+
+       for rows.Next() {
+               var r events_waits_summary_global_by_event_name_row
+               if err := rows.Scan(
+                       &r.EVENT_NAME,
+                       &r.SUM_TIMER_WAIT,
+                       &r.COUNT_STAR); err != nil {
+                       log.Fatal(err)
+               }
+               // keep every returned row as we may reference it later for relative values
+               t = append(t, r)
+       }
+       if err := rows.Err(); err != nil {
+               log.Fatal(err)
+       }
+
+       return t
+}
+
+func (t events_waits_summary_global_by_event_name_rows) Len() int      { return len(t) }
+func (t events_waits_summary_global_by_event_name_rows) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
+
+// sort by value (descending) but also by "name" (ascending) if the values are the same
+func (t events_waits_summary_global_by_event_name_rows) Less(i, j int) bool {
+       return (t[i].SUM_TIMER_WAIT > t[j].SUM_TIMER_WAIT) || (t[i].SUM_TIMER_WAIT == t[j].SUM_TIMER_WAIT && t[i].name() < t[j].name())
+}
+
+func (t events_waits_summary_global_by_event_name_rows) Sort() {
+       sort.Sort(t)
+}
+
+// remove the initial values from those rows where there's a match
+// - if we find a row we can't match ignore it
+func (this *events_waits_summary_global_by_event_name_rows) subtract(initial events_waits_summary_global_by_event_name_rows) {
+       initial_by_name := make(map[string]int)
+
+       // iterate over rows by name
+       for i := range initial {
+               initial_by_name[initial[i].name()] = i
+       }
+
+       for i := range *this {
+               this_name := (*this)[i].name()
+               if _, ok := initial_by_name[this_name]; ok {
+                       initial_index := initial_by_name[this_name]
+                       (*this)[i].subtract(initial[initial_index])
+               }
+       }
+}
+
+// if the counters in t2 have gone backwards (smaller totals than t) then the
+// reference data needs refreshing, e.g. after the P_S tables were truncated.
+func (t events_waits_summary_global_by_event_name_rows) needs_refresh(t2 events_waits_summary_global_by_event_name_rows) bool {
+       my_totals := t.totals()
+       t2_totals := t2.totals()
+
+       return my_totals.SUM_TIMER_WAIT > t2_totals.SUM_TIMER_WAIT
+}
+
+// describe a whole row
+func (r events_waits_summary_global_by_event_name_row) String() string {
+       return fmt.Sprintf("%-30s|%10s %10s",
+               r.pretty_name(),
+               lib.FormatTime(r.SUM_TIMER_WAIT),
+               lib.FormatAmount(r.COUNT_STAR))
+}
+
+// describe a whole table
+func (t events_waits_summary_global_by_event_name_rows) String() string {
+       s := make([]string, 0, len(t))
+
+       for i := range t {
+               s = append(s, t[i].String())
+       }
+
+       return strings.Join(s, "\n")
+}
diff --git a/p_s/events_waits_summary_global_by_event_name/readme.txt b/p_s/events_waits_summary_global_by_event_name/readme.txt
new file mode 100644
index 0000000..d702655
--- /dev/null
@@ -0,0 +1,25 @@
+This screen is inspired by this post:
+
+http://www.percona.com/blog/2015/01/06/getting-mutex-information-from-mysqls-performance_schema/
+
+root@localhost [performance_schema]> select * from setup_instruments limit 10;
++---------------------------------------------------------+---------+-------+
+| NAME                                                    | ENABLED | TIMED |
++---------------------------------------------------------+---------+-------+
+| wait/synch/mutex/sql/TC_LOG_MMAP::LOCK_tc               | NO      | NO    |
+| wait/synch/mutex/sql/LOCK_des_key_file                  | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_commit         | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_commit_queue   | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_done           | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_flush_queue    | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_index          | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_log            | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_binlog_end_pos | NO      | NO    |
+| wait/synch/mutex/sql/MYSQL_BIN_LOG::LOCK_sync           | NO      | NO    |
++---------------------------------------------------------+---------+-------+
+10 rows in set (0.00 sec)
+
+root@localhost [performance_schema]> update setup_instruments set enabled = 'YES', timed = 'YES' where NAME like 'wait/synch/mutex/innodb/%';
+Query OK, 49 rows affected (0.02 sec)
+Rows matched: 49  Changed: 49  Warnings: 0
+
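
For reference, once the instruments above are enabled the mutex screen reads
from events_waits_summary_global_by_event_name. A quick manual check that
data is arriving (the same query the new code runs, with an explicit
ordering added here for readability):

SELECT EVENT_NAME, COUNT_STAR, SUM_TIMER_WAIT
FROM performance_schema.events_waits_summary_global_by_event_name
WHERE EVENT_NAME LIKE 'wait/synch/mutex/innodb/%'
  AND SUM_TIMER_WAIT > 0
ORDER BY SUM_TIMER_WAIT DESC;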
diff --git a/state/state.go b/state/state.go
index c799ccd..52fb681 100644
--- a/state/state.go
+++ b/state/state.go
@@ -14,6 +14,7 @@ import (
        fsbi "github.com/sjmudd/pstop/p_s/file_summary_by_instance"
        "github.com/sjmudd/pstop/p_s/ps_table"
        "github.com/sjmudd/pstop/wait_info"
+       ewsgben "github.com/sjmudd/pstop/p_s/events_waits_summary_global_by_event_name"
        tiwsbt "github.com/sjmudd/pstop/p_s/table_io_waits_summary_by_table"
        tlwsbt "github.com/sjmudd/pstop/p_s/table_lock_waits_summary_by_table"
        "github.com/sjmudd/pstop/screen"
@@ -29,6 +30,7 @@ const (
        showIO      = iota
        showLocks   = iota
        showUsers   = iota
+       showMutex   = iota
 )
 
 type State struct {
@@ -40,6 +42,7 @@ type State struct {
        fsbi                ps_table.Tabler // ufsbi.File_summary_by_instance
        tiwsbt              tiwsbt.Table_io_waits_summary_by_table
        tlwsbt              ps_table.Tabler // tlwsbt.Table_lock_waits_summary_by_table
+       ewsgben             ps_table.Tabler // ewsgben.Events_waits_summary_global_by_event_name
        users               i_s.Processlist
        screen              screen.TermboxScreen
        show                Show
@@ -58,6 +61,7 @@ func (state *State) Setup(dbh *sql.DB) {
        // setup to their initial types/values
        state.fsbi = fsbi.NewFileSummaryByInstance(variables)
        state.tlwsbt = new(tlwsbt.Table_lock_waits_summary_by_table)
+       state.ewsgben = new(ewsgben.Table_events_waits_summary_global_by_event_name)
 
        state.want_relative_stats = true // we show info from the point we start collecting data
        state.fsbi.SetWantRelativeStats(state.want_relative_stats)
@@ -68,6 +72,8 @@ func (state *State) Setup(dbh *sql.DB) {
        state.tiwsbt.SetNow()
        state.users.SetWantRelativeStats(state.want_relative_stats) // ignored
        state.users.SetNow()                                        // ignored
+       state.ewsgben.SetWantRelativeStats(state.want_relative_stats)
+       state.ewsgben.SetNow()
 
        state.ResetDBStatistics()
 
@@ -131,6 +137,8 @@ func (state *State) Collect() {
                state.tlwsbt.Collect(state.dbh)
        case showUsers:
                state.users.Collect(state.dbh)
+       case showMutex:
+               state.ewsgben.Collect(state.dbh)
        }
        lib.Logger.Println("state.Collect() took", time.Duration(time.Since(start)).String())
 }
@@ -166,7 +174,7 @@ func (state State) Help() bool {
        return state.help
 }
 
-// states go: showLatency -> showOps -> showIO -> showLocks -> showUsers
+// states go: showLatency -> showOps -> showIO -> showLocks -> showUsers -> showMutex
 
 // display the output according to the mode we are in
 func (state *State) Display() {
@@ -183,6 +191,8 @@ func (state *State) Display() {
                        state.displayLocks()
                case showUsers:
                        state.displayUsers()
+               case showMutex:
+                       state.displayMutex()
                }
        }
 }
@@ -201,7 +211,7 @@ func (state *State) fix_latency_setting() {
 // change to the previous display mode
 func (state *State) DisplayPrevious() {
        if state.show == showLatency {
-               state.show = showUsers
+               state.show = showMutex
        } else {
                state.show--
        }
@@ -212,7 +222,7 @@ func (state *State) DisplayPrevious() {
 
 // change to the next display mode
 func (state *State) DisplayNext() {
-       if state.show == showUsers {
+       if state.show == showMutex {
                state.show = showLatency
        } else {
                state.show++
@@ -244,6 +254,8 @@ func (state State) displayLine0() {
                        initial = state.tlwsbt.Last()
                case showUsers:
                        initial = state.users.Last()
+               case showMutex:
+                       initial = state.ewsgben.Last()
                default:
                        // should not get here !
                }
@@ -269,6 +281,8 @@ func (state State) displayDescription() {
                description = state.tlwsbt.Description()
        case showUsers:
                description = state.users.Description()
+       case showMutex:
+               description = state.ewsgben.Description()
        }
 
        state.screen.PrintAt(0, 1, description)
@@ -370,6 +384,30 @@ func (state *State) displayUsers() {
        state.screen.BoldPrintAt(0, state.screen.Height()-1, state.users.TotalRowContent())
 }
 
+func (state *State) displayMutex() {
+       state.screen.BoldPrintAt(0, 2, state.ewsgben.Headings())
+
+       // print out the data
+       max_rows := state.screen.Height() - 3
+       row_content := state.ewsgben.RowContent(max_rows)
+
+       // print out rows
+       for k := range row_content {
+               y := 3 + k
+               state.screen.PrintAt(0, y, row_content[k])
+       }
+       // print out empty rows
+       for k := len(row_content); k < (state.screen.Height() - 3); k++ {
+               y := 3 + k
+               if y < state.screen.Height()-1 {
+                       state.screen.PrintAt(0, y, state.ewsgben.EmptyRowContent())
+               }
+       }
+
+       // print out the totals at the bottom
+       state.screen.BoldPrintAt(0, state.screen.Height()-1, state.ewsgben.TotalRowContent())
+}
+
 // do we want to show all p_s data?
 func (state State) WantRelativeStats() bool {
        return state.want_relative_stats
@@ -382,6 +420,7 @@ func (state *State) SetWantRelativeStats(want_relative_stats bool) {
        state.fsbi.SetWantRelativeStats(want_relative_stats)
        state.tlwsbt.SetWantRelativeStats(state.want_relative_stats)
        state.tiwsbt.SetWantRelativeStats(state.want_relative_stats)
+       state.ewsgben.SetWantRelativeStats(state.want_relative_stats)
 }
 
 // if there's a better way of doing this do it better ...
diff --git a/version/version.go b/version/version.go
index 4e282cb..f3f0e95 100644
--- a/version/version.go
+++ b/version/version.go
@@ -2,7 +2,7 @@
 package version
 
 const (
-       version = "0.1.6"
+       version = "0.2.0"
 )
 
 // return the current application version