// This file contains the library routines for managing the
// file_summary_by_instance table.
package file_summary_by_instance

import (
        "database/sql"
        "fmt"
        "log"
        "regexp"
        "sort"
        //      "strconv"
        "time"

        "github.com/sjmudd/pstop/lib"
)

/*
CREATE TABLE `file_summary_by_instance` (
  `FILE_NAME` varchar(512) NOT NULL,
  `EVENT_NAME` varchar(128) NOT NULL,                           // not collected
  `OBJECT_INSTANCE_BEGIN` bigint(20) unsigned NOT NULL,         // not collected
  `COUNT_STAR` bigint(20) unsigned NOT NULL,
  `SUM_TIMER_WAIT` bigint(20) unsigned NOT NULL,
  `MIN_TIMER_WAIT` bigint(20) unsigned NOT NULL,
  `AVG_TIMER_WAIT` bigint(20) unsigned NOT NULL,
  `MAX_TIMER_WAIT` bigint(20) unsigned NOT NULL,
  `COUNT_READ` bigint(20) unsigned NOT NULL,
  `SUM_TIMER_READ` bigint(20) unsigned NOT NULL,
  `MIN_TIMER_READ` bigint(20) unsigned NOT NULL,
  `AVG_TIMER_READ` bigint(20) unsigned NOT NULL,
  `MAX_TIMER_READ` bigint(20) unsigned NOT NULL,
  `SUM_NUMBER_OF_BYTES_READ` bigint(20) NOT NULL,
  `COUNT_WRITE` bigint(20) unsigned NOT NULL,
  `SUM_TIMER_WRITE` bigint(20) unsigned NOT NULL,
  `MIN_TIMER_WRITE` bigint(20) unsigned NOT NULL,
  `AVG_TIMER_WRITE` bigint(20) unsigned NOT NULL,
  `MAX_TIMER_WRITE` bigint(20) unsigned NOT NULL,
  `SUM_NUMBER_OF_BYTES_WRITE` bigint(20) NOT NULL,
  `COUNT_MISC` bigint(20) unsigned NOT NULL,
  `SUM_TIMER_MISC` bigint(20) unsigned NOT NULL,
  `MIN_TIMER_MISC` bigint(20) unsigned NOT NULL,
  `AVG_TIMER_MISC` bigint(20) unsigned NOT NULL,
  `MAX_TIMER_MISC` bigint(20) unsigned NOT NULL
) ENGINE=PERFORMANCE_SCHEMA DEFAULT CHARSET=utf8
1 row in set (0.00 sec)
*/

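// file_summary_by_instance_row holds the columns we collect from one row of
// performance_schema.file_summary_by_instance.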
type file_summary_by_instance_row struct {
        FILE_NAME string

        COUNT_STAR  uint64
        COUNT_READ  uint64
        COUNT_WRITE uint64
        COUNT_MISC  uint64

        SUM_TIMER_WAIT  uint64
        SUM_TIMER_READ  uint64
        SUM_TIMER_WRITE uint64
        SUM_TIMER_MISC  uint64

        SUM_NUMBER_OF_BYTES_READ  uint64
        SUM_NUMBER_OF_BYTES_WRITE uint64
}

// represents a table or set of rows
type file_summary_by_instance_rows []file_summary_by_instance_row

// Return the name using the FILE_NAME attribute.
func (r *file_summary_by_instance_row) name() string {
        return r.FILE_NAME
}

// Return a formatted pretty name for the row.
func (r *file_summary_by_instance_row) pretty_name() string {
        s := r.name()
        if len(s) > 30 {
                s = s[:29]
        }
        return fmt.Sprintf("%-30s", s)
}

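// Return the heading line, using the same layout as row_content().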
func (r *file_summary_by_instance_row) headings() string {
        return fmt.Sprintf("%-30s %10s %6s|%6s %6s %6s|%8s %8s|%8s %6s %6s %6s",
                "Table Name",
                "Latency",
                "%",
                "Read",
                "Write",
                "Misc",
                "Rd bytes",
                "Wr bytes",
                "Ops",
                "R Ops",
                "W Ops",
                "M Ops")
}

// generate a printable result
func (row *file_summary_by_instance_row) row_content(totals file_summary_by_instance_row) string {
        var name string

        // We assume that if COUNT_STAR = 0 then there's no data at all...
        // when we have no data we really don't want to show the name either.
        if row.COUNT_STAR == 0 {
                name = ""
        } else {
                name = row.pretty_name()
        }

        return fmt.Sprintf("%-30s %10s %6s|%6s %6s %6s|%8s %8s|%8s %6s %6s %6s",
                name,
                lib.FormatTime(row.SUM_TIMER_WAIT),
                lib.FormatPct(lib.MyDivide(row.SUM_TIMER_WAIT, totals.SUM_TIMER_WAIT)),
                lib.FormatPct(lib.MyDivide(row.SUM_TIMER_READ, row.SUM_TIMER_WAIT)),
                lib.FormatPct(lib.MyDivide(row.SUM_TIMER_WRITE, row.SUM_TIMER_WAIT)),
                lib.FormatPct(lib.MyDivide(row.SUM_TIMER_MISC, row.SUM_TIMER_WAIT)),
                lib.FormatAmount(row.SUM_NUMBER_OF_BYTES_READ),
                lib.FormatAmount(row.SUM_NUMBER_OF_BYTES_WRITE),
                lib.FormatAmount(row.COUNT_STAR),
                lib.FormatPct(lib.MyDivide(row.COUNT_READ, row.COUNT_STAR)),
                lib.FormatPct(lib.MyDivide(row.COUNT_WRITE, row.COUNT_STAR)),
                lib.FormatPct(lib.MyDivide(row.COUNT_MISC, row.COUNT_STAR)))
}

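// add the values of other to this row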
func (this *file_summary_by_instance_row) add(other file_summary_by_instance_row) {
        this.COUNT_STAR += other.COUNT_STAR
        this.COUNT_READ += other.COUNT_READ
        this.COUNT_WRITE += other.COUNT_WRITE
        this.COUNT_MISC += other.COUNT_MISC

        this.SUM_TIMER_WAIT += other.SUM_TIMER_WAIT
        this.SUM_TIMER_READ += other.SUM_TIMER_READ
        this.SUM_TIMER_WRITE += other.SUM_TIMER_WRITE
        this.SUM_TIMER_MISC += other.SUM_TIMER_MISC

        this.SUM_NUMBER_OF_BYTES_READ += other.SUM_NUMBER_OF_BYTES_READ
        this.SUM_NUMBER_OF_BYTES_WRITE += other.SUM_NUMBER_OF_BYTES_WRITE
}

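// subtract the values of other from this row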
func (this *file_summary_by_instance_row) subtract(other file_summary_by_instance_row) {
        this.COUNT_STAR -= other.COUNT_STAR
        this.COUNT_READ -= other.COUNT_READ
        this.COUNT_WRITE -= other.COUNT_WRITE
        this.COUNT_MISC -= other.COUNT_MISC

        this.SUM_TIMER_WAIT -= other.SUM_TIMER_WAIT
        this.SUM_TIMER_READ -= other.SUM_TIMER_READ
        this.SUM_TIMER_WRITE -= other.SUM_TIMER_WRITE
        this.SUM_TIMER_MISC -= other.SUM_TIMER_MISC

        this.SUM_NUMBER_OF_BYTES_READ -= other.SUM_NUMBER_OF_BYTES_READ
        this.SUM_NUMBER_OF_BYTES_WRITE -= other.SUM_NUMBER_OF_BYTES_WRITE
}

// return the totals of a slice of rows
func (t file_summary_by_instance_rows) totals() file_summary_by_instance_row {
        var totals file_summary_by_instance_row
        totals.FILE_NAME = "TOTALS"

        for i := range t {
                totals.add(t[i])
        }

        return totals
}

// clean up the given path, removing redundant elements, and return the cleaned path
func cleanup_path(path string) string {
        //     foo/../bar --> foo/bar   perl: $new =~ s{[^/]+/\.\./}{/};
        //     /./        --> /         perl: $new =~ s{/\./}{};
        //     //         --> /         perl: $new =~ s{//}{/};
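        //
        // For example (hypothetical path, combining the rules above):
        //     /data/./mysql//foo/../bar --> /data/mysql/bar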
        const (
                one_or_the_other_re    = `/(\.)?/`
                slash_dot_dot_slash_re = `[^/]+/\.\./`
        )
        r1 := regexp.MustCompile(one_or_the_other_re)
        r2 := regexp.MustCompile(slash_dot_dot_slash_re)

        for {
                orig_path := path
                path = r1.ReplaceAllString(path, "/")
                path = r2.ReplaceAllString(path, "/")
                if orig_path == path { // no change so give up
                        break
                }
        }

        return path
}

// From the original FILE_NAME we want to generate a simpler name to use.
// This simpler name may also merge several different filenames into one.
func (t file_summary_by_instance_row) simple_name(global_variables map[string]string) string {
        const (
                auto_cnf_re      = `/auto\.cnf$`
                binlog_re        = `/binlog\.(\d{6}|index)$`
                charset_re       = `/share/charsets/Index\.xml$`
                current_dir_re   = `^\./`
                db_opt_re        = `/db\.opt$`
                encoded_re       = `@(\d{4})` // FIXME - add me to catch @0024 --> $ for example
                error_msg_re     = `/share/[^/]+/errmsg\.sys$`
                ibdata_re        = `/ibdata\d+$`
                part_table_re    = `(.+)#P#p\d+`
                pid_file_re      = `/[^/]+\.pid$`
                redo_log_re      = `/ib_logfile\d+$`
                relative_path_re = `^\.\./`
                slowlog_re       = `/slowlog$`
                table_file_re    = `/([^/]+)/([^/]+)\.(frm|ibd|MYD|MYI|CSM|CSV|par)$`
                temp_table_re    = `#sql-[0-9_]+`
        )
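
        // Typical mappings produced by the checks below (the paths here are
        // hypothetical examples):
        //     /var/lib/mysql/somedb/sometable.ibd      --> somedb.sometable
        //     /var/lib/mysql/somedb/sometable#P#p0.ibd --> somedb.sometable
        //     /var/lib/mysql/ibdata1                   --> <ibdata>
        //     /var/lib/mysql/ib_logfile0               --> <redo_log>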

        path := t.FILE_NAME

        // FIXME and make this work.
        //      re4 := regexp.MustCompile(encoded_re)
        //      if m4 := re4.FindStringSubmatch(path); m4 != nil {
        //              if value, err := strconv.ParseInt(m4[1], 16, 16); err != nil {
        //                      // missing replace @.... with char(value) in path
        //
        //              }
        //      }

        // this should probably be ordered from most expected regexp to least
        re := regexp.MustCompile(table_file_re)
        if m1 := re.FindStringSubmatch(path); m1 != nil {
                // we may match temporary tables so check for them
                re2 := regexp.MustCompile(temp_table_re)
                if m2 := re2.FindStringSubmatch(m1[2]); m2 != nil {
                        return "<temp_table>"
                }

                // we may match partitioned tables so check for them
                re3 := regexp.MustCompile(part_table_re)
                if m3 := re3.FindStringSubmatch(m1[2]); m3 != nil {
                        return m1[1] + "." + m3[1] // <schema>.<table> (less partition info)
                }

                return m1[1] + "." + m1[2] // <schema>.<table>
        }
        if regexp.MustCompile(ibdata_re).MatchString(path) {
                return "<ibdata>"
        }
        if regexp.MustCompile(redo_log_re).MatchString(path) {
                return "<redo_log>"
        }
        if regexp.MustCompile(binlog_re).MatchString(path) {
                return "<binlog>"
        }
        if regexp.MustCompile(db_opt_re).MatchString(path) {
                return "<db_opt>"
        }
        if regexp.MustCompile(slowlog_re).MatchString(path) {
                return "<slow_log>"
        }
        if regexp.MustCompile(auto_cnf_re).MatchString(path) {
                return "<auto_cnf>"
        }
        // Relay logs are a bit more complicated. If relay_log is a full path
        // it is easy to identify, but if it is a relative path we may need to
        // prepend $datadir, and a path such as ../blah/somewhere/path also
        // needs cleaning up before it will match.
        if len(global_variables["relay_log"]) > 0 {
                relay_log := global_variables["relay_log"]
                if relay_log[0] != '/' { // relative path
                        relay_log = cleanup_path(global_variables["datadir"] + relay_log) // datadir always ends in /
                }
                relay_log_re := relay_log + `\.(\d{6}|index)$`
                if regexp.MustCompile(relay_log_re).MatchString(path) {
                        return "<relay_log>"
                }
        }
        if regexp.MustCompile(pid_file_re).MatchString(path) {
                return "<pid_file>"
        }
        if regexp.MustCompile(error_msg_re).MatchString(path) {
                return "<errmsg>"
        }
        if regexp.MustCompile(charset_re).MatchString(path) {
                return "<charset>"
        }
        return path
}

// Convert the imported "table" to a merged one with merged data.
// Combine all entries with the same "FILE_NAME" by adding their values.
func merge_by_table_name(orig file_summary_by_instance_rows, global_variables map[string]string) file_summary_by_instance_rows {
        start := time.Now()
        t := make(file_summary_by_instance_rows, 0, len(orig))

        m := make(map[string]file_summary_by_instance_row)

        // iterate over source table
        for i := range orig {
                var file_name string
                var new_row file_summary_by_instance_row
                orig_row := orig[i]

                if orig_row.COUNT_STAR > 0 {
                        file_name = orig_row.simple_name(global_variables)

                        // check if we have an entry in the map
                        if _, found := m[file_name]; found {
                                new_row = m[file_name]
                        } else {
                                new_row.FILE_NAME = file_name
                        }
                        new_row.add(orig_row)
                        m[file_name] = new_row // update the map with the new value
                }
        }

        // add the map contents back into the table
        for _, row := range m {
                t = append(t, row)
        }

        lib.Logger.Println("merge_by_table_name() took:", time.Since(start).String())
        return t
}

// Select the raw data from the database into file_summary_by_instance_rows
// - filter out empty values
// - merge rows with the same name into a single row
// - change FILE_NAME into a more descriptive value.
func select_fsbi_rows(dbh *sql.DB) file_summary_by_instance_rows {
        var t file_summary_by_instance_rows
        start := time.Now()

        sql := "SELECT FILE_NAME, COUNT_STAR, SUM_TIMER_WAIT, COUNT_READ, SUM_TIMER_READ, SUM_NUMBER_OF_BYTES_READ, COUNT_WRITE, SUM_TIMER_WRITE, SUM_NUMBER_OF_BYTES_WRITE, COUNT_MISC, SUM_TIMER_MISC FROM file_summary_by_instance"

        rows, err := dbh.Query(sql)
        if err != nil {
                log.Fatal(err)
        }
        defer rows.Close()

        for rows.Next() {
                var r file_summary_by_instance_row

                if err := rows.Scan(&r.FILE_NAME, &r.COUNT_STAR, &r.SUM_TIMER_WAIT, &r.COUNT_READ, &r.SUM_TIMER_READ, &r.SUM_NUMBER_OF_BYTES_READ, &r.COUNT_WRITE, &r.SUM_TIMER_WRITE, &r.SUM_NUMBER_OF_BYTES_WRITE, &r.COUNT_MISC, &r.SUM_TIMER_MISC); err != nil {
                        log.Fatal(err)
                }
                t = append(t, r)
        }
        if err := rows.Err(); err != nil {
                log.Fatal(err)
        }
        lib.Logger.Println("select_fsbi_rows() took:", time.Since(start).String())

        return t
}

// remove the initial values from those rows where there's a match
// - if we find a row we can't match ignore it
func (this *file_summary_by_instance_rows) subtract(initial file_summary_by_instance_rows) {
        i_by_name := make(map[string]int)

        // iterate over rows by name
        for i := range initial {
                i_by_name[initial[i].name()] = i
        }

        for i := range *this {
                if _, ok := i_by_name[(*this)[i].name()]; ok {
                        initial_i := i_by_name[(*this)[i].name()]
                        (*this)[i].subtract(initial[initial_i])
                }
        }
}

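// Implement sort.Interface: order by SUM_TIMER_WAIT descending, then by FILE_NAME ascending.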
func (t file_summary_by_instance_rows) Len() int      { return len(t) }
func (t file_summary_by_instance_rows) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t file_summary_by_instance_rows) Less(i, j int) bool {
        return (t[i].SUM_TIMER_WAIT > t[j].SUM_TIMER_WAIT) ||
                ((t[i].SUM_TIMER_WAIT == t[j].SUM_TIMER_WAIT) && (t[i].FILE_NAME < t[j].FILE_NAME))
}

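// sort the rows in place using the ordering defined by Less()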
func (t *file_summary_by_instance_rows) sort() {
        sort.Sort(t)
}

// if the data in t2 is "newer" (has more values) than the data in t then it needs refreshing.
// check this by comparing totals.
func (t file_summary_by_instance_rows) needs_refresh(t2 file_summary_by_instance_rows) bool {
        my_totals := t.totals()
        t2_totals := t2.totals()

        return my_totals.SUM_TIMER_WAIT > t2_totals.SUM_TIMER_WAIT
}