saving a bit
[pstop.git] / performance_schema / file_summary_by_instance / file_summary_by_instance_row.go
1 // This file contains the library routines for managing the
2 // file_summary_by_instance table.
3 package file_summary_by_instance
4
5 import (
6         "database/sql"
7         "fmt"
8         "log"
9         "regexp"
10         "sort"
11         //      "strconv"
12         "time"
13
14         "github.com/sjmudd/pstop/lib"
15 )
16
17 /*
18 CREATE TABLE `file_summary_by_instance` (
19   `FILE_NAME` varchar(512) NOT NULL,
20   `EVENT_NAME` varchar(128) NOT NULL,                           // not collected
21   `OBJECT_INSTANCE_BEGIN` bigint(20) unsigned NOT NULL,         // not collected
22   `COUNT_STAR` bigint(20) unsigned NOT NULL,
23   `SUM_TIMER_WAIT` bigint(20) unsigned NOT NULL,
24   `MIN_TIMER_WAIT` bigint(20) unsigned NOT NULL,
25   `AVG_TIMER_WAIT` bigint(20) unsigned NOT NULL,
26   `MAX_TIMER_WAIT` bigint(20) unsigned NOT NULL,
27   `COUNT_READ` bigint(20) unsigned NOT NULL,
28   `SUM_TIMER_READ` bigint(20) unsigned NOT NULL,
29   `MIN_TIMER_READ` bigint(20) unsigned NOT NULL,
30   `AVG_TIMER_READ` bigint(20) unsigned NOT NULL,
31   `MAX_TIMER_READ` bigint(20) unsigned NOT NULL,
32   `SUM_NUMBER_OF_BYTES_READ` bigint(20) NOT NULL,
33   `COUNT_WRITE` bigint(20) unsigned NOT NULL,
34   `SUM_TIMER_WRITE` bigint(20) unsigned NOT NULL,
35   `MIN_TIMER_WRITE` bigint(20) unsigned NOT NULL,
36   `AVG_TIMER_WRITE` bigint(20) unsigned NOT NULL,
37   `MAX_TIMER_WRITE` bigint(20) unsigned NOT NULL,
38   `SUM_NUMBER_OF_BYTES_WRITE` bigint(20) NOT NULL,
39   `COUNT_MISC` bigint(20) unsigned NOT NULL,
40   `SUM_TIMER_MISC` bigint(20) unsigned NOT NULL,
41   `MIN_TIMER_MISC` bigint(20) unsigned NOT NULL,
42   `AVG_TIMER_MISC` bigint(20) unsigned NOT NULL,
43   `MAX_TIMER_MISC` bigint(20) unsigned NOT NULL
44 ) ENGINE=PERFORMANCE_SCHEMA DEFAULT CHARSET=utf8
45 1 row in set (0.00 sec)
46 */
47
48 //     foo/../bar --> foo/bar   perl: $new =~ s{[^/]+/\.\./}{/};
49 //     /./        --> /         perl: $new =~ s{/\./}{};
50 //     //         --> /         perl: $new =~ s{//}{/};
51 const (
52         re_one_or_the_other    = `/(\.)?/`
53         re_slash_dot_dot_slash = `[^/]+/\.\./`
54         re_auto_cnf            = `/auto\.cnf$`
55         re_binlog              = `/binlog\.(\d{6}|index)$`
56         re_charset             = `/share/charsets/Index\.xml$`
57         re_db_opt              = `/db\.opt$`
58         re_encoded             = `@(\d{4})` // FIXME - add me to catch @0024 --> $ for example
59         re_error_msg           = `/share/[^/]+/errmsg\.sys$`
60         re_ibdata              = `/ibdata\d+$`
61         re_part_table          = `(.+)#P#p(\d+|MAX)`
62         re_pid_file            = `/[^/]+\.pid$`
63         re_redo_log            = `/ib_logfile\d+$`
64         re_relative_path       = `^\.\./`
65         re_slowlog             = `/slowlog$`
66         re_table_file          = `/([^/]+)/([^/]+)\.(frm|ibd|MYD|MYI|CSM|CSV|par)$`
67         re_temp_table          = `#sql-[0-9_]+`
68 )
69
70 var (
71         v_one_or_the_other    *regexp.Regexp = regexp.MustCompile(re_one_or_the_other)
72         v_slash_dot_dot_slash *regexp.Regexp = regexp.MustCompile(re_slash_dot_dot_slash)
73         v_table_file          *regexp.Regexp = regexp.MustCompile(re_table_file)
74         v_temp_table          *regexp.Regexp = regexp.MustCompile(re_temp_table)
75         v_part_table          *regexp.Regexp = regexp.MustCompile(re_part_table)
76 )
77
78 type file_summary_by_instance_row struct {
79         FILE_NAME string
80
81         COUNT_STAR  uint64
82         COUNT_READ  uint64
83         COUNT_WRITE uint64
84         COUNT_MISC  uint64
85
86         SUM_TIMER_WAIT  uint64
87         SUM_TIMER_READ  uint64
88         SUM_TIMER_WRITE uint64
89         SUM_TIMER_MISC  uint64
90
91         SUM_NUMBER_OF_BYTES_READ  uint64
92         SUM_NUMBER_OF_BYTES_WRITE uint64
93 }
94
95 // represents a table or set of rows
96 type file_summary_by_instance_rows []file_summary_by_instance_row
97
98 // Return the name using the FILE_NAME attribute.
99 func (r *file_summary_by_instance_row) name() string {
100         return r.FILE_NAME
101 }
102
103 // Return a formatted pretty name for the row.
104 func (r *file_summary_by_instance_row) pretty_name() string {
105         s := r.name()
106         if len(s) > 30 {
107                 s = s[:29]
108         }
109         return fmt.Sprintf("%-30s", s)
110 }
111
112 func (r *file_summary_by_instance_row) headings() string {
113         return fmt.Sprintf("%-30s %10s %6s|%6s %6s %6s|%8s %8s|%8s %6s %6s %6s",
114                 "Table Name",
115                 "Latency",
116                 "%",
117                 "Read",
118                 "Write",
119                 "Misc",
120                 "Rd bytes",
121                 "Wr bytes",
122                 "Ops",
123                 "R Ops",
124                 "W Ops",
125                 "M Ops")
126 }
127
128 // generate a printable result
129 func (row *file_summary_by_instance_row) row_content(totals file_summary_by_instance_row) string {
130         var name string
131
132         // We assume that if COUNT_STAR = 0 then there's no data at all...
133         // when we have no data we really don't want to show the name either.
134         if row.COUNT_STAR == 0 {
135                 name = ""
136         } else {
137                 name = row.pretty_name()
138         }
139
140         return fmt.Sprintf("%-30s %10s %6s|%6s %6s %6s|%8s %8s|%8s %6s %6s %6s",
141                 name,
142                 lib.FormatTime(row.SUM_TIMER_WAIT),
143                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_WAIT, totals.SUM_TIMER_WAIT)),
144                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_READ, row.SUM_TIMER_WAIT)),
145                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_WRITE, row.SUM_TIMER_WAIT)),
146                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_MISC, row.SUM_TIMER_WAIT)),
147                 lib.FormatAmount(row.SUM_NUMBER_OF_BYTES_READ),
148                 lib.FormatAmount(row.SUM_NUMBER_OF_BYTES_WRITE),
149                 lib.FormatAmount(row.COUNT_STAR),
150                 lib.FormatPct(lib.MyDivide(row.COUNT_READ, row.COUNT_STAR)),
151                 lib.FormatPct(lib.MyDivide(row.COUNT_WRITE, row.COUNT_STAR)),
152                 lib.FormatPct(lib.MyDivide(row.COUNT_MISC, row.COUNT_STAR)))
153 }
154
155 func (this *file_summary_by_instance_row) add(other file_summary_by_instance_row) {
156         this.COUNT_STAR += other.COUNT_STAR
157         this.COUNT_READ += other.COUNT_READ
158         this.COUNT_WRITE += other.COUNT_WRITE
159         this.COUNT_MISC += other.COUNT_MISC
160
161         this.SUM_TIMER_WAIT += other.SUM_TIMER_WAIT
162         this.SUM_TIMER_READ += other.SUM_TIMER_READ
163         this.SUM_TIMER_WRITE += other.SUM_TIMER_WRITE
164         this.SUM_TIMER_MISC += other.SUM_TIMER_MISC
165
166         this.SUM_NUMBER_OF_BYTES_READ += other.SUM_NUMBER_OF_BYTES_READ
167         this.SUM_NUMBER_OF_BYTES_WRITE += other.SUM_NUMBER_OF_BYTES_WRITE
168 }
169
170 func (this *file_summary_by_instance_row) subtract(other file_summary_by_instance_row) {
171         this.COUNT_STAR -= other.COUNT_STAR
172         this.COUNT_READ -= other.COUNT_READ
173         this.COUNT_WRITE -= other.COUNT_WRITE
174         this.COUNT_MISC -= other.COUNT_MISC
175
176         this.SUM_TIMER_WAIT -= other.SUM_TIMER_WAIT
177         this.SUM_TIMER_READ -= other.SUM_TIMER_READ
178         this.SUM_TIMER_WRITE -= other.SUM_TIMER_WRITE
179         this.SUM_TIMER_MISC -= other.SUM_TIMER_MISC
180
181         this.SUM_NUMBER_OF_BYTES_READ -= other.SUM_NUMBER_OF_BYTES_READ
182         this.SUM_NUMBER_OF_BYTES_WRITE -= other.SUM_NUMBER_OF_BYTES_WRITE
183 }
184
185 // return the totals of a slice of rows
186 func (t file_summary_by_instance_rows) totals() file_summary_by_instance_row {
187         var totals file_summary_by_instance_row
188         totals.FILE_NAME = "TOTALS"
189
190         for i := range t {
191                 totals.add(t[i])
192         }
193
194         return totals
195 }
196
197 // clean up the given path reducing redundant stuff and return the clean path
198 func cleanup_path(path string) string {
199
200         for {
201                 orig_path := path
202                 path = v_one_or_the_other.ReplaceAllString(path, "/")
203                 path = v_slash_dot_dot_slash.ReplaceAllString(path, "/")
204                 if orig_path == path { // no change so give up
205                         break
206                 }
207         }
208
209         return path
210 }
211
212 // From the original FILE_NAME we want to generate a simpler name to use.
213 // This simpler name may also merge several different filenames into one.
214 func (t file_summary_by_instance_row) simple_name(global_variables map[string]string) string {
215
216         path := t.FILE_NAME
217
218         // FIXME and make this work.
219         //      re4 := regexp.MustCompile(re_encoded)
220         //      if m4 := re4.FindStringSubmatch(path); m4 != nil {
221         //              if value, err := strconv.ParseInt(m4[1], 16, 16); err != nil {
222         //                      // missing replace @.... with char(value) in path
223         //
224         //              }
225         //      }
226
227         // this should probably be ordered from most expected regexp to least
228         if m1 := v_table_file.FindStringSubmatch(path); m1 != nil {
229                 // we may match temporary tables so check for them
230                 if m2 := v_temp_table.FindStringSubmatch(m1[2]); m2 != nil {
231                         return "<temp_table>"
232                 }
233
234                 // we may match partitioned tables so check for them
235                 if m3 := v_part_table.FindStringSubmatch(m1[2]); m3 != nil {
236                         return m1[1] + "." + m3[1] // <schema>.<table> (less partition info)
237                 }
238
239                 return m1[1] + "." + m1[2] // <schema>.<table>
240         }
241         if regexp.MustCompile(re_ibdata).MatchString(path) == true {
242                 return "<ibdata>"
243         }
244         if regexp.MustCompile(re_redo_log).MatchString(path) == true {
245                 return "<redo_log>"
246         }
247         if regexp.MustCompile(re_binlog).MatchString(path) == true {
248                 return "<binlog>"
249         }
250         if regexp.MustCompile(re_db_opt).MatchString(path) == true {
251                 return "<db_opt>"
252         }
253         if regexp.MustCompile(re_slowlog).MatchString(path) == true {
254                 return "<slow_log>"
255         }
256         if regexp.MustCompile(re_auto_cnf).MatchString(path) == true {
257                 return "<auto_cnf>"
258         }
259         // relay logs are a bit complicated. If a full path then easy to
260         // identify,but if a relative path we may need to add $datadir,
261         // but also if as I do we have a ../blah/somewhere/path then we
262         // need to make it match too.
263         if len(global_variables["relay_log"]) > 0 {
264                 relay_log := global_variables["relay_log"]
265                 if relay_log[0] != '/' { // relative path
266                         relay_log = cleanup_path(global_variables["datadir"] + relay_log) // datadir always ends in /
267                 }
268                 relay_log_re := relay_log + `\.(\d{6}|index)$`
269                 if regexp.MustCompile(relay_log_re).MatchString(path) == true {
270                         return "<relay_log>"
271                 }
272         }
273         if regexp.MustCompile(re_pid_file).MatchString(path) == true {
274                 return "<pid_file>"
275         }
276         if regexp.MustCompile(re_error_msg).MatchString(path) == true {
277                 return "<errmsg>"
278         }
279         if regexp.MustCompile(re_charset).MatchString(path) == true {
280                 return "<charset>"
281         }
282         return path
283 }
284
285 // Convert the imported "table" to a merged one with merged data.
286 // Combine all entries with the same "FILE_NAME" by adding their values.
287 func merge_by_table_name(orig file_summary_by_instance_rows, global_variables map[string]string) file_summary_by_instance_rows {
288         start := time.Now()
289         t := make(file_summary_by_instance_rows, 0, len(orig))
290
291         m := make(map[string]file_summary_by_instance_row)
292
293         // iterate over source table
294         for i := range orig {
295                 var file_name string
296                 var new_row file_summary_by_instance_row
297                 orig_row := orig[i]
298
299                 if orig_row.COUNT_STAR > 0 {
300                         file_name = orig_row.simple_name(global_variables)
301
302                         // check if we have an entry in the map
303                         if _, found := m[file_name]; found {
304                                 new_row = m[file_name]
305                         } else {
306                                 new_row.FILE_NAME = file_name
307                         }
308                         new_row.add(orig_row)
309                         m[file_name] = new_row // update the map with the new value
310                 }
311         }
312
313         // add the map contents back into the table
314         for _, row := range m {
315                 t = append(t, row)
316         }
317
318         lib.Logger.Println("merge_by_table_name() took:", time.Duration(time.Since(start)).String())
319         return t
320 }
321
322 // Select the raw data from the database into file_summary_by_instance_rows
323 // - filter out empty values
324 // - merge rows with the same name into a single row
325 // - change FILE_NAME into a more descriptive value.
326 func select_fsbi_rows(dbh *sql.DB) file_summary_by_instance_rows {
327         var t file_summary_by_instance_rows
328         start := time.Now()
329
330         sql := "SELECT FILE_NAME, COUNT_STAR, SUM_TIMER_WAIT, COUNT_READ, SUM_TIMER_READ, SUM_NUMBER_OF_BYTES_READ, COUNT_WRITE, SUM_TIMER_WRITE, SUM_NUMBER_OF_BYTES_WRITE, COUNT_MISC, SUM_TIMER_MISC FROM file_summary_by_instance"
331
332         rows, err := dbh.Query(sql)
333         if err != nil {
334                 log.Fatal(err)
335         }
336         defer rows.Close()
337
338         for rows.Next() {
339                 var r file_summary_by_instance_row
340
341                 if err := rows.Scan(&r.FILE_NAME, &r.COUNT_STAR, &r.SUM_TIMER_WAIT, &r.COUNT_READ, &r.SUM_TIMER_READ, &r.SUM_NUMBER_OF_BYTES_READ, &r.COUNT_WRITE, &r.SUM_TIMER_WRITE, &r.SUM_NUMBER_OF_BYTES_WRITE, &r.COUNT_MISC, &r.SUM_TIMER_MISC); err != nil {
342                         log.Fatal(err)
343                 }
344                 t = append(t, r)
345         }
346         if err := rows.Err(); err != nil {
347                 log.Fatal(err)
348         }
349         lib.Logger.Println("select_fsbi_rows() took:", time.Duration(time.Since(start)).String())
350
351         return t
352 }
353
354 // remove the initial values from those rows where there's a match
355 // - if we find a row we can't match ignore it
356 func (this *file_summary_by_instance_rows) subtract(initial file_summary_by_instance_rows) {
357         i_by_name := make(map[string]int)
358
359         // iterate over rows by name
360         for i := range initial {
361                 i_by_name[initial[i].name()] = i
362         }
363
364         for i := range *this {
365                 if _, ok := i_by_name[(*this)[i].name()]; ok {
366                         initial_i := i_by_name[(*this)[i].name()]
367                         (*this)[i].subtract(initial[initial_i])
368                 }
369         }
370 }
371
372 func (t file_summary_by_instance_rows) Len() int      { return len(t) }
373 func (t file_summary_by_instance_rows) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
374 func (t file_summary_by_instance_rows) Less(i, j int) bool {
375         return (t[i].SUM_TIMER_WAIT > t[j].SUM_TIMER_WAIT) ||
376                 ((t[i].SUM_TIMER_WAIT == t[j].SUM_TIMER_WAIT) && (t[i].FILE_NAME < t[j].FILE_NAME))
377 }
378
379 func (t *file_summary_by_instance_rows) sort() {
380         sort.Sort(t)
381 }
382
383 // if the data in t2 is "newer", "has more values" than t then it needs refreshing.
384 // check this by comparing totals.
385 func (t file_summary_by_instance_rows) needs_refresh(t2 file_summary_by_instance_rows) bool {
386         my_totals := t.totals()
387         t2_totals := t2.totals()
388
389         return my_totals.SUM_TIMER_WAIT > t2_totals.SUM_TIMER_WAIT
390 }