711b7428ed812959fe97c8ce2f6f14ff61307fe9
[pstop.git] / performance_schema / file_summary_by_instance / file_summary_by_instance_row.go
1 // This file contains the library routines for managing the
2 // file_summary_by_instance table.
3 package file_summary_by_instance
4
5 import (
6         "database/sql"
7         "fmt"
8         "log"
9         "regexp"
10         "sort"
11         "time"
12
13         "github.com/sjmudd/pstop/key_value_cache"
14         "github.com/sjmudd/pstop/lib"
15 )
16
17 /*
18 CREATE TABLE `file_summary_by_instance` (
19   `FILE_NAME` varchar(512) NOT NULL,
20   `EVENT_NAME` varchar(128) NOT NULL,                           // not collected
21   `OBJECT_INSTANCE_BEGIN` bigint(20) unsigned NOT NULL,         // not collected
22   `COUNT_STAR` bigint(20) unsigned NOT NULL,
23   `SUM_TIMER_WAIT` bigint(20) unsigned NOT NULL,
24   `MIN_TIMER_WAIT` bigint(20) unsigned NOT NULL,
25   `AVG_TIMER_WAIT` bigint(20) unsigned NOT NULL,
26   `MAX_TIMER_WAIT` bigint(20) unsigned NOT NULL,
27   `COUNT_READ` bigint(20) unsigned NOT NULL,
28   `SUM_TIMER_READ` bigint(20) unsigned NOT NULL,
29   `MIN_TIMER_READ` bigint(20) unsigned NOT NULL,
30   `AVG_TIMER_READ` bigint(20) unsigned NOT NULL,
31   `MAX_TIMER_READ` bigint(20) unsigned NOT NULL,
32   `SUM_NUMBER_OF_BYTES_READ` bigint(20) NOT NULL,
33   `COUNT_WRITE` bigint(20) unsigned NOT NULL,
34   `SUM_TIMER_WRITE` bigint(20) unsigned NOT NULL,
35   `MIN_TIMER_WRITE` bigint(20) unsigned NOT NULL,
36   `AVG_TIMER_WRITE` bigint(20) unsigned NOT NULL,
37   `MAX_TIMER_WRITE` bigint(20) unsigned NOT NULL,
38   `SUM_NUMBER_OF_BYTES_WRITE` bigint(20) NOT NULL,
39   `COUNT_MISC` bigint(20) unsigned NOT NULL,
40   `SUM_TIMER_MISC` bigint(20) unsigned NOT NULL,
41   `MIN_TIMER_MISC` bigint(20) unsigned NOT NULL,
42   `AVG_TIMER_MISC` bigint(20) unsigned NOT NULL,
43   `MAX_TIMER_MISC` bigint(20) unsigned NOT NULL
44 ) ENGINE=PERFORMANCE_SCHEMA DEFAULT CHARSET=utf8
45 1 row in set (0.00 sec)
46 */
47
// Path rewrites applied by cleanup_path() (perl equivalents shown):
//     foo/../bar --> foo/bar   perl: $new =~ s{[^/]+/\.\./}{/};
//     /./        --> /         perl: $new =~ s{/\./}{};
//     //         --> /         perl: $new =~ s{//}{/};
const (
	// Pattern for MySQL's @NNNN filename encoding of special characters
	// (e.g. @0024 encodes "$"). Currently unused; only the specific
	// @0024 case is handled via re_dollar below.
	re_encoded = `@(\d{4})` // FIXME - add me to catch @0024 --> $ for example
)
54
var (
	// Filename classification patterns. All are compiled once at package
	// init (MustCompile) so per-row matching in simple_name() pays no
	// compile cost.
	re_one_or_the_other    *regexp.Regexp = regexp.MustCompile(`/(\.)?/`)    // matches "//" or "/./"
	re_slash_dot_dot_slash *regexp.Regexp = regexp.MustCompile(`[^/]+/\.\./`) // matches "xxx/../"
	re_table_file          *regexp.Regexp = regexp.MustCompile(`/([^/]+)/([^/]+)\.(frm|ibd|MYD|MYI|CSM|CSV|par)$`) // <schema>/<table>.<ext>
	re_temp_table          *regexp.Regexp = regexp.MustCompile(`#sql-[0-9_]+`)
	re_part_table          *regexp.Regexp = regexp.MustCompile(`(.+)#P#p(\d+|MAX)`) // partitioned table suffix
	re_ibdata              *regexp.Regexp = regexp.MustCompile(`/ibdata\d+$`)
	re_redo_log            *regexp.Regexp = regexp.MustCompile(`/ib_logfile\d+$`)
	re_binlog              *regexp.Regexp = regexp.MustCompile(`/binlog\.(\d{6}|index)$`)
	re_db_opt              *regexp.Regexp = regexp.MustCompile(`/db\.opt$`)
	re_slowlog             *regexp.Regexp = regexp.MustCompile(`/slowlog$`)
	re_auto_cnf            *regexp.Regexp = regexp.MustCompile(`/auto\.cnf$`)
	re_pid_file            *regexp.Regexp = regexp.MustCompile(`/[^/]+\.pid$`)
	re_error_msg           *regexp.Regexp = regexp.MustCompile(`/share/[^/]+/errmsg\.sys$`)
	re_charset             *regexp.Regexp = regexp.MustCompile(`/share/charsets/Index\.xml$`)
	re_dollar              *regexp.Regexp = regexp.MustCompile(`@0024`) // FIXME - add me to catch @0024 --> $ (specific case)

	// cache memoizes raw FILE_NAME -> simplified name lookups made by
	// simple_name().
	cache key_value_cache.KeyValueCache
)
74
// file_summary_by_instance_row holds the subset of columns we collect
// from one row of performance_schema.file_summary_by_instance (see the
// CREATE TABLE definition above for the full schema).
type file_summary_by_instance_row struct {
	FILE_NAME string // file path as reported by the server

	// operation counts: total, then split by read/write/misc
	COUNT_STAR  uint64
	COUNT_READ  uint64
	COUNT_WRITE uint64
	COUNT_MISC  uint64

	// accumulated wait timers (units as reported by the server), same split
	SUM_TIMER_WAIT  uint64
	SUM_TIMER_READ  uint64
	SUM_TIMER_WRITE uint64
	SUM_TIMER_MISC  uint64

	// bytes transferred by read and write operations
	SUM_NUMBER_OF_BYTES_READ  uint64
	SUM_NUMBER_OF_BYTES_WRITE uint64
}
91
// file_summary_by_instance_rows represents a table or set of rows:
// a slice of individual file_summary_by_instance_row values.
type file_summary_by_instance_rows []file_summary_by_instance_row
94
95 // Return the name using the FILE_NAME attribute.
96 func (r *file_summary_by_instance_row) name() string {
97         return r.FILE_NAME
98 }
99
100 // Return a formatted pretty name for the row.
101 func (r *file_summary_by_instance_row) pretty_name() string {
102         s := r.name()
103         if len(s) > 30 {
104                 s = s[:29]
105         }
106         return fmt.Sprintf("%-30s", s)
107 }
108
109 func (r *file_summary_by_instance_row) headings() string {
110         return fmt.Sprintf("%-30s %10s %6s|%6s %6s %6s|%8s %8s|%8s %6s %6s %6s",
111                 "Table Name",
112                 "Latency",
113                 "%",
114                 "Read",
115                 "Write",
116                 "Misc",
117                 "Rd bytes",
118                 "Wr bytes",
119                 "Ops",
120                 "R Ops",
121                 "W Ops",
122                 "M Ops")
123 }
124
125 // generate a printable result
126 func (row *file_summary_by_instance_row) row_content(totals file_summary_by_instance_row) string {
127         var name string
128
129         // We assume that if COUNT_STAR = 0 then there's no data at all...
130         // when we have no data we really don't want to show the name either.
131         if row.COUNT_STAR == 0 {
132                 name = ""
133         } else {
134                 name = row.pretty_name()
135         }
136
137         return fmt.Sprintf("%-30s %10s %6s|%6s %6s %6s|%8s %8s|%8s %6s %6s %6s",
138                 name,
139                 lib.FormatTime(row.SUM_TIMER_WAIT),
140                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_WAIT, totals.SUM_TIMER_WAIT)),
141                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_READ, row.SUM_TIMER_WAIT)),
142                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_WRITE, row.SUM_TIMER_WAIT)),
143                 lib.FormatPct(lib.MyDivide(row.SUM_TIMER_MISC, row.SUM_TIMER_WAIT)),
144                 lib.FormatAmount(row.SUM_NUMBER_OF_BYTES_READ),
145                 lib.FormatAmount(row.SUM_NUMBER_OF_BYTES_WRITE),
146                 lib.FormatAmount(row.COUNT_STAR),
147                 lib.FormatPct(lib.MyDivide(row.COUNT_READ, row.COUNT_STAR)),
148                 lib.FormatPct(lib.MyDivide(row.COUNT_WRITE, row.COUNT_STAR)),
149                 lib.FormatPct(lib.MyDivide(row.COUNT_MISC, row.COUNT_STAR)))
150 }
151
152 func (this *file_summary_by_instance_row) add(other file_summary_by_instance_row) {
153         this.COUNT_STAR += other.COUNT_STAR
154         this.COUNT_READ += other.COUNT_READ
155         this.COUNT_WRITE += other.COUNT_WRITE
156         this.COUNT_MISC += other.COUNT_MISC
157
158         this.SUM_TIMER_WAIT += other.SUM_TIMER_WAIT
159         this.SUM_TIMER_READ += other.SUM_TIMER_READ
160         this.SUM_TIMER_WRITE += other.SUM_TIMER_WRITE
161         this.SUM_TIMER_MISC += other.SUM_TIMER_MISC
162
163         this.SUM_NUMBER_OF_BYTES_READ += other.SUM_NUMBER_OF_BYTES_READ
164         this.SUM_NUMBER_OF_BYTES_WRITE += other.SUM_NUMBER_OF_BYTES_WRITE
165 }
166
167 func (this *file_summary_by_instance_row) subtract(other file_summary_by_instance_row) {
168         this.COUNT_STAR -= other.COUNT_STAR
169         this.COUNT_READ -= other.COUNT_READ
170         this.COUNT_WRITE -= other.COUNT_WRITE
171         this.COUNT_MISC -= other.COUNT_MISC
172
173         this.SUM_TIMER_WAIT -= other.SUM_TIMER_WAIT
174         this.SUM_TIMER_READ -= other.SUM_TIMER_READ
175         this.SUM_TIMER_WRITE -= other.SUM_TIMER_WRITE
176         this.SUM_TIMER_MISC -= other.SUM_TIMER_MISC
177
178         this.SUM_NUMBER_OF_BYTES_READ -= other.SUM_NUMBER_OF_BYTES_READ
179         this.SUM_NUMBER_OF_BYTES_WRITE -= other.SUM_NUMBER_OF_BYTES_WRITE
180 }
181
182 // return the totals of a slice of rows
183 func (t file_summary_by_instance_rows) totals() file_summary_by_instance_row {
184         var totals file_summary_by_instance_row
185         totals.FILE_NAME = "TOTALS"
186
187         for i := range t {
188                 totals.add(t[i])
189         }
190
191         return totals
192 }
193
194 // clean up the given path reducing redundant stuff and return the clean path
195 func cleanup_path(path string) string {
196
197         for {
198                 orig_path := path
199                 path = re_one_or_the_other.ReplaceAllString(path, "/")
200                 path = re_slash_dot_dot_slash.ReplaceAllString(path, "/")
201                 if orig_path == path { // no change so give up
202                         break
203                 }
204         }
205
206         return path
207 }
208
209 // From the original FILE_NAME we want to generate a simpler name to use.
210 // This simpler name may also merge several different filenames into one.
211 func (t file_summary_by_instance_row) simple_name(global_variables map[string]string) string {
212
213         path := t.FILE_NAME
214
215         if cached_result, err := cache.Get(path); err == nil {
216                 return cached_result
217         }
218
219         // @0024 --> $ (should do this more generically)
220         path = re_dollar.ReplaceAllLiteralString(path, "$")
221
222         // this should probably be ordered from most expected regexp to least
223         if m1 := re_table_file.FindStringSubmatch(path); m1 != nil {
224                 // we may match temporary tables so check for them
225                 if m2 := re_temp_table.FindStringSubmatch(m1[2]); m2 != nil {
226                         return cache.Put(path, "<temp_table>")
227                 }
228
229                 // we may match partitioned tables so check for them
230                 if m3 := re_part_table.FindStringSubmatch(m1[2]); m3 != nil {
231                         return cache.Put(path, m1[1]+"."+m3[1]) // <schema>.<table> (less partition info)
232                 }
233
234                 return cache.Put(path, m1[1]+"."+m1[2]) // <schema>.<table>
235         }
236         if re_ibdata.MatchString(path) == true {
237                 return cache.Put(path, "<ibdata>")
238         }
239         if re_redo_log.MatchString(path) == true {
240                 return cache.Put(path, "<redo_log>")
241         }
242         if re_binlog.MatchString(path) == true {
243                 return cache.Put(path, "<binlog>")
244         }
245         if re_db_opt.MatchString(path) == true {
246                 return cache.Put(path, "<db_opt>")
247         }
248         if re_slowlog.MatchString(path) == true {
249                 return cache.Put(path, "<slow_log>")
250         }
251         if re_auto_cnf.MatchString(path) == true {
252                 return cache.Put(path, "<auto_cnf>")
253         }
254         // relay logs are a bit complicated. If a full path then easy to
255         // identify,but if a relative path we may need to add $datadir,
256         // but also if as I do we have a ../blah/somewhere/path then we
257         // need to make it match too.
258         if len(global_variables["relay_log"]) > 0 {
259                 relay_log := global_variables["relay_log"]
260                 if relay_log[0] != '/' { // relative path
261                         relay_log = cleanup_path(global_variables["datadir"] + relay_log) // datadir always ends in /
262                 }
263                 re_relay_log := relay_log + `\.(\d{6}|index)$`
264                 if regexp.MustCompile(re_relay_log).MatchString(path) == true {
265                         return cache.Put(path, "<relay_log>")
266                 }
267         }
268         if re_pid_file.MatchString(path) == true {
269                 return cache.Put(path, "<pid_file>")
270         }
271         if re_error_msg.MatchString(path) == true {
272                 return cache.Put(path, "<errmsg>")
273         }
274         if re_charset.MatchString(path) == true {
275                 return cache.Put(path, "<charset>")
276         }
277         // clean up datadir to <datadir>
278         if len(global_variables["datadir"]) > 0 {
279                 re_datadir := regexp.MustCompile("^" + global_variables["datadir"])
280                 path = re_datadir.ReplaceAllLiteralString(path, "<datadir>/")
281         }
282
283         return cache.Put(path, path)
284 }
285
286 // Convert the imported "table" to a merged one with merged data.
287 // Combine all entries with the same "FILE_NAME" by adding their values.
288 func merge_by_table_name(orig file_summary_by_instance_rows, global_variables map[string]string) file_summary_by_instance_rows {
289         start := time.Now()
290         t := make(file_summary_by_instance_rows, 0, len(orig))
291
292         m := make(map[string]file_summary_by_instance_row)
293
294         // iterate over source table
295         for i := range orig {
296                 var file_name string
297                 var new_row file_summary_by_instance_row
298                 orig_row := orig[i]
299
300                 if orig_row.COUNT_STAR > 0 {
301                         file_name = orig_row.simple_name(global_variables)
302
303                         // check if we have an entry in the map
304                         if _, found := m[file_name]; found {
305                                 new_row = m[file_name]
306                         } else {
307                                 new_row.FILE_NAME = file_name
308                         }
309                         new_row.add(orig_row)
310                         m[file_name] = new_row // update the map with the new value
311                 }
312         }
313
314         // add the map contents back into the table
315         for _, row := range m {
316                 t = append(t, row)
317         }
318
319         lib.Logger.Println("merge_by_table_name() took:", time.Duration(time.Since(start)).String())
320         return t
321 }
322
323 // Select the raw data from the database into file_summary_by_instance_rows
324 // - filter out empty values
325 // - merge rows with the same name into a single row
326 // - change FILE_NAME into a more descriptive value.
327 func select_fsbi_rows(dbh *sql.DB) file_summary_by_instance_rows {
328         var t file_summary_by_instance_rows
329         start := time.Now()
330
331         sql := "SELECT FILE_NAME, COUNT_STAR, SUM_TIMER_WAIT, COUNT_READ, SUM_TIMER_READ, SUM_NUMBER_OF_BYTES_READ, COUNT_WRITE, SUM_TIMER_WRITE, SUM_NUMBER_OF_BYTES_WRITE, COUNT_MISC, SUM_TIMER_MISC FROM file_summary_by_instance"
332
333         rows, err := dbh.Query(sql)
334         if err != nil {
335                 log.Fatal(err)
336         }
337         defer rows.Close()
338
339         for rows.Next() {
340                 var r file_summary_by_instance_row
341
342                 if err := rows.Scan(&r.FILE_NAME, &r.COUNT_STAR, &r.SUM_TIMER_WAIT, &r.COUNT_READ, &r.SUM_TIMER_READ, &r.SUM_NUMBER_OF_BYTES_READ, &r.COUNT_WRITE, &r.SUM_TIMER_WRITE, &r.SUM_NUMBER_OF_BYTES_WRITE, &r.COUNT_MISC, &r.SUM_TIMER_MISC); err != nil {
343                         log.Fatal(err)
344                 }
345                 t = append(t, r)
346         }
347         if err := rows.Err(); err != nil {
348                 log.Fatal(err)
349         }
350         lib.Logger.Println("select_fsbi_rows() took:", time.Duration(time.Since(start)).String())
351
352         return t
353 }
354
355 // remove the initial values from those rows where there's a match
356 // - if we find a row we can't match ignore it
357 func (this *file_summary_by_instance_rows) subtract(initial file_summary_by_instance_rows) {
358         i_by_name := make(map[string]int)
359
360         // iterate over rows by name
361         for i := range initial {
362                 i_by_name[initial[i].name()] = i
363         }
364
365         for i := range *this {
366                 if _, ok := i_by_name[(*this)[i].name()]; ok {
367                         initial_i := i_by_name[(*this)[i].name()]
368                         (*this)[i].subtract(initial[initial_i])
369                 }
370         }
371 }
372
// sort.Interface implementation: rows order by SUM_TIMER_WAIT
// descending, breaking ties by FILE_NAME ascending.
func (t file_summary_by_instance_rows) Len() int      { return len(t) }
func (t file_summary_by_instance_rows) Swap(i, j int) { t[i], t[j] = t[j], t[i] }
func (t file_summary_by_instance_rows) Less(i, j int) bool {
	return (t[i].SUM_TIMER_WAIT > t[j].SUM_TIMER_WAIT) ||
		((t[i].SUM_TIMER_WAIT == t[j].SUM_TIMER_WAIT) && (t[i].FILE_NAME < t[j].FILE_NAME))
}
379
// sort orders the rows in place using Less() (highest SUM_TIMER_WAIT
// first). Passing the pointer is fine: *file_summary_by_instance_rows
// picks up the value-receiver Len/Swap/Less methods.
func (t *file_summary_by_instance_rows) sort() {
	sort.Sort(t)
}
383
384 // if the data in t2 is "newer", "has more values" than t then it needs refreshing.
385 // check this by comparing totals.
386 func (t file_summary_by_instance_rows) needs_refresh(t2 file_summary_by_instance_rows) bool {
387         my_totals := t.totals()
388         t2_totals := t2.totals()
389
390         return my_totals.SUM_TIMER_WAIT > t2_totals.SUM_TIMER_WAIT
391 }