MySQL "inspect" performance (#62)

* mysql inspect performance

* tidying up mysql metadata
This commit is contained in:
Neil O'Toole 2020-08-18 13:13:25 -06:00 committed by GitHub
parent 65259754f5
commit 929b81c6d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 270 additions and 228 deletions

View File

@ -3,7 +3,10 @@ package mysql
import ( import (
"testing" "testing"
"github.com/go-sql-driver/mysql"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
"github.com/neilotoole/sq/libsq/errz"
) )
var KindFromDBTypeName = kindFromDBTypeName var KindFromDBTypeName = kindFromDBTypeName
@ -26,3 +29,17 @@ func TestPlaceholders(t *testing.T) {
require.Equal(t, tc.want, got) require.Equal(t, tc.want, got)
} }
} }
// TestHasErrCode verifies that hasErrCode detects a MySQL error
// number both on a bare *mysql.MySQLError and on that same error
// after it has been wrapped by errz.Err.
func TestHasErrCode(t *testing.T) {
	var tblNotExistErr error = &mysql.MySQLError{Number: 1146, Message: "I'm not here"}
	require.True(t, hasErrCode(tblNotExistErr, errNumTableNotExist))

	// The error number must still be found through the errz wrapper.
	wrappedErr := errz.Err(tblNotExistErr)
	require.True(t, hasErrCode(wrappedErr, errNumTableNotExist))
}

View File

@ -16,6 +16,7 @@ import (
"github.com/neilotoole/sq/libsq/errz" "github.com/neilotoole/sq/libsq/errz"
"github.com/neilotoole/sq/libsq/source" "github.com/neilotoole/sq/libsq/source"
"github.com/neilotoole/sq/libsq/sqlz" "github.com/neilotoole/sq/libsq/sqlz"
"github.com/neilotoole/sq/libsq/stringz"
) )
// kindFromDBTypeName determines the sqlz.Kind from the database // kindFromDBTypeName determines the sqlz.Kind from the database
@ -128,57 +129,8 @@ func getNewRecordFunc(rowMeta sqlz.RecordMeta) driver.NewRecordFunc {
} }
} }
// getSourceMetadata builds the source.Metadata for src by querying db.
// It collects, in sequence: the server/schema summary, the database
// variables, and the metadata (including row count and columns) of
// each table in the current schema.
//
// It returns a nil Metadata and a non-nil error if any query fails.
func getSourceMetadata(ctx context.Context, log lg.Log, src *source.Source, db sqlz.DB) (*source.Metadata, error) {
	md := &source.Metadata{SourceType: Type, DBDriverType: Type, Handle: src.Handle, Location: src.Location}

	const summaryQuery = `SELECT @@GLOBAL.version, @@GLOBAL.version_comment, @@GLOBAL.version_compile_os,
@@GLOBAL.version_compile_machine, DATABASE(), CURRENT_USER(),
(SELECT SUM( data_length + index_length )
FROM information_schema.TABLES WHERE TABLE_SCHEMA = DATABASE()) AS size`

	var version, versionComment, versionOS, versionArch, schema string

	// SUM() evaluates to NULL when there is nothing to sum (e.g. an empty
	// schema), so the size is scanned into a nullable value instead of
	// directly into md.Size.
	var size sql.NullInt64

	err := db.QueryRowContext(ctx, summaryQuery).Scan(&version, &versionComment, &versionOS, &versionArch, &schema, &md.User, &size)
	if err != nil {
		return nil, errz.Err(err)
	}

	if size.Valid {
		md.Size = size.Int64
	}

	md.Name = schema
	md.FQName = schema
	md.DBVersion = version
	md.DBProduct = fmt.Sprintf("%s %s / %s (%s)", versionComment, version, versionOS, versionArch)

	md.DBVars, err = getDBVarsMeta(ctx, log, db)
	if err != nil {
		return nil, err
	}

	// Note that this does not populate the RowCount or Columns fields of the
	// table metadata.
	tblMetas, err := getSchemaTableMetas(ctx, log, db, schema)
	if err != nil {
		return nil, err
	}

	// Populate the RowCount and Columns fields of each table metadata.
	// Note that this function may set elements of tblMetas to nil
	// if the table is not found (can happen if a table is dropped
	// during metadata collection).
	err = setTableMetaDetails(ctx, log, db, tblMetas)
	if err != nil {
		return nil, err
	}

	// Filter any nil tables
	md.Tables = make([]*source.TableMetadata, 0, len(tblMetas))
	for i := range tblMetas {
		if tblMetas[i] != nil {
			md.Tables = append(md.Tables, tblMetas[i])
		}
	}

	return md, nil
}

// getTableMetadata gets the metadata for a single table. It is the
// implementation of driver.Database.TableMetadata.
func getTableMetadata(ctx context.Context, log lg.Log, db sqlz.DB, tblName string) (*source.TableMetadata, error) { func getTableMetadata(ctx context.Context, log lg.Log, db sqlz.DB, tblName string) (*source.TableMetadata, error) {
query := `SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, (DATA_LENGTH + INDEX_LENGTH) AS table_size, query := `SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, (DATA_LENGTH + INDEX_LENGTH) AS table_size,
(SELECT COUNT(*) FROM ` + "`" + tblName + "`" + `) AS row_count (SELECT COUNT(*) FROM ` + "`" + tblName + "`" + `) AS row_count
@ -210,94 +162,6 @@ WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = ?`
return tblMeta, nil return tblMeta, nil
} }
// getSchemaTableMetas lists the tables and views of schema, returning
// one partially-populated TableMetadata per entry. Name, FQName, table
// type, comment and (when reported) size are set; RowCount and Columns
// are not set here and must be populated separately.
func getSchemaTableMetas(ctx context.Context, log lg.Log, db sqlz.DB, schema string) ([]*source.TableMetadata, error) {
	const query = `SELECT TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, (DATA_LENGTH + INDEX_LENGTH) AS table_size
FROM information_schema.TABLES
WHERE TABLE_SCHEMA = ?
ORDER BY TABLE_SCHEMA, TABLE_NAME ASC`

	rows, err := db.QueryContext(ctx, query, schema)
	if err != nil {
		return nil, errz.Err(err)
	}
	defer log.WarnIfCloseError(rows)

	var result []*source.TableMetadata
	for rows.Next() {
		var (
			md   = new(source.TableMetadata)
			size sql.NullInt64
		)

		if err = rows.Scan(&md.Name, &md.DBTableType, &md.Comment, &size); err != nil {
			return nil, errz.Err(err)
		}

		md.TableType = canonicalTableType(md.DBTableType)
		md.FQName = schema + "." + md.Name

		// A view (as opposed to a table) typically reports a NULL
		// size, in which case Size is left nil.
		if size.Valid {
			md.Size = &size.Int64
		}

		result = append(result, md)
	}

	if err = rows.Err(); err != nil {
		return nil, errz.Err(err)
	}

	return result, nil
}
// setTableMetaDetails sets the RowCount and Columns field on each
// of tblMetas, using one errgroup goroutine per table. It can happen
// that a table in tblMetas is dropped during the metadata collection
// process: if so, that element of tblMetas is set to nil, and the
// caller is expected to filter out the nil elements.
//
// Any other query error aborts the collection and is returned.
func setTableMetaDetails(ctx context.Context, log lg.Log, db sqlz.DB, tblMetas []*source.TableMetadata) error {
	g, gctx := errgroup.WithContextN(ctx, driver.Tuning.ErrgroupNumG, driver.Tuning.ErrgroupQSize)

	for i := range tblMetas {
		i := i // capture the per-iteration index for the closure

		g.Go(func() error {
			// Captured up front because tblMetas[i] may be set to nil below.
			tblName := tblMetas[i].Name

			// Use gctx (not ctx) so that this query is canceled as soon as
			// any sibling goroutine in the group fails.
			err := db.QueryRowContext(gctx, "SELECT COUNT(*) FROM `"+tblName+"`").Scan(&tblMetas[i].RowCount)
			if err != nil {
				if hasErrCode(err, errNumTableNotExist) {
					// Can happen if the table is dropped while we're collecting metadata,
					log.Warnf("table metadata: table %q appears not to exist (continuing regardless): %v", tblName, err)

					// The caller needs to filter out this nil entry.
					tblMetas[i] = nil
					return nil
				}

				// Any other failure is a genuine error: report it rather
				// than silently continuing with a zero row count.
				return errz.Err(err)
			}

			cols, err := getColumnMetadata(gctx, log, db, tblName)
			if err != nil {
				if hasErrCode(err, errNumTableNotExist) {
					log.Warnf("table metadata: table %q appears not to exist (continuing regardless): %v", tblName, err)
					tblMetas[i] = nil
					return nil
				}

				return err
			}

			tblMetas[i].Columns = cols
			return nil
		})
	}

	if err := g.Wait(); err != nil {
		return errz.Err(err)
	}

	return nil
}
// getColumnMetadata returns column metadata for tblName. // getColumnMetadata returns column metadata for tblName.
func getColumnMetadata(ctx context.Context, log lg.Log, db sqlz.DB, tblName string) ([]*source.ColMetadata, error) { func getColumnMetadata(ctx context.Context, log lg.Log, db sqlz.DB, tblName string) ([]*source.ColMetadata, error) {
const query = `SELECT column_name, data_type, column_type, ordinal_position, column_default, is_nullable, column_key, column_comment, extra const query = `SELECT column_name, data_type, column_type, ordinal_position, column_default, is_nullable, column_key, column_comment, extra
@ -340,6 +204,75 @@ ORDER BY cols.ordinal_position ASC`
return cols, errz.Err(rows.Err()) return cols, errz.Err(rows.Err())
} }
// getSourceMetadata is the implementation of driver.Database.SourceMetadata.
//
// Building the metadata takes several queries, which are issued
// concurrently via errgroup: one for the source summary, one for the
// database variables, and one for the table metadata. Each goroutine
// writes to different fields of the returned Metadata.
//
// Background: in the earlier, largely sequential implementation, the
// main perf roadblock was getting the row count for each table/view.
// For accuracy it is necessary to perform "SELECT COUNT(*) FROM tbl"
// for each table/view. For other databases (such as sqlite) it was
// performant to UNION ALL these SELECTs into one (or a few) queries, e.g.:
//
//	SELECT COUNT(*) FROM actor
//	UNION ALL
//	SELECT COUNT(*) FROM address
//	UNION ALL
//	[...]
//
// However, this seemed to perform poorly (at least for MySQL 5.6, which
// was the main focus of testing). Fairly reasonable results were seen
// by spinning off a goroutine (via errgroup) for each SELECT COUNT(*)
// query. That said, the testing/benchmarking was far from exhaustive,
// and this entire thing has a bit of a code smell.
func getSourceMetadata(ctx context.Context, log lg.Log, src *source.Source, db sqlz.DB) (*source.Metadata, error) {
	md := &source.Metadata{
		SourceType:   Type,
		DBDriverType: Type,
		Handle:       src.Handle,
		Location:     src.Location,
	}

	grp, grpCtx := errgroup.WithContext(ctx)

	// Summary: version, product, schema name, user, size.
	grp.Go(func() error {
		return setSourceSummaryMeta(grpCtx, db, md)
	})

	// Database variables.
	grp.Go(func() (err error) {
		md.DBVars, err = getDBVarsMeta(grpCtx, log, db)
		return err
	})

	// Tables, their columns and row counts.
	grp.Go(func() (err error) {
		md.Tables, err = getAllTblMetas(grpCtx, log, db)
		return err
	})

	if err := grp.Wait(); err != nil {
		return nil, err
	}

	return md, nil
}
// setSourceSummaryMeta queries db for the server version details, the
// current schema, the current user and the total size of the current
// schema's tables, and sets the Name, FQName, DBVersion, DBProduct,
// User and Size fields of md accordingly.
//
// Size is left unchanged if the database reports a NULL size, which is
// what SUM() produces when there is nothing to sum (e.g. a schema with
// no tables).
func setSourceSummaryMeta(ctx context.Context, db sqlz.DB, md *source.Metadata) error {
	const summaryQuery = `SELECT @@GLOBAL.version, @@GLOBAL.version_comment, @@GLOBAL.version_compile_os,
@@GLOBAL.version_compile_machine, DATABASE(), CURRENT_USER(),
(SELECT SUM( data_length + index_length )
FROM information_schema.TABLES WHERE TABLE_SCHEMA = DATABASE()) AS size`

	var version, versionComment, versionOS, versionArch, schema string

	// Scan the size via a nullable value: scanning a NULL directly into
	// md.Size would make Scan fail.
	var size sql.NullInt64

	err := db.QueryRowContext(ctx, summaryQuery).Scan(&version, &versionComment, &versionOS, &versionArch, &schema, &md.User, &size)
	if err != nil {
		return errz.Err(err)
	}

	if size.Valid {
		md.Size = size.Int64
	}

	md.Name = schema
	md.FQName = schema
	md.DBVersion = version
	md.DBProduct = fmt.Sprintf("%s %s / %s (%s)", versionComment, version, versionOS, versionArch)
	return nil
}
// getDBVarsMeta returns the database variables. // getDBVarsMeta returns the database variables.
func getDBVarsMeta(ctx context.Context, log lg.Log, db sqlz.DB) ([]source.DBVar, error) { func getDBVarsMeta(ctx context.Context, log lg.Log, db sqlz.DB) ([]source.DBVar, error) {
var dbVars []source.DBVar var dbVars []source.DBVar
@ -366,6 +299,110 @@ func getDBVarsMeta(ctx context.Context, log lg.Log, db sqlz.DB) ([]source.DBVar,
return dbVars, nil return dbVars, nil
} }
// getAllTblMetas returns TableMetadata for each table/view in db.
//
// A single query joining information_schema.TABLES with
// information_schema.COLUMNS supplies the table and column details,
// yielding one result row per column. The row count of each table is
// fetched by a separate "SELECT COUNT(*)" query, executed in an
// errgroup goroutine that is started when the table is first
// encountered in the result set.
//
// If scanning fails or ctx is canceled, the outstanding row-count
// queries are canceled and waited for before the error is returned,
// so that no goroutine outlives this function.
func getAllTblMetas(ctx context.Context, log lg.Log, db sqlz.DB) ([]*source.TableMetadata, error) {
	const query = `SELECT t.TABLE_SCHEMA, t.TABLE_NAME, t.TABLE_TYPE, t.TABLE_COMMENT, (DATA_LENGTH + INDEX_LENGTH) AS table_size,
c.COLUMN_NAME, c.ORDINAL_POSITION, c.COLUMN_KEY, c.DATA_TYPE, c.COLUMN_TYPE, c.IS_NULLABLE, c.COLUMN_DEFAULT, c.COLUMN_COMMENT, c.EXTRA
FROM information_schema.TABLES t
LEFT JOIN information_schema.COLUMNS c
ON c.TABLE_CATALOG = t.TABLE_CATALOG
AND c.TABLE_SCHEMA = t.TABLE_SCHEMA
AND c.TABLE_NAME = t.TABLE_NAME
WHERE t.TABLE_SCHEMA = DATABASE()
ORDER BY c.TABLE_NAME ASC, c.ORDINAL_POSITION ASC`

	// Query results look like:
	// +------------+----------+----------+-------------+----------+-----------+----------------+----------+---------+--------------------+-----------+-----------------+--------------+---------------------------+
	// |TABLE_SCHEMA|TABLE_NAME|TABLE_TYPE|TABLE_COMMENT|table_size|COLUMN_NAME|ORDINAL_POSITION|COLUMN_KEY|DATA_TYPE|COLUMN_TYPE |IS_NULLABLE|COLUMN_DEFAULT |COLUMN_COMMENT|EXTRA |
	// +------------+----------+----------+-------------+----------+-----------+----------------+----------+---------+--------------------+-----------+-----------------+--------------+---------------------------+
	// |sakila |actor |BASE TABLE| |32768 |actor_id |1 |PRI |smallint |smallint(5) unsigned|NO |NULL | |auto_increment |
	// |sakila |actor |BASE TABLE| |32768 |first_name |2 | |varchar |varchar(45) |NO |NULL | | |
	// |sakila |actor |BASE TABLE| |32768 |last_name |3 |MUL |varchar |varchar(45) |NO |NULL | | |
	// |sakila |actor |BASE TABLE| |32768 |last_update|4 | |timestamp|timestamp |NO |CURRENT_TIMESTAMP| |on update CURRENT_TIMESTAMP|
	// |sakila |actor_info|VIEW |VIEW |NULL |actor_id |1 | |smallint |smallint(5) unsigned|NO |0 | | |

	// Derive a cancelable context, so that the row-count queries can be
	// aborted if this function has to bail out early.
	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	// gRowCount is an errgroup for fetching the
	// row count for each table.
	gRowCount, gctx := errgroup.WithContextN(ctx, 32, 1024)

	rows, err := db.QueryContext(ctx, query)
	if err != nil {
		return nil, errz.Err(err)
	}
	defer log.WarnIfCloseError(rows)

	var tblMetas []*source.TableMetadata

	// scanRows consumes the result set, appending to tblMetas and
	// starting a row-count goroutine for each newly encountered table.
	scanRows := func() error {
		var schema, curTblName, curTblType, curTblComment string
		var curTblSize sql.NullInt64
		var curTblMeta *source.TableMetadata

		for rows.Next() {
			select {
			case <-ctx.Done():
				return ctx.Err()
			default:
			}

			col := &source.ColMetadata{}
			var colNullable, colKey, colExtra string
			colDefault := sql.NullString{}

			scanErr := rows.Scan(&schema, &curTblName, &curTblType, &curTblComment, &curTblSize, &col.Name, &col.Position,
				&colKey, &col.BaseType, &col.ColumnType, &colNullable, &colDefault, &col.Comment, &colExtra)
			if scanErr != nil {
				return errz.Err(scanErr)
			}

			if curTblMeta == nil || curTblMeta.Name != curTblName {
				// On our first time encountering a new table name, we create a new TableMetadata
				curTblMeta = &source.TableMetadata{
					Name:        curTblName,
					FQName:      schema + "." + curTblName,
					DBTableType: curTblType,
					TableType:   canonicalTableType(curTblType),
					Comment:     curTblComment,
				}

				if curTblSize.Valid {
					size := curTblSize.Int64
					curTblMeta.Size = &size
				}

				// Inside a backtick-quoted identifier, an embedded backtick
				// must be doubled, otherwise the generated SQL is malformed.
				countQuery := "SELECT COUNT(*) FROM `" + strings.ReplaceAll(curTblName, "`", "``") + "`"
				rowCount := &curTblMeta.RowCount

				gRowCount.Go(func() error {
					return errz.Err(db.QueryRowContext(gctx, countQuery).Scan(rowCount))
				})

				tblMetas = append(tblMetas, curTblMeta)
			}

			var parseErr error
			col.Nullable, parseErr = stringz.ParseBool(colNullable)
			if parseErr != nil {
				return parseErr
			}

			col.DefaultValue = colDefault.String
			col.Kind = kindFromDBTypeName(log, col.Name, col.BaseType)
			if strings.Contains(colKey, "PRI") {
				col.PrimaryKey = true
			}

			curTblMeta.Columns = append(curTblMeta.Columns, col)
		}

		return errz.Err(rows.Err())
	}

	if err = scanRows(); err != nil {
		// Abort any in-flight row-count queries, and wait for their
		// goroutines to finish; the scan error is the one reported.
		cancel()
		_ = gRowCount.Wait()
		return nil, err
	}

	if err = gRowCount.Wait(); err != nil {
		return nil, err
	}

	return tblMetas, nil
}
// newInsertMungeFunc is lifted from driver.DefaultInsertMungeFunc. // newInsertMungeFunc is lifted from driver.DefaultInsertMungeFunc.
func newInsertMungeFunc(destTbl string, destMeta sqlz.RecordMeta) driver.InsertMungeFunc { func newInsertMungeFunc(destTbl string, destMeta sqlz.RecordMeta) driver.InsertMungeFunc {
return func(rec sqlz.Record) error { return func(rec sqlz.Record) error {

View File

@ -1,86 +0,0 @@
package sqlite3
import (
"context"
"database/sql"
"os"
"github.com/neilotoole/lg"
"github.com/neilotoole/sq/libsq/driver"
"github.com/neilotoole/sq/libsq/errz"
"github.com/neilotoole/sq/libsq/source"
)
// database is this package's implementation of driver.Database. It
// bundles the open *sql.DB with the source it was opened from, the
// driver that created it, and a logger.
type database struct {
	log  lg.Log
	db   *sql.DB
	src  *source.Source
	drvr *driveri
}

// DB implements driver.Database, returning the underlying *sql.DB.
func (d *database) DB() *sql.DB {
	return d.db
}

// SQLDriver implements driver.Database, returning the driver
// held by this database.
func (d *database) SQLDriver() driver.SQLDriver {
	return d.drvr
}

// Source implements driver.Database, returning the source
// held by this database.
func (d *database) Source() *source.Source {
	return d.src
}

// TableMetadata implements driver.Database. It delegates to
// getTableMetadata for the table named tblName.
func (d *database) TableMetadata(ctx context.Context, tblName string) (*source.TableMetadata, error) {
	return getTableMetadata(ctx, d.log, d.DB(), tblName)
}

// SourceMetadata implements driver.Database. The returned metadata
// combines the SQLite version and schema name (queried from the
// database), the size and name of the database file (from the
// filesystem), and the metadata of every table.
func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error) {
	md := &source.Metadata{
		Handle:       d.src.Handle,
		SourceType:   Type,
		DBDriverType: dbDrvr,
	}

	fpath, err := PathFromLocation(d.src)
	if err != nil {
		return nil, err
	}

	// https://stackoverflow.com/questions/9646353/how-to-find-sqlite-database-file-version
	const versionQuery = "SELECT sqlite_version(), (SELECT name FROM pragma_database_list ORDER BY seq LIMIT 1);"

	var schemaName string // typically "main"
	if err = d.DB().QueryRowContext(ctx, versionQuery).Scan(&md.DBVersion, &schemaName); err != nil {
		return nil, errz.Err(err)
	}
	md.DBProduct = "SQLite3 v" + md.DBVersion

	// Size and name are taken from the database file itself.
	fi, err := os.Stat(fpath)
	if err != nil {
		return nil, errz.Err(err)
	}
	fname := fi.Name()

	md.Size = fi.Size()
	md.Name = fname
	md.FQName = fname + "/" + schemaName
	md.Location = d.src.Location

	if md.Tables, err = getAllTblMeta(ctx, d.log, d.db); err != nil {
		return nil, err
	}

	return md, nil
}

// Close implements driver.Database. It logs the close at debug
// level, then closes the underlying *sql.DB.
func (d *database) Close() error {
	d.log.Debugf("Close database: %s", d.src)

	closeErr := d.db.Close()
	return errz.Err(closeErr)
}

View File

@ -338,10 +338,10 @@ ORDER BY m.name, p.cid
col := &source.ColMetadata{} col := &source.ColMetadata{}
var notnull int64 var notnull int64
defaultValue := &sql.NullString{} colDefault := &sql.NullString{}
pkValue := &sql.NullInt64{} pkValue := &sql.NullInt64{}
err = rows.Scan(&curTblName, &curTblType, &col.Position, &col.Name, &col.BaseType, &notnull, defaultValue, pkValue) err = rows.Scan(&curTblName, &curTblType, &col.Position, &col.Name, &col.BaseType, &notnull, colDefault, pkValue)
if err != nil { if err != nil {
return nil, errz.Err(err) return nil, errz.Err(err)
} }
@ -374,7 +374,7 @@ ORDER BY m.name, p.cid
col.PrimaryKey = pkValue.Int64 > 0 // pkVal can be 0,1,2 etc col.PrimaryKey = pkValue.Int64 > 0 // pkVal can be 0,1,2 etc
col.ColumnType = col.BaseType col.ColumnType = col.BaseType
col.Nullable = notnull == 0 col.Nullable = notnull == 0
col.DefaultValue = defaultValue.String col.DefaultValue = colDefault.String
col.Kind = kindFromDBTypeName(log, col.Name, col.BaseType, nil) col.Kind = kindFromDBTypeName(log, col.Name, col.BaseType, nil)
curTblMeta.Columns = append(curTblMeta.Columns, col) curTblMeta.Columns = append(curTblMeta.Columns, col)

View File

@ -7,6 +7,7 @@ import (
"context" "context"
"database/sql" "database/sql"
"fmt" "fmt"
"os"
"path/filepath" "path/filepath"
"strings" "strings"
@ -365,6 +366,79 @@ func (d *driveri) getTableRecordMeta(ctx context.Context, db sqlz.DB, tblName st
return destCols, nil return destCols, nil
} }
// database is this package's implementation of driver.Database. It
// bundles the open *sql.DB with the source it was opened from, the
// driver that created it, and a logger.
type database struct {
	log  lg.Log
	db   *sql.DB
	src  *source.Source
	drvr *driveri
}

// DB implements driver.Database, returning the underlying *sql.DB.
func (d *database) DB() *sql.DB {
	return d.db
}

// SQLDriver implements driver.Database, returning the driver
// held by this database.
func (d *database) SQLDriver() driver.SQLDriver {
	return d.drvr
}

// Source implements driver.Database, returning the source
// held by this database.
func (d *database) Source() *source.Source {
	return d.src
}

// TableMetadata implements driver.Database. It delegates to
// getTableMetadata for the table named tblName.
func (d *database) TableMetadata(ctx context.Context, tblName string) (*source.TableMetadata, error) {
	return getTableMetadata(ctx, d.log, d.DB(), tblName)
}

// SourceMetadata implements driver.Database. The returned metadata
// combines the SQLite version and schema name (queried from the
// database), the size and name of the database file (from the
// filesystem), and the metadata of every table.
func (d *database) SourceMetadata(ctx context.Context) (*source.Metadata, error) {
	md := &source.Metadata{
		Handle:       d.src.Handle,
		SourceType:   Type,
		DBDriverType: dbDrvr,
	}

	fpath, err := PathFromLocation(d.src)
	if err != nil {
		return nil, err
	}

	// https://stackoverflow.com/questions/9646353/how-to-find-sqlite-database-file-version
	const versionQuery = "SELECT sqlite_version(), (SELECT name FROM pragma_database_list ORDER BY seq LIMIT 1);"

	var schemaName string // typically "main"
	if err = d.DB().QueryRowContext(ctx, versionQuery).Scan(&md.DBVersion, &schemaName); err != nil {
		return nil, errz.Err(err)
	}
	md.DBProduct = "SQLite3 v" + md.DBVersion

	// Size and name are taken from the database file itself.
	fi, err := os.Stat(fpath)
	if err != nil {
		return nil, errz.Err(err)
	}
	fname := fi.Name()

	md.Size = fi.Size()
	md.Name = fname
	md.FQName = fname + "/" + schemaName
	md.Location = d.src.Location

	if md.Tables, err = getAllTblMeta(ctx, d.log, d.db); err != nil {
		return nil, err
	}

	return md, nil
}

// Close implements driver.Database. It logs the close at debug
// level, then closes the underlying *sql.DB.
func (d *database) Close() error {
	d.log.Debugf("Close database: %s", d.src)

	closeErr := d.db.Close()
	return errz.Err(closeErr)
}
// NewScratchSource returns a new scratch src. Currently this // NewScratchSource returns a new scratch src. Currently this
// defaults to a sqlite-backed source. // defaults to a sqlite-backed source.
func NewScratchSource(log lg.Log, name string) (src *source.Source, clnup func() error, err error) { func NewScratchSource(log lg.Log, name string) (src *source.Source, clnup func() error, err error) {