2020-08-06 20:58:47 +03:00
|
|
|
package mysql
|
|
|
|
|
|
|
|
import (
|
|
|
|
"context"
|
|
|
|
"database/sql"
|
2023-05-24 03:42:36 +03:00
|
|
|
"errors"
|
2020-08-06 20:58:47 +03:00
|
|
|
"fmt"
|
|
|
|
"reflect"
|
2023-05-19 17:24:18 +03:00
|
|
|
"strconv"
|
2020-08-06 20:58:47 +03:00
|
|
|
"strings"
|
|
|
|
"time"
|
|
|
|
|
2023-05-24 03:42:36 +03:00
|
|
|
"github.com/go-sql-driver/mysql"
|
|
|
|
|
|
|
|
"github.com/samber/lo"
|
|
|
|
|
2023-05-22 18:08:14 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/record"
|
|
|
|
|
2023-05-03 15:36:10 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/options"
|
|
|
|
|
2023-04-03 00:17:15 +03:00
|
|
|
"golang.org/x/sync/errgroup"
|
|
|
|
|
2023-04-02 22:49:45 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lga"
|
|
|
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg/lgm"
|
|
|
|
|
|
|
|
"github.com/neilotoole/sq/libsq/core/lg"
|
|
|
|
|
|
|
|
"golang.org/x/exp/slog"
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/core/errz"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/kind"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/sqlz"
|
|
|
|
"github.com/neilotoole/sq/libsq/core/stringz"
|
2020-08-06 20:58:47 +03:00
|
|
|
"github.com/neilotoole/sq/libsq/driver"
|
|
|
|
"github.com/neilotoole/sq/libsq/source"
|
|
|
|
)
|
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
// kindFromDBTypeName determines the Kind from the database
|
|
|
|
// type name. For example, "VARCHAR(64)" -> kind.Text.
|
2023-04-02 22:49:45 +03:00
|
|
|
func kindFromDBTypeName(log *slog.Logger, colName, dbTypeName string) kind.Kind {
|
2020-08-23 13:42:15 +03:00
|
|
|
var knd kind.Kind
|
2020-08-06 20:58:47 +03:00
|
|
|
dbTypeName = strings.ToUpper(dbTypeName)
|
|
|
|
|
|
|
|
// Given variations such as VARCHAR(255), we first trim the parens
|
|
|
|
// parts. Thus VARCHAR(255) becomes VARCHAR.
|
|
|
|
i := strings.IndexRune(dbTypeName, '(')
|
|
|
|
if i > 0 {
|
|
|
|
dbTypeName = dbTypeName[0:i]
|
|
|
|
}
|
|
|
|
|
|
|
|
switch dbTypeName {
|
|
|
|
default:
|
2023-04-02 22:49:45 +03:00
|
|
|
log.Warn(
|
|
|
|
"Unknown MySQL column type: using alt type",
|
|
|
|
lga.DBType, dbTypeName,
|
|
|
|
lga.Col, colName,
|
|
|
|
lga.Alt, kind.Unknown,
|
|
|
|
)
|
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Unknown
|
2020-08-06 20:58:47 +03:00
|
|
|
case "":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Unknown
|
2022-12-17 05:09:49 +03:00
|
|
|
case "INTEGER", "INT", "TINYINT", "SMALLINT", "MEDIUMINT", "BIGINT", "YEAR", "BIT",
|
|
|
|
"UNSIGNED INTEGER", "UNSIGNED INT", "UNSIGNED TINYINT",
|
|
|
|
"UNSIGNED SMALLINT", "UNSIGNED MEDIUMINT", "UNSIGNED BIGINT":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Int
|
2020-08-06 20:58:47 +03:00
|
|
|
case "DECIMAL", "NUMERIC":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Decimal
|
2020-08-06 20:58:47 +03:00
|
|
|
case "CHAR", "VARCHAR", "TEXT", "TINYTEXT", "MEDIUMTEXT", "LONGTEXT":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Text
|
2020-08-06 20:58:47 +03:00
|
|
|
case "ENUM", "SET":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Text
|
2020-08-06 20:58:47 +03:00
|
|
|
case "JSON":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Text
|
2020-08-06 20:58:47 +03:00
|
|
|
case "VARBINARY", "BINARY", "BLOB", "MEDIUMBLOB", "LONGBLOB", "TINYBLOB":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Bytes
|
2020-08-06 20:58:47 +03:00
|
|
|
case "DATETIME", "TIMESTAMP":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Datetime
|
2020-08-06 20:58:47 +03:00
|
|
|
case "DATE":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Date
|
2022-12-18 09:42:11 +03:00
|
|
|
case "TIME": //nolint:goconst
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Time
|
2020-08-06 20:58:47 +03:00
|
|
|
case "FLOAT", "DOUBLE", "DOUBLE PRECISION", "REAL":
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Float
|
2020-08-06 20:58:47 +03:00
|
|
|
case "BOOL", "BOOLEAN":
|
|
|
|
// In practice these are not returned by the mysql driver.
|
2020-08-23 13:42:15 +03:00
|
|
|
knd = kind.Bool
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
return knd
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2023-05-22 18:08:14 +03:00
|
|
|
func recordMetaFromColumnTypes(log *slog.Logger, colTypes []*sql.ColumnType) record.Meta {
|
|
|
|
recMeta := make(record.Meta, len(colTypes))
|
2020-08-06 20:58:47 +03:00
|
|
|
|
|
|
|
for i, colType := range colTypes {
|
2021-03-13 20:41:15 +03:00
|
|
|
knd := kindFromDBTypeName(log, colType.Name(), colType.DatabaseTypeName())
|
2023-05-22 18:08:14 +03:00
|
|
|
colTypeData := record.NewColumnTypeData(colType, knd)
|
|
|
|
recMeta[i] = record.NewFieldMeta(colTypeData)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return recMeta
|
|
|
|
}
|
|
|
|
|
|
|
|
// getNewRecordFunc returns a NewRecordFunc that, after interacting
|
|
|
|
// with the standard driver.NewRecordFromScanRow, munges any skipped fields.
|
2022-12-18 09:07:38 +03:00
|
|
|
// In particular sql.NullTime is unboxed to *time.Time, and TIME fields
|
2020-08-06 20:58:47 +03:00
|
|
|
// are munged from RawBytes to string.
|
2023-05-22 18:08:14 +03:00
|
|
|
func getNewRecordFunc(rowMeta record.Meta) driver.NewRecordFunc {
|
|
|
|
return func(row []any) (record.Record, error) {
|
2020-08-06 20:58:47 +03:00
|
|
|
rec, skipped := driver.NewRecordFromScanRow(rowMeta, row, nil)
|
|
|
|
// We iterate over each element of val, checking for certain
|
|
|
|
// conditions. A more efficient approach might be to (in
|
|
|
|
// the outside func) iterate over the column metadata, and
|
|
|
|
// build a list of val elements to visit.
|
|
|
|
for _, i := range skipped {
|
2022-12-18 09:07:38 +03:00
|
|
|
if nullTime, ok := rec[i].(*sql.NullTime); ok {
|
2020-08-06 20:58:47 +03:00
|
|
|
if nullTime.Valid {
|
|
|
|
// Make a copy of the value
|
|
|
|
t := nullTime.Time
|
|
|
|
rec[i] = &t
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Else
|
|
|
|
rec[i] = nil
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if rowMeta[i].DatabaseTypeName() == "TIME" && rec[i] != nil {
|
|
|
|
// MySQL may return TIME as RawBytes... convert to a string.
|
|
|
|
// https://github.com/go-sql-driver/mysql#timetime-support
|
|
|
|
if rb, ok := rec[i].(*sql.RawBytes); ok {
|
|
|
|
if len(*rb) == 0 {
|
|
|
|
// shouldn't happen
|
|
|
|
zero := "00:00"
|
|
|
|
rec[i] = &zero
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// Else
|
|
|
|
text := string(*rb)
|
|
|
|
rec[i] = &text
|
|
|
|
}
|
|
|
|
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// else, we don't know what to do with this col
|
2022-12-18 05:43:53 +03:00
|
|
|
return nil, errz.Errorf("column %d %s: unknown type db(%T) with kind(%s), val(%v)", i, rowMeta[i].Name(),
|
|
|
|
rec[i], rowMeta[i].Kind(), rec[i])
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
return rec, nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2020-08-18 22:13:25 +03:00
|
|
|
// getTableMetadata gets the metadata for a single table. It is the
|
|
|
|
// implementation of driver.Database.TableMetadata.
|
2023-04-02 22:49:45 +03:00
|
|
|
func getTableMetadata(ctx context.Context, db sqlz.DB, tblName string) (*source.TableMetadata, error) {
|
2020-08-16 00:06:40 +03:00
|
|
|
query := `SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE, TABLE_COMMENT, (DATA_LENGTH + INDEX_LENGTH) AS table_size,
|
|
|
|
(SELECT COUNT(*) FROM ` + "`" + tblName + "`" + `) AS row_count
|
|
|
|
FROM information_schema.TABLES
|
|
|
|
WHERE TABLE_SCHEMA = DATABASE() AND TABLE_NAME = ?`
|
|
|
|
|
|
|
|
var schema string
|
|
|
|
var tblSize sql.NullInt64
|
|
|
|
tblMeta := &source.TableMetadata{}
|
|
|
|
|
|
|
|
err := db.QueryRowContext(ctx, query, tblName).
|
|
|
|
Scan(&schema, &tblMeta.Name, &tblMeta.DBTableType, &tblMeta.Comment, &tblSize, &tblMeta.RowCount)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2020-08-16 00:06:40 +03:00
|
|
|
tblMeta.TableType = canonicalTableType(tblMeta.DBTableType)
|
|
|
|
tblMeta.FQName = schema + "." + tblMeta.Name
|
|
|
|
if tblSize.Valid {
|
|
|
|
// For a view (as opposed to table), tblSize is typically nil
|
|
|
|
tblMeta.Size = &tblSize.Int64
|
|
|
|
}
|
|
|
|
|
2023-04-02 22:49:45 +03:00
|
|
|
tblMeta.Columns, err = getColumnMetadata(ctx, db, tblMeta.Name)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
2020-08-16 00:06:40 +03:00
|
|
|
return nil, err
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2020-08-16 00:06:40 +03:00
|
|
|
return tblMeta, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// getColumnMetadata returns column metadata for tblName.
|
2023-04-02 22:49:45 +03:00
|
|
|
func getColumnMetadata(ctx context.Context, db sqlz.DB, tblName string) ([]*source.ColMetadata, error) {
|
2023-05-03 15:36:10 +03:00
|
|
|
log := lg.FromContext(ctx)
|
2023-04-02 22:49:45 +03:00
|
|
|
|
2022-12-18 05:43:53 +03:00
|
|
|
const query = `SELECT column_name, data_type, column_type, ordinal_position, column_default,
|
|
|
|
is_nullable, column_key, column_comment, extra
|
2020-08-16 00:06:40 +03:00
|
|
|
FROM information_schema.columns cols
|
|
|
|
WHERE cols.TABLE_SCHEMA = DATABASE() AND cols.TABLE_NAME = ?
|
|
|
|
ORDER BY cols.ordinal_position ASC`
|
|
|
|
|
|
|
|
rows, err := db.QueryContext(ctx, query, tblName)
|
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-16 00:06:40 +03:00
|
|
|
}
|
2023-04-02 22:49:45 +03:00
|
|
|
defer lg.WarnIfCloseError(log, lgm.CloseDBRows, rows)
|
2020-08-06 20:58:47 +03:00
|
|
|
|
2020-08-16 00:06:40 +03:00
|
|
|
var cols []*source.ColMetadata
|
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
for rows.Next() {
|
|
|
|
col := &source.ColMetadata{}
|
|
|
|
var isNullable, colKey, extra string
|
|
|
|
|
|
|
|
defVal := &sql.NullString{}
|
2022-12-18 05:43:53 +03:00
|
|
|
err = rows.Scan(&col.Name, &col.BaseType, &col.ColumnType, &col.Position, defVal, &isNullable, &colKey,
|
|
|
|
&col.Comment, &extra)
|
2020-08-06 20:58:47 +03:00
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if strings.EqualFold("YES", isNullable) {
|
|
|
|
col.Nullable = true
|
|
|
|
}
|
|
|
|
|
|
|
|
if strings.Contains(colKey, "PRI") {
|
|
|
|
col.PrimaryKey = true
|
|
|
|
}
|
|
|
|
|
|
|
|
col.DefaultValue = defVal.String
|
|
|
|
col.Kind = kindFromDBTypeName(log, col.Name, col.BaseType)
|
|
|
|
|
2020-08-16 00:06:40 +03:00
|
|
|
cols = append(cols, col)
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2023-05-22 18:08:14 +03:00
|
|
|
return cols, errw(rows.Err())
|
2020-08-16 00:06:40 +03:00
|
|
|
}
|
|
|
|
|
2020-08-18 22:13:25 +03:00
|
|
|
// getSourceMetadata is the implementation of driver.Database.SourceMetadata.
|
|
|
|
//
|
|
|
|
// Multiple queries are required to build the SourceMetadata, and this
|
|
|
|
// impl makes use of errgroup to make concurrent queries. In the initial
|
|
|
|
// relatively sequential implementation of this function, the main perf
|
2023-04-30 17:18:56 +03:00
|
|
|
// roadblock was getting the row count for each table/view. For accuracy,
|
2020-08-18 22:13:25 +03:00
|
|
|
// it is necessary to perform "SELECT COUNT(*) FROM tbl" for each table/view.
|
|
|
|
// For other databases (such as sqlite) it was performant to UNION ALL
|
|
|
|
// these SELECTs into one (or a few) queries, e.g.:
|
|
|
|
//
|
2022-12-17 01:54:09 +03:00
|
|
|
// SELECT COUNT(*) FROM actor
|
|
|
|
// UNION ALL
|
|
|
|
// SELECT COUNT(*) FROM address
|
|
|
|
// UNION ALL
|
|
|
|
// [...]
|
2020-08-18 22:13:25 +03:00
|
|
|
//
|
|
|
|
// However, this seemed to perform poorly (at least for MySQL 5.6 which
|
|
|
|
// was the main focus of testing). We do seem to be getting fairly
|
|
|
|
// reasonable results by spinning off a goroutine (via errgroup) for
|
|
|
|
// each SELECT COUNT(*) query. That said, the testing/benchmarking was
|
|
|
|
// far from exhaustive, and this entire thing has a bit of a code smell.
|
2023-04-02 22:49:45 +03:00
|
|
|
func getSourceMetadata(ctx context.Context, src *source.Source, db sqlz.DB) (*source.Metadata, error) {
|
2023-05-03 15:36:10 +03:00
|
|
|
ctx = options.NewContext(ctx, src.Options)
|
|
|
|
|
2023-04-30 17:18:56 +03:00
|
|
|
md := &source.Metadata{
|
2023-05-19 17:24:18 +03:00
|
|
|
Driver: Type,
|
|
|
|
DBDriver: Type,
|
|
|
|
Handle: src.Handle,
|
|
|
|
Location: src.Location,
|
2023-04-30 17:18:56 +03:00
|
|
|
}
|
2020-08-18 22:13:25 +03:00
|
|
|
|
2023-04-03 00:17:15 +03:00
|
|
|
g, gCtx := errgroup.WithContext(ctx)
|
2023-05-03 15:36:10 +03:00
|
|
|
g.SetLimit(driver.OptTuningErrgroupLimit.Get(src.Options))
|
2020-08-18 22:13:25 +03:00
|
|
|
|
|
|
|
g.Go(func() error {
|
2023-04-30 17:18:56 +03:00
|
|
|
return doRetry(gCtx, func() error {
|
|
|
|
return setSourceSummaryMeta(gCtx, db, md)
|
|
|
|
})
|
2020-08-18 22:13:25 +03:00
|
|
|
})
|
|
|
|
|
|
|
|
g.Go(func() error {
|
2023-04-30 17:18:56 +03:00
|
|
|
return doRetry(gCtx, func() error {
|
|
|
|
var err error
|
2023-05-19 17:24:18 +03:00
|
|
|
md.DBProperties, err = getDBProperties(gCtx, db)
|
2023-04-30 17:18:56 +03:00
|
|
|
return err
|
|
|
|
})
|
2020-08-18 22:13:25 +03:00
|
|
|
})
|
|
|
|
|
|
|
|
g.Go(func() error {
|
2023-04-30 17:18:56 +03:00
|
|
|
return doRetry(gCtx, func() error {
|
|
|
|
var err error
|
|
|
|
md.Tables, err = getAllTblMetas(gCtx, db)
|
|
|
|
return err
|
|
|
|
})
|
2020-08-18 22:13:25 +03:00
|
|
|
})
|
|
|
|
|
|
|
|
err := g.Wait()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
2023-05-26 06:58:43 +03:00
|
|
|
md.TableCount = int64(len(md.Tables))
|
|
|
|
|
2020-08-18 22:13:25 +03:00
|
|
|
return md, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func setSourceSummaryMeta(ctx context.Context, db sqlz.DB, md *source.Metadata) error {
|
|
|
|
const summaryQuery = `SELECT @@GLOBAL.version, @@GLOBAL.version_comment, @@GLOBAL.version_compile_os,
|
|
|
|
@@GLOBAL.version_compile_machine, DATABASE(), CURRENT_USER(),
|
|
|
|
(SELECT SUM( data_length + index_length )
|
|
|
|
FROM information_schema.TABLES WHERE TABLE_SCHEMA = DATABASE()) AS size`
|
|
|
|
|
|
|
|
var version, versionComment, versionOS, versionArch, schema string
|
2022-12-18 05:43:53 +03:00
|
|
|
err := db.QueryRowContext(ctx, summaryQuery).Scan(&version, &versionComment, &versionOS, &versionArch, &schema,
|
|
|
|
&md.User, &md.Size)
|
2020-08-18 22:13:25 +03:00
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return errw(err)
|
2020-08-18 22:13:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
md.Name = schema
|
2023-03-22 09:17:34 +03:00
|
|
|
md.Schema = schema
|
2020-08-18 22:13:25 +03:00
|
|
|
md.FQName = schema
|
|
|
|
md.DBVersion = version
|
|
|
|
md.DBProduct = fmt.Sprintf("%s %s / %s (%s)", versionComment, version, versionOS, versionArch)
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
|
2023-05-19 17:24:18 +03:00
|
|
|
// getDBProperties returns the db settings as observed via "SHOW VARIABLES".
|
|
|
|
func getDBProperties(ctx context.Context, db sqlz.DB) (map[string]any, error) {
|
2023-05-03 15:36:10 +03:00
|
|
|
log := lg.FromContext(ctx)
|
2020-08-16 00:06:40 +03:00
|
|
|
|
|
|
|
rows, err := db.QueryContext(ctx, "SHOW VARIABLES")
|
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-16 00:06:40 +03:00
|
|
|
}
|
2023-04-02 22:49:45 +03:00
|
|
|
defer lg.WarnIfCloseError(log, lgm.CloseDBRows, rows)
|
2020-08-16 00:06:40 +03:00
|
|
|
|
2023-05-19 17:24:18 +03:00
|
|
|
m := map[string]any{}
|
2020-08-16 00:06:40 +03:00
|
|
|
for rows.Next() {
|
2023-05-19 17:24:18 +03:00
|
|
|
var name string
|
|
|
|
var val string
|
|
|
|
|
|
|
|
err = rows.Scan(&name, &val)
|
2020-08-16 00:06:40 +03:00
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-16 00:06:40 +03:00
|
|
|
}
|
2023-05-19 17:24:18 +03:00
|
|
|
|
|
|
|
// Narrow setting to bool or int if possible.
|
|
|
|
var (
|
|
|
|
v any = val
|
|
|
|
i int
|
|
|
|
b bool
|
|
|
|
)
|
|
|
|
if i, err = strconv.Atoi(val); err == nil {
|
|
|
|
v = i
|
|
|
|
} else if b, err = stringz.ParseBool(val); err == nil {
|
|
|
|
v = b
|
|
|
|
}
|
|
|
|
|
|
|
|
m[name] = v
|
2020-08-16 00:06:40 +03:00
|
|
|
}
|
2023-05-19 17:24:18 +03:00
|
|
|
|
|
|
|
if err = rows.Err(); err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-16 00:06:40 +03:00
|
|
|
}
|
|
|
|
|
2023-05-19 17:24:18 +03:00
|
|
|
return m, nil
|
2020-08-06 20:58:47 +03:00
|
|
|
}
|
|
|
|
|
2020-08-18 22:13:25 +03:00
|
|
|
// getAllTblMetas returns TableMetadata for each table/view in db.
|
2023-04-02 22:49:45 +03:00
|
|
|
func getAllTblMetas(ctx context.Context, db sqlz.DB) ([]*source.TableMetadata, error) {
|
2023-05-03 15:36:10 +03:00
|
|
|
log := lg.FromContext(ctx)
|
2023-04-02 22:49:45 +03:00
|
|
|
|
2022-12-18 05:43:53 +03:00
|
|
|
const query = `SELECT t.TABLE_SCHEMA, t.TABLE_NAME, t.TABLE_TYPE, t.TABLE_COMMENT,
|
|
|
|
(DATA_LENGTH + INDEX_LENGTH) AS table_size,
|
|
|
|
c.COLUMN_NAME, c.ORDINAL_POSITION, c.COLUMN_KEY, c.DATA_TYPE, c.COLUMN_TYPE,
|
2023-03-26 04:20:53 +03:00
|
|
|
c.IS_NULLABLE, c.COLUMN_DEFAULT, c.COLUMN_COMMENT, c.EXTRA
|
2020-08-18 22:13:25 +03:00
|
|
|
FROM information_schema.TABLES t
|
|
|
|
LEFT JOIN information_schema.COLUMNS c
|
|
|
|
ON c.TABLE_CATALOG = t.TABLE_CATALOG
|
|
|
|
AND c.TABLE_SCHEMA = t.TABLE_SCHEMA
|
|
|
|
AND c.TABLE_NAME = t.TABLE_NAME
|
|
|
|
WHERE t.TABLE_SCHEMA = DATABASE()
|
|
|
|
ORDER BY c.TABLE_NAME ASC, c.ORDINAL_POSITION ASC`
|
|
|
|
|
2022-12-18 02:11:33 +03:00
|
|
|
//nolint:lll
|
2020-08-18 22:13:25 +03:00
|
|
|
// Query results look like:
|
|
|
|
// +------------+----------+----------+-------------+----------+-----------+----------------+----------+---------+--------------------+-----------+-----------------+--------------+---------------------------+
|
|
|
|
// |TABLE_SCHEMA|TABLE_NAME|TABLE_TYPE|TABLE_COMMENT|table_size|COLUMN_NAME|ORDINAL_POSITION|COLUMN_KEY|DATA_TYPE|COLUMN_TYPE |IS_NULLABLE|COLUMN_DEFAULT |COLUMN_COMMENT|EXTRA |
|
|
|
|
// +------------+----------+----------+-------------+----------+-----------+----------------+----------+---------+--------------------+-----------+-----------------+--------------+---------------------------+
|
|
|
|
// |sakila |actor |BASE TABLE| |32768 |actor_id |1 |PRI |smallint |smallint(5) unsigned|NO |NULL | |auto_increment |
|
|
|
|
// |sakila |actor |BASE TABLE| |32768 |first_name |2 | |varchar |varchar(45) |NO |NULL | | |
|
|
|
|
// |sakila |actor |BASE TABLE| |32768 |last_name |3 |MUL |varchar |varchar(45) |NO |NULL | | |
|
|
|
|
// |sakila |actor |BASE TABLE| |32768 |last_update|4 | |timestamp|timestamp |NO |CURRENT_TIMESTAMP| |on update CURRENT_TIMESTAMP|
|
|
|
|
// |sakila |actor_info|VIEW |VIEW |NULL |actor_id |1 | |smallint |smallint(5) unsigned|NO |0 | | |
|
|
|
|
|
2023-05-24 03:42:36 +03:00
|
|
|
var (
|
|
|
|
tblMetas []*source.TableMetadata
|
|
|
|
schema string
|
|
|
|
curTblName, curTblType, curTblComment sql.NullString
|
|
|
|
curTblSize sql.NullInt64
|
|
|
|
curTblMeta *source.TableMetadata
|
|
|
|
)
|
2020-08-18 22:13:25 +03:00
|
|
|
|
|
|
|
rows, err := db.QueryContext(ctx, query)
|
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-18 22:13:25 +03:00
|
|
|
}
|
2023-04-02 22:49:45 +03:00
|
|
|
defer lg.WarnIfCloseError(log, lgm.CloseDBRows, rows)
|
2020-08-18 22:13:25 +03:00
|
|
|
|
|
|
|
for rows.Next() {
|
|
|
|
select {
|
|
|
|
case <-ctx.Done():
|
|
|
|
return nil, ctx.Err()
|
|
|
|
default:
|
|
|
|
}
|
|
|
|
|
2020-08-20 23:19:16 +03:00
|
|
|
var colName, colDefault, colNullable, colKey, colBaseType, colColumnType, colComment, colExtra sql.NullString
|
|
|
|
var colPosition sql.NullInt64
|
2020-08-18 22:13:25 +03:00
|
|
|
|
2020-08-20 23:19:16 +03:00
|
|
|
err = rows.Scan(&schema, &curTblName, &curTblType, &curTblComment, &curTblSize, &colName, &colPosition,
|
|
|
|
&colKey, &colBaseType, &colColumnType, &colNullable, &colDefault, &colComment, &colExtra)
|
2020-08-18 22:13:25 +03:00
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-18 22:13:25 +03:00
|
|
|
}
|
|
|
|
|
2020-08-20 23:19:16 +03:00
|
|
|
if !curTblName.Valid || !colName.Valid {
|
|
|
|
// table may have been dropped during metadata collection
|
2023-04-02 22:49:45 +03:00
|
|
|
log.Debug("Table not found during metadata collection")
|
2020-08-20 23:19:16 +03:00
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
if curTblMeta == nil || curTblMeta.Name != curTblName.String {
|
2020-08-18 22:13:25 +03:00
|
|
|
// On our first time encountering a new table name, we create a new TableMetadata
|
|
|
|
curTblMeta = &source.TableMetadata{
|
2020-08-20 23:19:16 +03:00
|
|
|
Name: curTblName.String,
|
|
|
|
FQName: schema + "." + curTblName.String,
|
|
|
|
DBTableType: curTblType.String,
|
|
|
|
TableType: canonicalTableType(curTblType.String),
|
|
|
|
Comment: curTblComment.String,
|
2020-08-18 22:13:25 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if curTblSize.Valid {
|
|
|
|
size := curTblSize.Int64
|
|
|
|
curTblMeta.Size = &size
|
|
|
|
}
|
|
|
|
|
2020-08-20 23:19:16 +03:00
|
|
|
tblMetas = append(tblMetas, curTblMeta)
|
2020-08-18 22:13:25 +03:00
|
|
|
}
|
|
|
|
|
2020-08-20 23:19:16 +03:00
|
|
|
col := &source.ColMetadata{
|
|
|
|
Name: colName.String,
|
|
|
|
Position: colPosition.Int64,
|
|
|
|
BaseType: colBaseType.String,
|
|
|
|
ColumnType: colColumnType.String,
|
|
|
|
DefaultValue: colDefault.String,
|
|
|
|
Comment: colComment.String,
|
|
|
|
}
|
|
|
|
|
|
|
|
col.Nullable, err = stringz.ParseBool(colNullable.String)
|
2020-08-18 22:13:25 +03:00
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
col.Kind = kindFromDBTypeName(log, col.Name, col.BaseType)
|
2020-08-20 23:19:16 +03:00
|
|
|
if strings.Contains(colKey.String, "PRI") {
|
2020-08-18 22:13:25 +03:00
|
|
|
col.PrimaryKey = true
|
|
|
|
}
|
|
|
|
|
|
|
|
curTblMeta.Columns = append(curTblMeta.Columns, col)
|
|
|
|
}
|
|
|
|
|
|
|
|
err = rows.Err()
|
|
|
|
if err != nil {
|
2023-05-22 18:08:14 +03:00
|
|
|
return nil, errw(err)
|
2020-08-18 22:13:25 +03:00
|
|
|
}
|
|
|
|
|
2020-08-20 23:19:16 +03:00
|
|
|
// tblMetas may contain nil elements if we failed to get the row
|
|
|
|
// count for the table (which can happen if the table is dropped
|
|
|
|
// during the metadata collection process). So we filter out any
|
|
|
|
// nil elements.
|
2023-05-24 03:42:36 +03:00
|
|
|
tblMetas = lo.Reject(tblMetas, func(item *source.TableMetadata, index int) bool {
|
|
|
|
return item == nil
|
|
|
|
})
|
|
|
|
|
|
|
|
tblNames := make([]string, 0, len(tblMetas))
|
2020-08-20 23:19:16 +03:00
|
|
|
for i := range tblMetas {
|
|
|
|
if tblMetas[i] != nil {
|
2023-05-24 03:42:36 +03:00
|
|
|
tblNames = append(tblNames, tblMetas[i].Name)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// We still need to populate table row counts.
|
|
|
|
var mTblRowCounts map[string]int64
|
|
|
|
if mTblRowCounts, err = getTableRowCountsBatch(ctx, db, tblNames); err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range tblMetas {
|
|
|
|
tblMetas[i].RowCount = mTblRowCounts[tblMetas[i].Name]
|
|
|
|
}
|
|
|
|
|
|
|
|
return tblMetas, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// getTableRowCountsBatch invokes getTableRowCounts, but batches tblNames
|
|
|
|
// into chunks, because MySQL will error if the query becomes too large.
|
|
|
|
func getTableRowCountsBatch(ctx context.Context, db sqlz.DB, tblNames []string) (map[string]int64, error) {
|
|
|
|
// TODO: What value should batchSize be?
|
|
|
|
const batchSize = 100
|
|
|
|
var (
|
|
|
|
all = map[string]int64{}
|
|
|
|
batches = lo.Chunk(tblNames, batchSize)
|
|
|
|
)
|
|
|
|
|
|
|
|
for i := range batches {
|
|
|
|
got, err := getTableRowCounts(ctx, db, batches[i])
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
2020-08-20 23:19:16 +03:00
|
|
|
}
|
2023-05-24 03:42:36 +03:00
|
|
|
|
|
|
|
for k, v := range got {
|
|
|
|
all[k] = v
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return all, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// getTableRowCounts returns a map of table name to count of rows
|
|
|
|
// in that table, as returned by "SELECT COUNT(*) FROM tblName".
|
|
|
|
// If a table does not exist, the error is logged, but then
|
|
|
|
// disregarded, as it's possible that a table can be deleted during
|
|
|
|
// the operation. Thus the length of the returned map may be less
|
|
|
|
// than len(tblNames).
|
|
|
|
func getTableRowCounts(ctx context.Context, db sqlz.DB, tblNames []string) (map[string]int64, error) {
|
|
|
|
log := lg.FromContext(ctx)
|
|
|
|
|
|
|
|
var rows *sql.Rows
|
|
|
|
var err error
|
|
|
|
|
|
|
|
for {
|
|
|
|
if len(tblNames) == 0 {
|
|
|
|
return map[string]int64{}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
var qb strings.Builder
|
|
|
|
for i := 0; i < len(tblNames); i++ {
|
|
|
|
if i > 0 {
|
|
|
|
qb.WriteString("\nUNION\n")
|
|
|
|
}
|
|
|
|
qb.WriteString(fmt.Sprintf("SELECT %s AS tn, COUNT(*) AS rc FROM %s",
|
|
|
|
stringz.SingleQuote(tblNames[i]), stringz.BacktickQuote(tblNames[i])))
|
|
|
|
}
|
|
|
|
|
|
|
|
query := qb.String()
|
|
|
|
rows, err = db.QueryContext(ctx, query)
|
|
|
|
if err == nil {
|
|
|
|
// The query is good, continue below.
|
|
|
|
break
|
|
|
|
}
|
|
|
|
|
|
|
|
err = errw(err)
|
|
|
|
if errz.IsErrNotExist(err) {
|
|
|
|
// Sometimes a table can get deleted during the operation. If so,
|
|
|
|
// we just remove that table from the list, and try again.
|
|
|
|
// We could also do this entire thing in a transaction, but where's
|
|
|
|
// the fun in that...
|
|
|
|
schema, tblName, ok := extractTblNameFromNotExistErr(err)
|
|
|
|
if ok && tblName != "" {
|
|
|
|
log.Warn("Table doesn't exist, will try again without that table",
|
|
|
|
lga.Schema, schema, lga.Table, tblName)
|
|
|
|
tblNames = lo.Without(tblNames, tblName)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
|
|
|
|
defer lg.WarnIfCloseError(log, lgm.CloseDBRows, rows)
|
|
|
|
|
|
|
|
var (
|
|
|
|
m = make(map[string]int64, len(tblNames))
|
|
|
|
count int64
|
|
|
|
tblName string
|
|
|
|
)
|
|
|
|
for rows.Next() {
|
|
|
|
if err = rows.Scan(&tblName, &count); err != nil {
|
|
|
|
return nil, errw(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
m[tblName] = count
|
|
|
|
}
|
|
|
|
|
|
|
|
if err = rows.Err(); err != nil {
|
|
|
|
return nil, errw(err)
|
|
|
|
}
|
|
|
|
|
|
|
|
return m, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// extractTblNameFromNotExistErr is a filthy hack to extract schema
|
|
|
|
// and table from err 1146 (table doesn't exist).
|
|
|
|
func extractTblNameFromNotExistErr(err error) (schema, table string, ok bool) {
|
|
|
|
if err == nil {
|
|
|
|
return "", "", false
|
|
|
|
}
|
|
|
|
|
|
|
|
var e *mysql.MySQLError
|
|
|
|
if !errors.As(err, &e) {
|
|
|
|
return "", "", false
|
|
|
|
}
|
|
|
|
|
|
|
|
if e.Number != errNumTableNotExist {
|
|
|
|
return "", "", false
|
|
|
|
}
|
|
|
|
|
|
|
|
s := strings.TrimSuffix(strings.TrimPrefix(e.Message, "Table '"), "' doesn't exist")
|
|
|
|
|
|
|
|
if s == "" {
|
|
|
|
return "", "", false
|
|
|
|
}
|
|
|
|
|
|
|
|
if schema, table, ok = strings.Cut(s, "."); ok {
|
|
|
|
return schema, table, ok
|
2020-08-20 23:19:16 +03:00
|
|
|
}
|
|
|
|
|
2023-05-24 03:42:36 +03:00
|
|
|
return "", "", false
|
2020-08-18 22:13:25 +03:00
|
|
|
}
|
|
|
|
|
2020-08-06 20:58:47 +03:00
|
|
|
// newInsertMungeFunc is lifted from driver.DefaultInsertMungeFunc.
|
2023-05-22 18:08:14 +03:00
|
|
|
func newInsertMungeFunc(destTbl string, destMeta record.Meta) driver.InsertMungeFunc {
|
|
|
|
return func(rec record.Record) error {
|
2020-08-06 20:58:47 +03:00
|
|
|
if len(rec) != len(destMeta) {
|
|
|
|
return errz.Errorf("insert record has %d vals but dest table %s has %d cols (%s)",
|
|
|
|
len(rec), destTbl, len(destMeta), strings.Join(destMeta.Names(), ","))
|
|
|
|
}
|
|
|
|
|
|
|
|
for i := range rec {
|
|
|
|
nullable, _ := destMeta[i].Nullable()
|
|
|
|
if rec[i] == nil && !nullable {
|
|
|
|
mungeSetZeroValue(i, rec, destMeta)
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
2020-08-23 13:42:15 +03:00
|
|
|
if destMeta[i].Kind() == kind.Text {
|
2020-08-06 20:58:47 +03:00
|
|
|
// text doesn't need our help
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
|
|
|
|
// The dest col kind is something other than text, let's inspect
|
|
|
|
// the actual value and check its type.
|
|
|
|
switch val := rec[i].(type) {
|
|
|
|
default:
|
|
|
|
continue
|
|
|
|
case string:
|
|
|
|
if val == "" {
|
|
|
|
if nullable {
|
|
|
|
rec[i] = nil
|
|
|
|
} else {
|
|
|
|
mungeSetZeroValue(i, rec, destMeta)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
// else we let the DB figure it out
|
|
|
|
|
|
|
|
case *string:
|
|
|
|
if *val == "" {
|
|
|
|
if nullable {
|
|
|
|
rec[i] = nil
|
|
|
|
} else {
|
|
|
|
mungeSetZeroValue(i, rec, destMeta)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// string is non-empty
|
2020-08-23 13:42:15 +03:00
|
|
|
if destMeta[i].Kind() == kind.Datetime {
|
2020-08-06 20:58:47 +03:00
|
|
|
// special handling for datetime
|
|
|
|
mungeSetDatetimeFromString(*val, i, rec)
|
|
|
|
}
|
|
|
|
|
|
|
|
// else we let the DB figure it out
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// datetimeLayouts are the layouts tried, in order, with time.Parse
// when attempting to hand mysql a time.Time instead of a string.
var datetimeLayouts = []string{time.RFC3339Nano, time.RFC3339}

// mungeSetDatetimeFromString tries to parse s using each of
// datetimeLayouts in turn. On the first successful parse, rec[i] is
// set to the resulting time.Time. If no layout matches, rec is left
// untouched, and it's up to mysql to deal with the text.
func mungeSetDatetimeFromString(s string, i int, rec []any) {
	for _, layout := range datetimeLayouts {
		if parsed, err := time.Parse(layout, s); err == nil {
			rec[i] = parsed
			return
		}
	}
}
|
|
|
|
|
|
|
|
// mungeSetZeroValue is invoked when rec[i] is nil, but
|
|
|
|
// destMeta[i] is not nullable.
|
2023-05-22 18:08:14 +03:00
|
|
|
func mungeSetZeroValue(i int, rec []any, destMeta record.Meta) {
|
2020-08-06 20:58:47 +03:00
|
|
|
// REVISIT: do we need to do special handling for kind.Datetime
|
|
|
|
// and kind.Time (e.g. "00:00" for time)?
|
|
|
|
z := reflect.Zero(destMeta[i].ScanType()).Interface()
|
|
|
|
rec[i] = z
|
|
|
|
}
|
2020-08-16 00:06:40 +03:00
|
|
|
|
|
|
|
// canonicalTableType returns the canonical name for "BASE TABLE"
|
2022-12-18 09:07:38 +03:00
|
|
|
// and "VIEW".
|
2020-08-16 00:06:40 +03:00
|
|
|
func canonicalTableType(dbType string) string {
|
|
|
|
switch dbType {
|
|
|
|
default:
|
|
|
|
return ""
|
|
|
|
case "BASE TABLE":
|
|
|
|
return sqlz.TableTypeTable
|
|
|
|
case "VIEW":
|
|
|
|
return sqlz.TableTypeView
|
|
|
|
}
|
|
|
|
}
|