CSV: check for mismatch between field count and explicitly specified column names (#147)

* csv: check for mismatch between field count and explicitly specified column names

* CHANGELOG update
This commit is contained in:
Neil O'Toole 2023-03-14 23:04:49 -06:00 committed by GitHub
parent fa1677cff6
commit 7148505a84
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 33 additions and 12 deletions

View File

@ -5,6 +5,14 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
## [v0.24.3] - 2023-03-14
### Added
- When a CSV source has explicit column names (via `--opts cols=A,B,C`), `sq` now verifies
that the field count of the first CSV data record matches the number of explicit column names.
## [v0.24.2] - 2023-03-13
### Fixed
@ -145,6 +153,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- [#89]: Bug with SQL generated for joins.
[v0.24.3]: https://github.com/neilotoole/sq/compare/v0.24.2...v0.24.3
[v0.24.2]: https://github.com/neilotoole/sq/compare/v0.24.1...v0.24.2
[v0.24.1]: https://github.com/neilotoole/sq/compare/v0.24.0...v0.24.1
[v0.24.0]: https://github.com/neilotoole/sq/compare/v0.23.0...v0.24.0

View File

@ -364,10 +364,9 @@ func getColNames(cr *csv.Reader, src *source.Source, readAheadRecs *[][]string)
return headerRec, nil
}
// The CSV file does not have a header record. We will generate
// col names [A,B,C...]. To do so, we need to know how many fields
// there are in the first record.
firstDataRecord, err := cr.Read()
// Read ahead the first record. We need this to determine the number
// of columns.
firstRec, err := cr.Read()
if err == io.EOF { //nolint:errorlint
return nil, errz.Errorf("data source %s is empty", src.Handle)
}
@ -375,13 +374,27 @@ func getColNames(cr *csv.Reader, src *source.Source, readAheadRecs *[][]string)
return nil, errz.Wrapf(err, "read from data source %s", src.Handle)
}
// firstRecord contains actual data, so append it to initialRecs.
*readAheadRecs = append(*readAheadRecs, firstDataRecord)
// firstRec contains actual data, so append it to readAheadRecs.
*readAheadRecs = append(*readAheadRecs, firstRec)
// If we have explicit column names, we still need to verify the
// column name count against the data.
if len(explicitColNames) > 0 {
if len(explicitColNames) != len(firstRec) {
return nil, errz.Errorf("mismatch: source has %d explicit column names specified, but first data record has %d fields", //nolint:lll
len(explicitColNames), len(firstRec))
}
return explicitColNames, nil
}
// The CSV file does not have a header record. We will generate
// col names [A,B,C...]. To do so, we need to know how many fields
// there are in the first record.
// If there are no column names yet, we generate them based on the number
// of fields in firstDataRecord.
generatedColNames := make([]string, len(firstDataRecord))
for i := range firstDataRecord {
// of fields in firstRec.
generatedColNames := make([]string, len(firstRec))
for i := range firstRec {
generatedColNames[i] = stringz.GenerateAlphaColName(i, false)
}
@ -429,13 +442,12 @@ func getDelimFromOptions(opts options.Options) (r rune, ok bool, err error) {
return 0, false, nil
}
const key = "delim"
_, ok = opts[key]
_, ok = opts[options.OptDelim]
if !ok {
return 0, false, nil
}
val := opts.Get(key)
val := opts.Get(options.OptDelim)
if val == "" {
return 0, false, nil
}