Some fixes to make linter happy (#10004)

- A few import orderings.
- Missing doc comments on Text_Cleanse.
- Indent correction.
This commit is contained in:
James Dunkerley 2024-05-20 17:15:11 +01:00 committed by GitHub
parent befd938dbf
commit 31dd5944d7
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 64 additions and 39 deletions

View File

@ -163,6 +163,7 @@ Map.to_js_object self =
map_vector.map p-> [p.first.to_js_object, p.second.to_js_object]
## PRIVATE
ICON convert
Extension method on Text that parses the text as JSON.
Deprecated: The `parse_json` method uses a faster implementation now by default
Text.parse_fast_json : Nothing | Boolean | Number | Text | Vector | JS_Object

View File

@ -36,9 +36,9 @@ from project.Data.Boolean import Boolean, False, True
from project.Data.Json import Invalid_JSON, JS_Object, Json
from project.Data.Numbers import Float, Integer, Number, Number_Parse_Error
from project.Data.Range.Extensions import all
from project.Data.Text.Text_Cleanse import Cleansable_Text, Text_Cleanse
from project.Metadata import Display, Widget
from project.Data.Text.Text_Cleanse import Text_Cleanse, Cleansable_Text
from project.Widget_Helpers import make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_regex_text_widget, make_time_format_selector, make_data_cleanse_vector_selector
from project.Widget_Helpers import make_data_cleanse_vector_selector, make_date_format_selector, make_date_time_format_selector, make_delimiter_selector, make_regex_text_widget, make_time_format_selector
polyglot java import com.ibm.icu.lang.UCharacter
polyglot java import com.ibm.icu.text.BreakIterator

View File

@ -2,37 +2,59 @@ from Standard.Base import all
## Defines a Text_Cleanse operation
type Text_Cleanse
Duplicate_Whitespace
Leading_Whitespace
Trailing_Whitespace
All_Whitespace
Leading_Numbers
Trailing_Numbers
Non_ASCII
Tabs
Letters
Numbers
Punctuation
Symbols
## Each run of consecutive whitespace characters is collapsed to a single whitespace character.
Duplicate_Whitespace
## Apply the cleanse operation to the text.
apply self input:Cleansable_Text -> Any =
case self of
Text_Cleanse.Leading_Whitespace -> input.replace "^\s+" ""
Text_Cleanse.Trailing_Whitespace -> input.replace "\s+$" ""
Text_Cleanse.Duplicate_Whitespace -> input.replace "(\s)+" "$1"
Text_Cleanse.All_Whitespace -> input.replace "\s+" ""
Text_Cleanse.Leading_Numbers -> input.replace "^\d+" ""
Text_Cleanse.Trailing_Numbers -> input.replace "\d+$" ""
Text_Cleanse.Non_ASCII -> input.replace "[^\x00-\x7F]" ""
Text_Cleanse.Tabs -> input.replace "\t" ""
Text_Cleanse.Letters -> input.replace "[a-zA-Z]" ""
Text_Cleanse.Numbers -> input.replace "\d" ""
Text_Cleanse.Punctuation -> input.replace '[,.!?():;\'\"]' ""
Text_Cleanse.Symbols -> input.replace "[^a-zA-Z\d\s]" ""
## Whitespace at the start of the value is removed.
Leading_Whitespace
## Whitespace at the end of the value is removed.
Trailing_Whitespace
## All whitespace characters are removed.
All_Whitespace
## Digits at the start of the value are removed.
Leading_Numbers
## Digits at the end of the value are removed.
Trailing_Numbers
## Characters outside of the ASCII range (0-127) are removed.
Non_ASCII
## All tab characters are removed.
Tabs
## All ASCII letters (a-z and A-Z) are removed.
Letters
## All digits are removed.
Numbers
## The punctuation characters `,.!?():;'"` are removed.
Punctuation
## All symbols are removed. Symbols are everything other than ASCII letters, digits or whitespace.
Symbols
## Apply the cleanse operation to the text.
apply self input:Cleansable_Text -> Any = case self of
Text_Cleanse.Leading_Whitespace -> input.replace "^\s+" ""
Text_Cleanse.Trailing_Whitespace -> input.replace "\s+$" ""
Text_Cleanse.Duplicate_Whitespace -> input.replace "(\s)+" "$1"
Text_Cleanse.All_Whitespace -> input.replace "\s+" ""
Text_Cleanse.Leading_Numbers -> input.replace "^\d+" ""
Text_Cleanse.Trailing_Numbers -> input.replace "\d+$" ""
Text_Cleanse.Non_ASCII -> input.replace "[^\x00-\x7F]" ""
Text_Cleanse.Tabs -> input.replace "\t" ""
Text_Cleanse.Letters -> input.replace "[a-zA-Z]" ""
Text_Cleanse.Numbers -> input.replace "\d" ""
Text_Cleanse.Punctuation -> input.replace '[,.!?():;\'\"]' ""
Text_Cleanse.Symbols -> input.replace "[^a-zA-Z\d\s]" ""
## PRIVATE
Defines the interface for a cleansable object.
type Cleansable_Text
Value replace
## PRIVATE
Value replace

View File

@ -35,8 +35,8 @@ export project.Data.Text.Matching_Mode.Matching_Mode
export project.Data.Text.Normalization.Normalization
export project.Data.Text.Regex.Regex
export project.Data.Text.Text
export project.Data.Text.Text_Cleanse.Text_Cleanse
export project.Data.Text.Text_Cleanse.Cleansable_Text
export project.Data.Text.Text_Cleanse.Text_Cleanse
export project.Data.Text.Text_Ordering.Text_Ordering
export project.Data.Text.Text_Sub_Range.Text_Sub_Range
export project.Data.Time.Date.Date

View File

@ -124,6 +124,8 @@ make_format_chooser include_number:Boolean=True include_date:Boolean=True includ
boolean = if include_boolean.not then [] else ['Yes|No', '1|0'].map f-> [f + " (Boolean)", f.pretty]
make_single_choice (numeric + date + date_time + time + custom_locale_format + boolean)
## PRIVATE
Creates a Multiple_Choice Widget for selecting data cleanse operations.
make_data_cleanse_vector_selector : Display -> Widget
make_data_cleanse_vector_selector display:Display=Display.Always =
builder = Vector.new_builder

View File

@ -3,7 +3,7 @@ import Standard.Base.Errors.Common.Index_Out_Of_Bounds
import Standard.Base.Errors.Illegal_Argument.Illegal_Argument
import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Internal.Rounding_Helpers
from Standard.Base.Widget_Helpers import make_format_chooser, make_regex_text_widget, make_data_cleanse_vector_selector
from Standard.Base.Widget_Helpers import make_data_cleanse_vector_selector, make_format_chooser, make_regex_text_widget
import Standard.Table.Internal.Column_Naming_Helper.Column_Naming_Helper
import Standard.Table.Internal.Date_Time_Helpers
@ -1462,7 +1462,7 @@ type DB_Column
## GROUP Standard.Base.Text
ICON dataframe_map_column
Applies the specified cleansings to the text in each row.
Arguments:
- remove: A vector of the text cleansings to remove from the text. The text cleansings are
applied in the order they are provided. The same text cleansing can be used multiple

View File

@ -15,7 +15,7 @@ import Standard.Base.Errors.Unimplemented.Unimplemented
import Standard.Base.System.File.Generic.Writable_File.Writable_File
from Standard.Base.Metadata import Display, make_single_choice, Widget
from Standard.Base.Runtime import assert
from Standard.Base.Widget_Helpers import make_delimiter_selector, make_format_chooser, make_data_cleanse_vector_selector
from Standard.Base.Widget_Helpers import make_data_cleanse_vector_selector, make_delimiter_selector, make_format_chooser
import Standard.Table.Column_Operation.Column_Operation
import Standard.Table.Expression.Expression
@ -370,7 +370,7 @@ type DB_Table
new_columns = self.columns_helper.select_blank_columns_helper when treat_nans_as_blank
self.updated_columns new_columns
## ALIAS drop_missing_columns, drop_na, select_blank_fields, select_missing_columns, select_na, select_blank_columns
## ALIAS drop_missing_columns, drop_na, select_blank_columns, select_blank_fields, select_missing_columns, select_na
GROUP Standard.Base.Selections
ICON select_column

View File

@ -11,7 +11,7 @@ import Standard.Base.Errors.Illegal_State.Illegal_State
import Standard.Base.Internal.Polyglot_Helpers
import Standard.Base.Internal.Rounding_Helpers
from Standard.Base.Metadata.Widget import Numeric_Input
from Standard.Base.Widget_Helpers import make_format_chooser, make_regex_text_widget, make_data_cleanse_vector_selector
from Standard.Base.Widget_Helpers import make_data_cleanse_vector_selector, make_format_chooser, make_regex_text_widget
import project.Constants.Previous_Value
import project.Data_Formatter.Data_Formatter

View File

@ -17,7 +17,7 @@ import Standard.Base.Errors.Unimplemented.Unimplemented
import Standard.Base.Runtime.Context
import Standard.Base.System.File.Generic.Writable_File.Writable_File
from Standard.Base.Metadata import Display, make_single_choice, Widget
from Standard.Base.Widget_Helpers import make_delimiter_selector, make_format_chooser, make_data_cleanse_vector_selector
from Standard.Base.Widget_Helpers import make_data_cleanse_vector_selector, make_delimiter_selector, make_format_chooser
import project.Aggregate_Column.Aggregate_Column
import project.Blank_Selector.Blank_Selector
@ -500,7 +500,7 @@ type Table
new_columns = self.columns_helper.select_blank_columns_helper when treat_nans_as_blank
Table.new new_columns
## ALIAS drop_missing_columns, drop_na, select_blank_fields, select_missing_columns, select_na, select_blank_columns
## ALIAS drop_missing_columns, drop_na, select_blank_columns, select_blank_fields, select_missing_columns, select_na
GROUP Standard.Base.Selections
ICON select_column