Implement auto_value_type operation (#7908)

Closes #6113
2024-12-23 02:21:54 +03:00 · 2023-09-27 17:45:34 +02:00 · 2023-09-27 17:45:34 +02:00 · c690559ec4
commit c690559ec4
parent cf16d32894
29 changed files with 985 additions and 45 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -578,6 +578,7 @@
 - [Renamed `Decimal` to `Float`.][7807]
 - [Implemented `Date_Time_Formatter` for more user-friendly date/time format
  parsing.][7826]
+- [Implemented `Table.auto_value_types` for in-memory tables.][7908]

 [debug-shortcuts]:
  https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
@ -820,6 +821,7 @@
 [7776]: https://github.com/enso-org/enso/pull/7776
 [7807]: https://github.com/enso-org/enso/pull/7807
 [7826]: https://github.com/enso-org/enso/pull/7826
+[7908]: https://github.com/enso-org/enso/pull/7908

 #### Enso Compiler

--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Boolean.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Boolean.enso
@ -1,6 +1,4 @@
 import project.Any.Any
-import project.Data.Ordering.Comparable
-import project.Data.Ordering.Ordering
 import project.Nothing.Nothing
 from project.Data.Boolean.Boolean import False, True

@ -98,4 +96,3 @@ type Boolean
             if (27 % 3) == 0 then IO.println "Fizz"
    if_then : Any -> Any | Nothing
    if_then self ~on_true = @Builtin_Method "Boolean.if_then"
-
--- a/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso
+++ b/distribution/lib/Standard/Base/0.0.0-dev/src/Data/Numbers.enso
@ -1,4 +1,5 @@
 import project.Any.Any
+import project.Data.Ordering.Comparable
 import project.Data.Locale.Locale
 import project.Data.Text.Text
 import project.Error.Error
@ -1169,3 +1170,39 @@ type Number_Parse_Error
    to_display_text : Text
    to_display_text self =
        "Could not parse " + self.text.to_text + " as a double."
+
+## A wrapper type that ensures that a function may only take positive integers.
+type Positive_Integer
+    ## PRIVATE
+       This constructor should not be used by user code as it can be used to
+       break the invariants. Instead, this type should only be created by `new`
+       or conversions.
+    Value (integer : Integer)
+
+    ## PRIVATE
+       ADVANCED
+       Constructor to create a `Positive_Integer` from an `Integer` - checking
+       if it satisfies the condition. User code should prefer the
+       `Positive_Integer.from` conversion.
+    new (integer : Integer) =
+        if integer > 0 then Positive_Integer.Value integer else
+            Error.throw (Illegal_Argument.Error "Expected a positive integer, but got "+integer.to_display_text)
+
+## Allows creating a `Positive_Integer` from an `Integer`.
+   It will throw `Illegal_Argument` if the provided integer is not positive.
+Positive_Integer.from (that : Integer) = Positive_Integer.new that
+
+## PRIVATE
+Integer.from (that : Positive_Integer) = that.integer
+
+## PRIVATE
+type Positive_Integer_Comparator
+    ## PRIVATE
+    compare x y =
+        Comparable.from x.integer . compare x.integer y.integer
+
+    ## PRIVATE
+    hash x = Comparable.from x.integer . hash x.integer
+
+## PRIVATE
+Comparable.from (_:Positive_Integer) = Positive_Integer_Comparator
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Column.enso
@ -1579,6 +1579,15 @@ type Column
        check_cast_compatibility self.value_type value_type <|
            self.internal_do_cast value_type on_problems

+    ## Change the value type of the column to a more specific one, based on its
+       contents.
+
+       This operation is currently not available in the Database backend.
+    auto_value_type : Boolean -> Column
+    auto_value_type self shrink_types=False =
+        _ = shrink_types
+        Error.throw <| Unsupported_Database_Operation.Error "`Column.auto_value_type` is not supported in the Database backends."
+
    ## PRIVATE
       Shares the core CAST logic between `cast` and `parse`.
    internal_do_cast : Value_Type -> Problem_Behavior -> Column
--- a/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Database/0.0.0-dev/src/Data/Table.enso
@ -1979,6 +1979,15 @@ type Table
            new_column = column_to_cast.cast value_type on_problems
            table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update

+    ## Change the value type of table columns to a more specific one, based on
+       their contents.
+
+       This operation is currently not available in the Database backend.
+    auto_value_types : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Boolean -> Boolean -> Problem_Behavior -> Table
+    auto_value_types self columns=self.column_names shrink_types=False error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
+        _ = [columns, shrink_types, error_on_missing_columns, on_problems]
+        Error.throw (Unsupported_Database_Operation.Error "Table.auto_value_types is not supported in the Database backends.")
+
    ## ALIAS drop_missing_rows, dropna
       GROUP Standard.Base.Selections
       Remove rows which are all blank or containing blank values.
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Column.enso
@ -1762,6 +1762,43 @@ type Column
            on_problems.attach_problems_before problems <|
                Column.from_storage self.name new_storage

+    ## Change the value type of the column to a more specific one, based on its
+       contents.
+
+       Arguments:
+       - shrink_types: If set to `True`, smaller types will be chosen if possible,
+         according to the rules below. Defaults to `False`.
+
+       ? Auto Type Selection Rules
+
+         - If a `Mixed` column can be assigned a single type, like `Char` or
+           `Integer`, that will be used.
+         - Text columns are not parsed. To do that, use the `parse` method.
+         - If a `Float` column contains only integers, it will be converted to
+           an Integer column.
+         - If a `Decimal` column contains only integers that could fit in a
+           64-bit integer storage, it will be converted to an Integer column.
+         - If `shrink_types` is `False` (default), no other transformations are
+           applied.
+         - However, if `shrink_types` is set to `True`, then:
+           - Integer columns will be assigned the smallest size that can fit all
+             values (down to 16-bit integers; converting to the `Byte` type has
+             to be done manually through `cast`).
+           - If all elements in a text column have the same length, the type
+             will become fixed length.
+           - Otherwise, if a text column is variable length, but all text
+             elements are no longer than 255 characters, the column will get a
+             max length of 255. Otherwise, the column size limit will stay
+             unchanged.
+    auto_value_type : Boolean -> Column
+    auto_value_type self shrink_types=False =
+        new_value_type = case shrink_types of
+            False -> self.inferred_precise_value_type
+            True ->
+                Storage.to_value_type self.java_column.getStorage.inferPreciseTypeShrunk
+        # We run with Report_Error because we do not expect any problems.
+        self.cast new_value_type on_problems=Problem_Behavior.Report_Error
+
    ## ALIAS transform column

       Applies `function` to each item in this column and returns the column
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Table.enso
@ -88,10 +88,20 @@ type Table
                        Column.from_vector (v.at 0) (v.at 1) . java_column
                Column.Value java_col -> java_col
                _ -> invalid_input_shape
-        if cols.is_empty then Error.throw (Illegal_Argument.Error "Cannot create a table with no columns.") else
-            if (cols.all c-> c.getSize == cols.first.getSize).not then Error.throw (Illegal_Argument.Error "All columns must have the same row count.") else
-                if cols.distinct .getName . length != cols.length then Error.throw (Illegal_Argument.Error "Column names must be distinct.") else
-                    Table.Value (Java_Table.new cols)
+        Panic.recover Illegal_Argument <|
+            if cols.is_empty then
+                Panic.throw (Illegal_Argument.Error "Cannot create a table with no columns.")
+
+            if cols.distinct .getName . length != cols.length then
+                Panic.throw (Illegal_Argument.Error "Column names must be distinct.")
+
+            mismatched_size_column = cols.find if_missing=Nothing c->
+                c.getSize != cols.first.getSize
+            if mismatched_size_column.is_nothing.not then
+                msg = "All columns must have the same row count, but the column [" + mismatched_size_column.getName + "] has " + mismatched_size_column.getSize.to_text + " rows, while the column [" + cols.first.getName + "] has " + cols.first.getSize.to_text + " rows."
+                Panic.throw (Illegal_Argument.Error msg)
+
+            Table.Value (Java_Table.new cols)

    ## GROUP Standard.Base.Constants
       Creates a new table from a vector of column names and a vector of vectors
@ -946,6 +956,9 @@ type Table
       Arguments:
       - columns: The selection of columns to cast.
       - value_type: The `Value_Type` to cast the column to.
+       - error_on_missing_columns: Specifies if a missing input column should
+         result in an error regardless of the `on_problems` settings. Defaults
+         to `True`.
       - on_problems: Specifies how to handle problems if they occur, reporting
         them as warnings by default.

@ -996,6 +1009,50 @@ type Table
            new_column = column_to_cast.cast value_type on_problems
            table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update

+    ## Change the value type of table columns to a more specific one, based on
+       their contents.
+
+       This is most useful for `Mixed` type columns, as it allows narrowing down
+       the type if all values in the column fit a more specific type.
+
+       Arguments:
+       - columns: The selection of columns to convert.
+       - shrink_types: If set to `True`, smaller types will be chosen if possible,
+         according to the rules below. Defaults to `False`.
+       - error_on_missing_columns: Specifies if a missing input column should
+         result in an error regardless of the `on_problems` settings. Defaults
+         to `True`.
+       - on_problems: Specifies how to handle problems if they occur, reporting
+         them as warnings by default.
+
+       ? Auto Type Selection Rules
+
+         - If a `Mixed` column can be assigned a single type, like `Char` or
+           `Integer`, that will be used.
+         - Text columns are not parsed. To do that, use the `parse` method.
+         - If a `Float` column contains only integers, it will be converted to
+           an Integer column.
+         - If a `Decimal` column contains only integers that could fit in a
+           64-bit integer storage, it will be converted to an Integer column.
+         - If `shrink_types` is `False` (default), no other transformations are
+           applied.
+         - However, if `shrink_types` is set to `True`, then:
+           - Integer columns will be assigned the smallest size that can fit all
+             values (down to 16-bit integers; converting to the `Byte` type has
+             to be done manually through `cast`).
+           - If all elements in a text column have the same length, the type
+             will become fixed length.
+           - Otherwise, if a text column is variable length, but all text
+             elements are no longer than 255 characters, the column will get a
+             max length of 255. Otherwise, the column size limit will stay
+             unchanged.
+    auto_value_types : Vector (Text | Integer | Regex) | Text | Integer | Regex -> Boolean -> Boolean -> Problem_Behavior -> Table
+    auto_value_types self columns=self.column_names shrink_types=False error_on_missing_columns=True on_problems=Problem_Behavior.Report_Warning =
+        selected = self.columns_helper.select_columns columns Case_Sensitivity.Default reorder=False error_on_missing_columns=error_on_missing_columns on_problems=on_problems error_on_empty=False
+        selected.fold self table-> column_to_cast->
+            new_column = column_to_cast.auto_value_type shrink_types
+            table.set new_column new_name=column_to_cast.name set_mode=Set_Mode.Update
+
    ## GROUP Standard.Base.Conversions
       Splits a column of text into a set of new columns.
       The original column will be removed from the table.
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Enso_Types.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Enso_Types.enso
@ -18,7 +18,7 @@ polyglot java import org.enso.table.data.column.storage.type.IntegerType
 most_specific_value_type : Any -> Boolean -> Value_Type
 most_specific_value_type value use_smallest=False =
    case value of
-        _ : Float     -> Value_Type.Float Bits.Bits_64
+        _ : Float       -> Value_Type.Float Bits.Bits_64
        _ : Boolean     -> Value_Type.Boolean
        _ : Date        -> Value_Type.Date
        _ : Time_Of_Day -> Value_Type.Time
@ -33,9 +33,12 @@ most_specific_value_type value use_smallest=False =
                        # We do a small rewrite here - for integers we always return the Integer type, even if the value is small enough to fit in a Byte.
                        if value_type == Value_Type.Byte then Value_Type.Integer Bits.Bits_16 else value_type
                True -> Value_Type.Decimal precision=Nothing scale=0
-        text : Text     -> case use_smallest of
-            False -> Value_Type.Char size=Nothing variable_length=True
-            True  -> Value_Type.Char size=text.length variable_length=False
+        text : Text     ->
+            length = text.length
+            # Not using Char size=0 for empty strings, because that would be an invalid value.
+            case use_smallest && length > 0 of
+                True  -> Value_Type.Char size=text.length variable_length=False
+                False -> Value_Type.Char size=Nothing variable_length=True
        ## TODO [RW] once we add Enso Native Object Type Value Type, we probably
           want to prefer it over Mixed
        _               -> Value_Type.Mixed
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Storage.enso
@ -54,7 +54,7 @@ closest_storage_type value_type = case value_type of
        Error.throw (Illegal_Argument.Error "Value_Type.Char with fixed length must have a non-nothing size")
    Value_Type.Char max_length variable_length ->
        fixed_length = variable_length.not
-        TextType.new max_length fixed_length
+        TextType.new (max_length : Integer) fixed_length
    Value_Type.Date -> DateType.INSTANCE
    # We currently will not support storing dates without timezones in in-memory mode.
    Value_Type.Date_Time _ -> DateTimeType.INSTANCE
--- a/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso
+++ b/distribution/lib/Standard/Table/0.0.0-dev/src/Data/Type/Value_Type.enso
@ -1,4 +1,5 @@
 from Standard.Base import all
+import Standard.Base.Data.Numbers.Positive_Integer
 import Standard.Base.Errors.Illegal_Argument.Illegal_Argument

 import project.Data.Type.Value_Type_Helpers
@ -95,12 +96,22 @@ type Value_Type

       ANSI SQL: CHAR, VARCHAR, TEXT, LONGVARCHAR, NCHAR, NVARCHAR, TEXT, CLOB, NCLOB

+       ! Counting Characters
+
+         Note that different backends may count the text in different ways.
+         The in-memory backend treats a single grapheme cluster (e.g. 💡) as a
+         single character unit. In most database systems more complex grapheme
+         clusters may be counted as multiple characters. So there isn't a 1-1
+         correspondence between these limits across backends which may cause
+         strings to be truncated if they contain such characters and are close
+         to the limit.
+
       Arguments:
       - size: the maximum number of characters that can be stored in the
-         column. It can be nothing to indicate no limit.
+         column. It can be nothing to indicate no limit. It cannot be 0.
       - variable_length: whether the size is a maximum or a fixed length.
         A fixed length string must have a non-nothing size.
-    Char size:(Integer|Nothing)=Nothing variable_length:Boolean=True
+    Char (size : (Positive_Integer | Nothing) = Nothing) variable_length:Boolean=True

    ## Date

@ -383,15 +394,23 @@ type Value_Type
        Value_Type.Integer size -> "Integer (" + size.to_text + ")"
        Value_Type.Float size -> "Float (" + size.to_text + ")"
        Value_Type.Decimal precision scale -> "Decimal (precision=" + precision.to_text + ", scale=" + scale.to_text + ")"
-        Value_Type.Char size variable_length -> case variable_length of
-            True  -> "Char (variable length, max_size=" + size.to_text + ")"
-            False -> "Char (fixed length, size=" + size.to_text + ")"
+        Value_Type.Char size variable_length ->
+            size_text = case size of
+                Nothing -> "unlimited"
+                _ -> size.to Integer . to_text
+            case variable_length of
+                True  -> "Char (variable length, max_size=" + size_text + ")"
+                False -> "Char (fixed length, size=" + size_text + ")"
        Value_Type.Date -> "Date"
        Value_Type.Date_Time with_timezone -> "Date_Time (with_timezone=" + with_timezone.to_text + ")"
        Value_Type.Time -> "Time"
-        Value_Type.Binary size variable_length -> case variable_length of
-            True  -> "Binary (variable length, max_size=" + size.to_text + " bytes)"
-            False -> "Binary (fixed length, size=" + size.to_text + " bytes)"
+        Value_Type.Binary size variable_length ->
+            size_text = case size of
+                Nothing -> "unlimited"
+                _ -> size.to Integer . to_text + " bytes"
+            case variable_length of
+                True  -> "Binary (variable length, max_size=" + size_text + ")"
+                False -> "Binary (fixed length, size=" + size_text + ")"
        Value_Type.Unsupported_Data_Type type_name _ -> case type_name of
            Nothing  -> "Unsupported_Data_Type"
            _ : Text -> "Unsupported_Data_Type (" + type_name + ")"
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/BigIntegerBuilder.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/BigIntegerBuilder.java
@ -10,6 +10,7 @@ import org.enso.table.data.column.storage.type.BigIntegerType;
 import org.enso.table.data.column.storage.type.FloatType;
 import org.enso.table.data.column.storage.type.StorageType;
 import org.enso.table.error.ValueTypeMismatchException;
+import org.graalvm.polyglot.Context;

 // For now the BigInteger builder is just a stub, reusing the ObjectBuilder and adding a warning.
 public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
@ -88,10 +89,12 @@ public class BigIntegerBuilder extends TypedBuilderImpl<BigInteger> {
  }

  public static BigIntegerBuilder retypeFromLongBuilder(LongBuilder longBuilder) {
+    BigIntegerBuilder res = new BigIntegerBuilder(longBuilder.data.length);
    int n = longBuilder.currentSize;
-    BigIntegerBuilder res = new BigIntegerBuilder(n);
+    Context context = Context.getCurrent();
    for (int i = 0; i < n; i++) {
      res.appendNoGrow(BigInteger.valueOf(longBuilder.data[i]));
+      context.safepoint();
    }
    return res;
  }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilderChecked.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilderChecked.java
@ -26,10 +26,10 @@ public class LongBuilderChecked extends LongBuilder {
    if (o == null) {
      isMissing.set(currentSize++);
    } else {
-      try {
-        long x = NumericConverter.coerceToLong(o);
+      Long x = NumericConverter.tryConvertingToLong(o);
+      if (x != null) {
        appendLongNoGrow(x);
-      } catch (UnsupportedOperationException e) {
+      } else {
        throw new ValueTypeMismatchException(type, o);
      }
    }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilderUnchecked.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/builder/LongBuilderUnchecked.java
@ -19,10 +19,10 @@ public class LongBuilderUnchecked extends LongBuilder {
    if (o == null) {
      isMissing.set(currentSize++);
    } else {
-      try {
-        long x = NumericConverter.coerceToLong(o);
-        data[currentSize++] = x;
-      } catch (UnsupportedOperationException e) {
+      Long x = NumericConverter.tryConvertingToLong(o);
+      if (x != null) {
+        appendLongNoGrow(x);
+      } else {
        throw new ValueTypeMismatchException(getType(), o);
      }
    }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/cast/ToTextStorageConverter.java
@ -29,8 +29,8 @@ public class ToTextStorageConverter implements StorageConverter<String> {

  public Storage<String> cast(Storage<?> storage, CastProblemBuilder problemBuilder) {
    if (storage instanceof StringStorage stringStorage) {
-      if (stringStorage.getType().equals(targetType)) {
-        return stringStorage;
+      if (canAvoidCopying(stringStorage)) {
+        return retypeStringStorage(stringStorage);
      } else {
        return adaptStringStorage(stringStorage, problemBuilder);
      }
@ -150,7 +150,8 @@ public class ToTextStorageConverter implements StorageConverter<String> {
    return builder.seal();
  }

-  private <T> Storage<String> castDateTimeStorage(Storage<T> storage, Function<T, String> converter, CastProblemBuilder problemBuilder) {
+  private <T> Storage<String> castDateTimeStorage(Storage<T> storage, Function<T, String> converter,
+                                                  CastProblemBuilder problemBuilder) {
    Context context = Context.getCurrent();
    StringBuilder builder = new StringBuilder(storage.size(), targetType);
    for (int i = 0; i < storage.size(); i++) {
@ -204,4 +205,43 @@ public class ToTextStorageConverter implements StorageConverter<String> {
    problemBuilder.aggregateOtherProblems(builder.getProblems());
    return builder.seal();
  }
+
+  /** Checks if the storage can be retyped as-is, measuring lengths in grapheme clusters. */
+  private boolean canAvoidCopying(StringStorage stringStorage) {
+    if (targetType.fitsExactly(stringStorage.getType())) {
+      return true;
+    }
+
+    long maxLength = Long.MIN_VALUE;
+    long minLength = Long.MAX_VALUE;
+    for (int i = 0; i < stringStorage.size(); i++) {
+      String value = stringStorage.getItem(i);
+      if (value == null) {
+        continue;
+      }
+
+      // Lengths must be counted in grapheme clusters, not UTF-16 code units: `String.length()`
+      // reports 2 for a single emoji, which would wrongly permit a retype to fixed length 2.
+      long length = org.enso.base.Text_Utils.grapheme_length(value);
+      maxLength = Math.max(maxLength, length);
+      minLength = Math.min(minLength, length);
+    }
+
+    if (targetType.fixedLength()) {
+      // If there were no non-null values, minLength != maxLength and we fall back to copying.
+      boolean effectivelyFixedLength = minLength == maxLength;
+      return effectivelyFixedLength && targetType.maxLength() == maxLength;
+    } else {
+      return targetType.maxLength() == -1 || maxLength <= targetType.maxLength();
+    }
+  }
+
+  /**
+   * Creates a new storage re-using the existing array.
+   * <p>
+   * This can only be done if the values do not need any adaptations, checked by {@code canAvoidCopying}.
+   */
+  private Storage<String> retypeStringStorage(StringStorage stringStorage) {
+    return new StringStorage(stringStorage.getData(), stringStorage.size(), targetType);
+  }
 }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringStringOp.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/operation/map/text/StringStringOp.java
@ -23,7 +23,7 @@ public abstract class StringStringOp extends BinaryMapOperation<String, Speciali
  public Storage<?> runBinaryMap(SpecializedStorage<String> storage, Object arg, MapOperationProblemBuilder problemBuilder) {
    int size = storage.size();
    if (arg == null) {
-      StringBuilder builder = new StringBuilder(size, TextType.variableLengthWithLimit(0));
+      StringBuilder builder = new StringBuilder(size, TextType.VARIABLE_LENGTH);
      builder.appendNulls(size);
      return builder.seal();
    } else if (arg instanceof String argString) {
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorage.java
@ -105,6 +105,20 @@ public final class MixedStorage extends ObjectStorage {
    return inferredType;
  }

+  @Override
+  public StorageType inferPreciseTypeShrunk() {
+    Storage<?> specialized = getInferredStorage();
+    if (specialized == null) {
+      // If no specialized type is available, it means that:
+      assert inferredType instanceof AnyObjectType;
+      return AnyObjectType.INSTANCE;
+    }
+
+    // If we are able to get a more specialized storage for more specific type - we delegate to its
+    // own shrinking logic.
+    return specialized.inferPreciseTypeShrunk();
+  }
+
  private Storage<?> getInferredStorage() {
    if (!hasSpecializedStorageBeenInferred) {
      StorageType inferredType = inferPreciseType();
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/MixedStorageFacade.java
@ -43,6 +43,11 @@ public class MixedStorageFacade extends Storage<Object> {
    return underlyingStorage.inferPreciseType();
  }

+  @Override
+  public StorageType inferPreciseTypeShrunk() {
+    return underlyingStorage.inferPreciseTypeShrunk();
+  }
+
  @Override
  public boolean isNa(long idx) {
    return underlyingStorage.isNa(idx);
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/Storage.java
@ -39,6 +39,19 @@ public abstract class Storage<T> {
    return getType();
  }

+  /**
+   * Returns the smallest type (according to Column.auto_value_type rules) that may still fit all
+   * values in this column.
+   *
+   * <p>It is a sibling of `inferPreciseType` that allows some further shrinking. It is kept
+   * separate, because `inferPreciseType` should be quick to compute (cached if needed) as it is
+   * used in typechecking of lots of operations. This one however, is only used in a specific
+   * `auto_value_type` use-case and rarely will need to be computed more than once.
+   */
+  public StorageType inferPreciseTypeShrunk() {
+    return getType();
+  }
+
  /**
   * Returns a more specialized storage, if available.
   *
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/StringStorage.java
@ -3,9 +3,9 @@ package org.enso.table.data.column.storage;
 import org.enso.base.Text_Utils;
 import org.enso.table.data.column.builder.Builder;
 import org.enso.table.data.column.builder.StringBuilder;
-import org.enso.table.data.column.operation.map.MapOperationStorage;
 import org.enso.table.data.column.operation.map.BinaryMapOperation;
 import org.enso.table.data.column.operation.map.MapOperationProblemBuilder;
+import org.enso.table.data.column.operation.map.MapOperationStorage;
 import org.enso.table.data.column.operation.map.UnaryMapOperation;
 import org.enso.table.data.column.operation.map.text.LikeOp;
 import org.enso.table.data.column.operation.map.text.StringBooleanOp;
@ -19,10 +19,13 @@ import org.graalvm.polyglot.Value;

 import java.util.BitSet;

-/** A column storing strings. */
+/**
+ * A column storing strings.
+ */
 public final class StringStorage extends SpecializedStorage<String> {

  private final TextType type;
+
  /**
   * @param data the underlying data
   * @param size the number of items stored
@ -111,7 +114,8 @@ public final class StringStorage extends SpecializedStorage<String> {
    t.add(
        new UnaryMapOperation<>(Maps.IS_EMPTY) {
          @Override
-          protected BoolStorage runUnaryMap(SpecializedStorage<String> storage, MapOperationProblemBuilder problemBuilder) {
+          protected BoolStorage runUnaryMap(SpecializedStorage<String> storage,
+                                            MapOperationProblemBuilder problemBuilder) {
            BitSet r = new BitSet();
            Context context = Context.getCurrent();
            for (int i = 0; i < storage.size; i++) {
@ -162,4 +166,40 @@ public final class StringStorage extends SpecializedStorage<String> {
        });
    return t;
  }
+
+  @Override
+  public StorageType inferPreciseTypeShrunk() {
+    if (type.fixedLength()) {
+      return type;
+    }
+
+    long minLength = Long.MAX_VALUE;
+    long maxLength = Long.MIN_VALUE;
+    for (int i = 0; i < size(); i++) {
+      String s = getItem(i);
+      if (s != null) {
+        long length = Text_Utils.grapheme_length(s);
+        minLength = Math.min(minLength, length);
+        maxLength = Math.max(maxLength, length);
+      }
+    }
+
+    // maxLength will be <0 if all values were null and will be ==0 if all values were empty strings.
+    // In both of these cases, we avoid shrinking the type and return the original type instead.
+    if (maxLength <= 0) {
+      return getType();
+    }
+
+    final long SHORT_LENGTH_THRESHOLD = 255;
+    if (minLength == maxLength) {
+      return TextType.fixedLength(minLength);
+    } else if (maxLength <= SHORT_LENGTH_THRESHOLD && (type.maxLength() < 0 || SHORT_LENGTH_THRESHOLD < type.maxLength())) {
+      // If the string was unbounded or the bound was larger than 255, we shrink it to 255.
+      return TextType.variableLengthWithLimit(SHORT_LENGTH_THRESHOLD);
+    } else {
+      // Otherwise, we return the original type (because it was either smaller than the proposed 255 bound, or the
+      // existing elements do not fit into the 255 bound).
+      return getType();
+    }
+  }
 }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/AbstractLongStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/AbstractLongStorage.java
@ -23,6 +23,8 @@ import org.enso.table.data.column.operation.map.numeric.isin.LongIsInOp;
 import org.enso.table.data.column.storage.BoolStorage;
 import org.enso.table.data.column.storage.Storage;
 import org.enso.table.data.column.storage.type.IntegerType;
+import org.enso.table.data.column.storage.type.StorageType;
+import org.graalvm.polyglot.Context;

 public abstract class AbstractLongStorage extends NumericStorage<Long> {
  public abstract long getItem(int idx);
@ -77,6 +79,46 @@ public abstract class AbstractLongStorage extends NumericStorage<Long> {
  @Override
  public abstract IntegerType getType();

+  /**
+   * Reports the type of this storage without attempting any shrinking.
+   *
+   * @return the value reported by {@link #getType()}
+   */
+  @Override
+  public StorageType inferPreciseType() {
+    final IntegerType declaredType = getType();
+    return declaredType;
+  }
+
+  /**
+   * Finds the narrowest of the 16-, 32- and 64-bit integer types that fits every present item.
+   *
+   * <p>Storages whose type already has 16 bits or fewer are returned unchanged. Missing items are
+   * ignored, so a storage without any present item yields the 16-bit type.
+   *
+   * @return the current type if it has at most 16 bits, otherwise the narrowest candidate that
+   *     fits all present items
+   */
+  @Override
+  public StorageType inferPreciseTypeShrunk() {
+    // If the type is already smallest possible, we return it unchanged. 8-bit columns are returned
+    // as-is, but 16-bit columns are never shrunk to 8 bits, even if that were possible.
+    if (getType().bits().toInteger() <= 16) {
+      return getType();
+    }
+
+    // Candidates ordered from narrowest to widest.
+    IntegerType[] possibleTypes =
+        new IntegerType[] {IntegerType.INT_16, IntegerType.INT_32, IntegerType.INT_64};
+
+    int currentTypeIdx = 0;
+    int n = size();
+    Context context = Context.getCurrent();
+    for (int i = 0; i < n; i++) {
+      if (!isNa(i)) {
+        long item = getItem(i);
+        // Relies on the last candidate (INT_64) accepting every long, which keeps the index within
+        // the array.
+        while (!possibleTypes[currentTypeIdx].fits(item)) {
+          currentTypeIdx++;
+        }
+
+        // Once the widest candidate is reached, no further item can change the answer.
+        if (currentTypeIdx >= possibleTypes.length - 1) {
+          break;
+        }
+      }
+
+      // Polled on every iteration, including missing items, so that long runs of nulls do not
+      // delay the safepoint.
+      context.safepoint();
+    }
+
+    return possibleTypes[currentTypeIdx];
+  }
+
  private static MapOperationStorage<Long, AbstractLongStorage> buildOps() {
    MapOperationStorage<Long, AbstractLongStorage> ops = new MapOperationStorage<>();
    ops.add(new AddOp<>())
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/BigIntegerStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/BigIntegerStorage.java
@ -20,6 +20,7 @@ import org.enso.table.data.column.operation.map.numeric.isin.BigIntegerIsInOp;
 import org.enso.table.data.column.storage.ObjectStorage;
 import org.enso.table.data.column.storage.SpecializedStorage;
 import org.enso.table.data.column.storage.type.BigIntegerType;
+import org.enso.table.data.column.storage.type.IntegerType;
 import org.enso.table.data.column.storage.type.StorageType;

 public class BigIntegerStorage extends SpecializedStorage<BigInteger> {
@ -59,7 +60,7 @@ public class BigIntegerStorage extends SpecializedStorage<BigInteger> {

  @Override
  protected BigInteger[] newUnderlyingArray(int size) {
-    return new BigInteger[0];
+    return new BigInteger[size];
  }

  @Override
@ -96,4 +97,67 @@ public class BigIntegerStorage extends SpecializedStorage<BigInteger> {

    return cachedMaxPrecisionStored;
  }
+
+  /** Memoized result of {@link #inferPreciseType()}; {@code null} until it is first computed. */
+  private StorageType inferredType = null;
+
+  /**
+   * Determines whether the stored values could be represented as 64-bit integers.
+   *
+   * <p>The result is computed once and then served from {@code inferredType}.
+   *
+   * @return {@link IntegerType#INT_64} if there is at least one non-null value and every non-null
+   *     value fits in it; {@link BigIntegerType#INSTANCE} otherwise
+   */
+  @Override
+  public StorageType inferPreciseType() {
+    if (inferredType != null) {
+      return inferredType;
+    }
+
+    boolean sawValue = false;
+    boolean sawOversized = false;
+    // The scan stops at the first value that does not fit in 64 bits.
+    for (int row = 0; row < size && !sawOversized; row++) {
+      BigInteger current = data[row];
+      if (current != null) {
+        sawValue = true;
+        sawOversized = !IntegerType.INT_64.fits(current);
+      }
+    }
+
+    // A storage without any non-null value keeps the big integer type.
+    if (sawValue && !sawOversized) {
+      inferredType = IntegerType.INT_64;
+    } else {
+      inferredType = BigIntegerType.INSTANCE;
+    }
+
+    return inferredType;
+  }
+
+  /**
+   * Like {@link #inferPreciseType()}, but when the values fit an integer type it additionally
+   * looks for the smallest integer type that fits them.
+   *
+   * @return the shrunk integer type, or the result of {@link #inferPreciseType()} when that is not
+   *     an integer type
+   */
+  @Override
+  public StorageType inferPreciseTypeShrunk() {
+    StorageType precise = inferPreciseType();
+    return (precise instanceof IntegerType) ? findSmallestIntegerTypeThatFits() : precise;
+  }
+
+  /**
+   * Delegates shrinking to the long storage logic by viewing this storage through an adapter that
+   * converts each value to a {@code Long} on demand.
+   *
+   * <p>Must only be called after {@link #inferPreciseType()} has established that all values fit
+   * in an integer type.
+   */
+  private StorageType findSmallestIntegerTypeThatFits() {
+    // This method assumes that all values _do_ fit in some integer type.
+    assert inferredType instanceof IntegerType;
+
+    // A Long storage view over this storage; nulls stay nulls, other values are converted exactly.
+    ComputedNullableLongStorage asLongs =
+        new ComputedNullableLongStorage(size) {
+          @Override
+          protected Long computeItem(int idx) {
+            // Qualified access is required - the adapter has its own `getItem`.
+            BigInteger stored = BigIntegerStorage.this.getItem(idx);
+            return stored == null ? null : Long.valueOf(stored.longValueExact());
+          }
+        };
+
+    // The adapter's shrinking logic picks the final type.
+    return asLongs.inferPreciseTypeShrunk();
+  }
 }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedNullableLongStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/ComputedNullableLongStorage.java
@ -0,0 +1,197 @@
+package org.enso.table.data.column.storage.numeric;
+
+import java.util.BitSet;
+import java.util.List;
+import org.enso.table.data.column.storage.Storage;
+import org.enso.table.data.column.storage.type.IntegerType;
+import org.enso.table.data.index.Index;
+import org.enso.table.data.mask.OrderMask;
+import org.enso.table.data.mask.SliceRange;
+import org.graalvm.polyglot.Context;
+
+/**
+ * Implements a storage that computes the ith stored value using some function.
+ *
+ * <p>This storage allows for missing values: an item is missing when {@link #computeItem(int)}
+ * returns {@code null}. Prefer {@link ComputedLongStorage} for non-nullable case.
+ *
+ * <p>This class keeps no computed values itself, so every query calls {@link #computeItem(int)}
+ * again. The masking and slicing operations evaluate the requested items and return a {@link
+ * LongStorage} built from them.
+ */
+public abstract class ComputedNullableLongStorage extends AbstractLongStorage {
+  /** Number of items exposed by this storage. */
+  protected final int size;
+
+  /**
+   * Computes the item at the given position.
+   *
+   * @param idx position of the item; the methods of this class only pass indices that they have
+   *     either bounds-checked or derived from their own arguments
+   * @return the value, or {@code null} if the item is missing
+   */
+  protected abstract Long computeItem(int idx);
+
+  /**
+   * @param size number of items exposed by this storage
+   */
+  protected ComputedNullableLongStorage(int size) {
+    this.size = size;
+  }
+
+  @Override
+  public int size() {
+    return size;
+  }
+
+  /**
+   * Counts the missing items by evaluating every item.
+   *
+   * @return the number of positions for which {@link #computeItem(int)} returns {@code null},
+   *     consistent with {@link #isNa(long)} and {@link #getIsMissing()}
+   */
+  @Override
+  public int countMissing() {
+    int missingCount = 0;
+    Context context = Context.getCurrent();
+    for (int i = 0; i < size; i++) {
+      if (computeItem(i) == null) {
+        missingCount++;
+      }
+
+      context.safepoint();
+    }
+    return missingCount;
+  }
+
+  /** Always reports the 64-bit integer type. */
+  @Override
+  public IntegerType getType() {
+    return IntegerType.INT_64;
+  }
+
+  /**
+   * @throws IndexOutOfBoundsException if {@code idx} is outside of {@code [0, size)}
+   */
+  @Override
+  public boolean isNa(long idx) {
+    requireInBounds(idx);
+    return computeItem((int) idx) == null;
+  }
+
+  /**
+   * @return the computed value, or {@code null} if the item is missing
+   * @throws IndexOutOfBoundsException if {@code idx} is outside of {@code [0, size)}
+   */
+  @Override
+  public Long getItemBoxed(int idx) {
+    requireInBounds(idx);
+    return computeItem(idx);
+  }
+
+  /**
+   * Returns the item as a primitive.
+   *
+   * <p>The boxed value is auto-unboxed, so calling this for a missing item fails with a {@link
+   * NullPointerException}; check {@link #isNa(long)} first.
+   *
+   * @throws IndexOutOfBoundsException if {@code idx} is outside of {@code [0, size)}
+   */
+  @Override
+  public long getItem(int idx) {
+    return getItemBoxed(idx);
+  }
+
+  /** Builds a bit set with a bit set for each missing item, evaluating every item. */
+  @Override
+  public BitSet getIsMissing() {
+    BitSet missing = new BitSet();
+    Context context = Context.getCurrent();
+    for (int i = 0; i < size; i++) {
+      if (computeItem(i) == null) {
+        missing.set(i);
+      }
+
+      context.safepoint();
+    }
+    return missing;
+  }
+
+  /**
+   * Keeps only the items whose bit is set in {@code mask}.
+   *
+   * @param mask selects the items to keep
+   * @param cardinality length of the resulting storage; it sizes the result array, so it must be
+   *     at least the number of selected items
+   */
+  @Override
+  public Storage<Long> mask(BitSet mask, int cardinality) {
+    BitSet newMissing = new BitSet();
+    long[] newData = new long[cardinality];
+    int resIx = 0;
+    Context context = Context.getCurrent();
+    for (int i = 0; i < size; i++) {
+      if (mask.get(i)) {
+        Long item = computeItem(i);
+        if (item == null) {
+          newMissing.set(resIx++);
+        } else {
+          newData[resIx++] = item;
+        }
+      }
+
+      context.safepoint();
+    }
+    return new LongStorage(newData, cardinality, newMissing, getType());
+  }
+
+  /**
+   * Builds a storage whose ith item is the item at the ith position of the mask.
+   *
+   * <p>Positions equal to {@link Index#NOT_FOUND} become missing items.
+   */
+  @Override
+  public Storage<Long> applyMask(OrderMask mask) {
+    int[] positions = mask.getPositions();
+    long[] newData = new long[positions.length];
+    BitSet newMissing = new BitSet();
+    Context context = Context.getCurrent();
+    for (int i = 0; i < positions.length; i++) {
+      if (positions[i] == Index.NOT_FOUND) {
+        newMissing.set(i);
+      } else {
+        Long item = computeItem(positions[i]);
+        if (item == null) {
+          newMissing.set(i);
+        } else {
+          newData[i] = item;
+        }
+      }
+
+      context.safepoint();
+    }
+    return new LongStorage(newData, positions.length, newMissing, getType());
+  }
+
+  /**
+   * Builds a storage in which the ith item is repeated {@code counts[i]} times.
+   *
+   * @param counts number of repetitions of each item
+   * @param total length of the resulting storage; it sizes the result array
+   */
+  @Override
+  public Storage<Long> countMask(int[] counts, int total) {
+    long[] newData = new long[total];
+    BitSet newMissing = new BitSet();
+    int pos = 0;
+    Context context = Context.getCurrent();
+    for (int i = 0; i < counts.length; i++) {
+      Long item = computeItem(i);
+      if (item == null) {
+        // A missing item marks the whole run of its repetitions as missing.
+        newMissing.set(pos, pos + counts[i]);
+        pos += counts[i];
+      } else {
+        long nonNullItem = item;
+        for (int j = 0; j < counts[i]; j++) {
+          newData[pos++] = nonNullItem;
+        }
+      }
+
+      context.safepoint();
+    }
+    return new LongStorage(newData, total, newMissing, getType());
+  }
+
+  /**
+   * Materializes up to {@code limit} items starting at {@code offset}.
+   *
+   * <p>The result is shorter than {@code limit} when fewer items remain after {@code offset}.
+   */
+  @Override
+  public Storage<Long> slice(int offset, int limit) {
+    int newSize = Math.min(size - offset, limit);
+    long[] newData = new long[newSize];
+    BitSet newMissing = new BitSet();
+    Context context = Context.getCurrent();
+    for (int i = 0; i < newSize; i++) {
+      Long item = computeItem(offset + i);
+      if (item == null) {
+        newMissing.set(i);
+      } else {
+        newData[i] = item;
+      }
+      context.safepoint();
+    }
+    return new LongStorage(newData, newSize, newMissing, getType());
+  }
+
+  /** Materializes the concatenation of the given ranges, in the order they are listed. */
+  @Override
+  public Storage<Long> slice(List<SliceRange> ranges) {
+    int newSize = SliceRange.totalLength(ranges);
+    long[] newData = new long[newSize];
+    BitSet newMissing = new BitSet(newSize);
+    int offset = 0;
+    Context context = Context.getCurrent();
+    for (SliceRange range : ranges) {
+      int rangeStart = range.start();
+      int length = range.end() - rangeStart;
+      for (int i = 0; i < length; i++) {
+        Long item = computeItem(rangeStart + i);
+        if (item == null) {
+          newMissing.set(offset + i);
+        } else {
+          newData[offset + i] = item;
+        }
+        context.safepoint();
+      }
+      offset += length;
+    }
+
+    return new LongStorage(newData, newSize, newMissing, getType());
+  }
+
+  @Override
+  public AbstractLongStorage widen(IntegerType widerType) {
+    // Currently the implementation only reports 64-bit type so there is no widening to do - we can
+    // just return self.
+    assert getType().equals(IntegerType.INT_64);
+    return this;
+  }
+
+  /** Throws if {@code idx} is not a valid position in this storage. */
+  private void requireInBounds(long idx) {
+    if (idx < 0 || idx >= size) {
+      throw new IndexOutOfBoundsException(
+          "Index " + idx + " is out of bounds for range of length " + size + ".");
+    }
+  }
+}
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/numeric/DoubleStorage.java
@ -26,6 +26,7 @@ import org.enso.table.data.column.operation.map.numeric.isin.DoubleIsInOp;
 import org.enso.table.data.column.storage.BoolStorage;
 import org.enso.table.data.column.storage.Storage;
 import org.enso.table.data.column.storage.type.FloatType;
+import org.enso.table.data.column.storage.type.IntegerType;
 import org.enso.table.data.column.storage.type.StorageType;
 import org.enso.table.data.index.Index;
 import org.enso.table.data.mask.OrderMask;
@ -388,4 +389,68 @@ public final class DoubleStorage extends NumericStorage<Double> implements Doubl

    return new DoubleStorage(newData, newSize, newMissing);
  }
+
+  /** Memoized result of {@link #inferPreciseType()}; {@code null} until it is first computed. */
+  private StorageType inferredType = null;
+
+  /**
+   * Determines whether the stored floating-point values could be represented as 64-bit integers.
+   *
+   * <p>The result is computed once and then served from {@code inferredType}.
+   *
+   * @return {@link IntegerType#INT_64} if there is at least one present value and every present
+   *     value is a whole number accepted by {@code IntegerType.INT_64.fits}; the current type
+   *     otherwise
+   */
+  @Override
+  public StorageType inferPreciseType() {
+    if (inferredType != null) {
+      return inferredType;
+    }
+
+    boolean sawNumber = false;
+    boolean sawNonInteger = false;
+    // The scan stops at the first value that cannot be an integer.
+    for (int row = 0; row < size && !sawNonInteger; row++) {
+      if (!isMissing.get(row)) {
+        sawNumber = true;
+        double current = Double.longBitsToDouble(data[row]);
+        boolean isWhole = current % 1.0 == 0.0;
+        sawNonInteger = !(isWhole && IntegerType.INT_64.fits(current));
+      }
+    }
+
+    // We only switch to integers if there was at least one number.
+    if (sawNumber && !sawNonInteger) {
+      inferredType = IntegerType.INT_64;
+    } else {
+      inferredType = getType();
+    }
+
+    return inferredType;
+  }
+
+  /**
+   * Like {@link #inferPreciseType()}, but when the values can be integers it additionally looks
+   * for the smallest integer type that fits them.
+   *
+   * @return the shrunk integer type, or the result of {@link #inferPreciseType()} when that is not
+   *     an integer type
+   */
+  @Override
+  public StorageType inferPreciseTypeShrunk() {
+    StorageType precise = inferPreciseType();
+    if (!(precise instanceof IntegerType)) {
+      return precise;
+    }
+
+    return findSmallestIntegerTypeThatFits();
+  }
+
+  /**
+   * Delegates shrinking to the long storage logic by viewing this storage through an adapter that
+   * converts each value to a {@code Long} on demand.
+   *
+   * <p>Must only be called after {@link #inferPreciseType()} has established that all values can
+   * be integers.
+   */
+  private StorageType findSmallestIntegerTypeThatFits() {
+    assert inferredType instanceof IntegerType;
+
+    // A Long storage view over this storage; missing items stay missing.
+    ComputedNullableLongStorage asLongs =
+        new ComputedNullableLongStorage(size) {
+          @Override
+          protected Long computeItem(int idx) {
+            // Qualified access is required - the adapter has its own `isNa` and `getItem`.
+            if (DoubleStorage.this.isNa(idx)) {
+              return null;
+            }
+
+            double value = DoubleStorage.this.getItem(idx);
+            assert value % 1.0 == 0.0
+                : "The value " + value + " should be a whole number (guaranteed by checks).";
+            return Long.valueOf((long) value);
+          }
+        };
+
+    // The adapter's shrinking logic picks the final type.
+    return asLongs.inferPreciseTypeShrunk();
+  }
 }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/StorageType.java
@ -11,9 +11,11 @@ import java.time.ZonedDateTime;
 /**
 * Represents an underlying internal storage type that can be mapped to the Value Type that is exposed to users.
 */
-public sealed interface StorageType permits AnyObjectType, BigIntegerType, BooleanType, DateTimeType, DateType, FloatType, IntegerType, TextType, TimeOfDayType {
+public sealed interface StorageType permits AnyObjectType, BigIntegerType, BooleanType, DateTimeType, DateType,
+    FloatType, IntegerType, TextType, TimeOfDayType {
  /**
-   * @return the StorageType that represents a given boxed item.
+   * @return the StorageType that represents a given boxed item. This has special handling for floating-point values -
+   *     if they represent a whole number, they will be treated as integers.
   */
  static StorageType forBoxedItem(Object item) {
    if (NumericConverter.isCoercibleToLong(item)) {
@ -21,6 +23,11 @@ public sealed interface StorageType permits AnyObjectType, BigIntegerType, Boole
    }

    if (NumericConverter.isFloatLike(item)) {
+      double value = NumericConverter.coerceToDouble(item);
+      if (value % 1.0 == 0.0 && IntegerType.INT_64.fits(value)) {
+        return IntegerType.INT_64;
+      }
+
      return FloatType.FLOAT_64;
    }

@ -32,7 +39,7 @@ public sealed interface StorageType permits AnyObjectType, BigIntegerType, Boole
      case LocalTime t -> TimeOfDayType.INSTANCE;
      case LocalDateTime d -> DateTimeType.INSTANCE;
      case ZonedDateTime d -> DateTimeType.INSTANCE;
-      default -> null;
+      default -> AnyObjectType.INSTANCE;
    };
  }
 }
--- a/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/TextType.java
+++ b/std-bits/table/src/main/java/org/enso/table/data/column/storage/type/TextType.java
@ -3,6 +3,12 @@ package org.enso.table.data.column.storage.type;
 import org.enso.base.Text_Utils;

 public record TextType(long maxLength, boolean fixedLength) implements StorageType {
+  /**
+   * Rejects a zero {@code maxLength}.
+   *
+   * <p>Only the value 0 is checked here; other values, including negative ones, are accepted by
+   * this constructor.
+   *
+   * @throws IllegalArgumentException if {@code maxLength} is 0
+   */
+  public TextType {
+    final boolean isZeroLength = maxLength == 0;
+    if (isZeroLength) {
+      throw new IllegalArgumentException("The maxLength of a text type must be positive or -1 to indicate unlimited length.");
+    }
+  }
+
  public static final TextType VARIABLE_LENGTH = new TextType(-1, false);

  public static TextType fixedLength(long length) {
@ -10,7 +16,7 @@ public record TextType(long maxLength, boolean fixedLength) implements StorageTy
  }

  public static TextType variableLengthWithLimit(long maxLength) {
-    assert maxLength >= 0;
+    assert maxLength > 0;
    return new TextType(maxLength, false);
  }

@ -90,6 +96,10 @@ public record TextType(long maxLength, boolean fixedLength) implements StorageTy

    boolean bothFixed = type1.fixedLength && type2.fixedLength;
    long lengthSum = type1.maxLength + type2.maxLength;
+    if (lengthSum == 0) {
+      return VARIABLE_LENGTH;
+    }
+
    return new TextType(lengthSum, bothFixed);
  }
 }
--- a/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Conversion_Spec.enso
@ -67,7 +67,6 @@ spec setup =
                c.value_type.is_text . should_be_true
                c.to_vector . should_equal ["{{{MY Type [x=42] }}}", "{{{MY Type [x=X] }}}"]

-            # TODO what to test here?
            Test.specify "should allow to cast an integer column to a decimal type" <|
                t = table_builder [["X", [1, 2, 3]]]
                c = t.at "X" . cast Value_Type.Decimal
@ -116,6 +115,15 @@ spec setup =
                w2 = Problems.expect_warning Conversion_Failure c2
                w2.affected_rows_count . should_equal 4

+            Test.specify "should not allow 0-length Char type" <|
+                c1 = table_builder [["X", ["a", "", "bcd"]]] . at "X"
+                r1 = c1.cast (Value_Type.Char size=0 variable_length=False)
+                r1.should_fail_with Illegal_Argument
+                r1.catch.to_display_text . should_contain "positive"
+
+                r2 = c1.cast (Value_Type.Char size=0 variable_length=True)
+                r2.should_fail_with Illegal_Argument
+
    Test.group prefix+"Table/Column.cast - numeric" <|
        Test.specify "should allow to cast a boolean column to integer" <|
            t = table_builder [["X", [True, False, True]]]
@ -531,3 +539,254 @@ spec setup =
            r3 = t.parse ["X", "Y"] Value_Type.Integer
            r3.should_fail_with Missing_Input_Columns
            r3.catch.criteria . should_equal ["Y"]
+
+    if setup.is_database then Test.group prefix+"Table/Column auto value type" <|
+        Test.specify "should report unsupported" <|
+            t = table_builder [["X", [1, 2, 3]]]
+            t.auto_value_types . should_fail_with Unsupported_Database_Operation
+            t.at "X" . auto_value_type . should_fail_with Unsupported_Database_Operation
+
+    if setup.is_database.not then Test.group prefix+"Table/Column auto value type" <|
+        Test.specify "should allow to narrow down types of a Mixed column" <|
+            [True, False].each shrink_types->
+                mixer = My_Type.Value 1
+                t0 = table_builder [["strs", [mixer, "a", "b"]], ["ints", [mixer, 2, 3]], ["floats", [mixer, 1.5, 2.5]], ["mix", [1, mixer, "a"]], ["dates", [mixer, Date.new 2022, Date.new 2020]], ["datetimes", [mixer, Date_Time.new 2022 12 30 13 45, Date_Time.new 2020]], ["times", [mixer, Time_Of_Day.new 12 30, Time_Of_Day.new 13 45]], ["mixed_time", [Date.new 2022, Time_Of_Day.new 12 30, Date_Time.new 2019]], ["bools", [mixer, True, False]]]
+                t1 = t0.drop 1
+
+                t1.at "strs" . value_type . should_equal Value_Type.Mixed
+                t1.at "ints" . value_type . should_equal Value_Type.Mixed
+                t1.at "floats" . value_type . should_equal Value_Type.Mixed
+                t1.at "mix" . value_type . should_equal Value_Type.Mixed
+                t1.at "dates" . value_type . should_equal Value_Type.Mixed
+                t1.at "datetimes" . value_type . should_equal Value_Type.Mixed
+                t1.at "times" . value_type . should_equal Value_Type.Mixed
+                t1.at "mixed_time" . value_type . should_equal Value_Type.Mixed
+                t1.at "bools" . value_type . should_equal Value_Type.Mixed
+
+                t2 = t1.auto_value_types shrink_types=shrink_types
+                # Depending on shrink_types value the size of the Char/Integer types may vary - exact details tested elsewhere.
+                t2.at "strs" . value_type . should_be_a (Value_Type.Char ...)
+                t2.at "ints" . value_type . should_be_a (Value_Type.Integer ...)
+                t2.at "floats" . value_type . should_equal Value_Type.Float
+                t2.at "mix" . value_type . should_equal Value_Type.Mixed
+                t2.at "dates" . value_type . should_equal Value_Type.Date
+                t2.at "datetimes" . value_type . should_equal Value_Type.Date_Time
+                t2.at "times" . value_type . should_equal Value_Type.Time
+                t2.at "mixed_time" . value_type . should_equal Value_Type.Mixed
+                t2.at "bools" . value_type . should_equal Value_Type.Boolean
+
+        Test.specify "will only modify selected columns" <|
+            mixer = My_Type.Value 1
+            t0 = table_builder [["strs", [mixer, "a", "b"]], ["ints", [mixer, 2, 3]], ["floats", [mixer, 1.5, 2.5]]]
+            t1 = t0.drop 1
+
+            t2 = t1.auto_value_types []
+            t2.at "strs" . value_type . should_equal Value_Type.Mixed
+            t2.at "ints" . value_type . should_equal Value_Type.Mixed
+            t2.at "floats" . value_type . should_equal Value_Type.Mixed
+
+            t3 = t1.auto_value_types ["strs"]
+            t3.at "strs" . value_type . should_equal Value_Type.Char
+            t3.at "ints" . value_type . should_equal Value_Type.Mixed
+            t3.at "floats" . value_type . should_equal Value_Type.Mixed
+
+            # should match ints and floats but not strs
+            t4 = t1.auto_value_types "[if].*".to_regex
+            t4.at "strs" . value_type . should_equal Value_Type.Mixed
+            t4.at "ints" . value_type . should_equal Value_Type.Integer
+            t4.at "floats" . value_type . should_equal Value_Type.Float
+
+        Test.specify "will convert a Float column to Integer if all values can be represented as long" <|
+            t1 = table_builder [["X", [1.0, 2.0, 3.0]], ["Y", [1.0, 2.5, 3.0]], ["Z", [1.0, 2.0, (2.0^100)]]]
+            t1.at "X" . value_type . should_equal Value_Type.Float
+            t1.at "Y" . value_type . should_equal Value_Type.Float
+            t1.at "Z" . value_type . should_equal Value_Type.Float
+
+            t2 = t1.auto_value_types shrink_types=False
+            t2.at "X" . to_vector . should_equal [1, 2, 3]
+            t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+            t2.at "Y" . value_type . should_equal Value_Type.Float
+            ## Technically, Z could get converted to Decimal type. But IMO that
+               is not desirable - at this scale the Float is no longer a
+               precise type (as not even consecutive integers are exactly
+               representable). And Decimal is expected to be precise. So such a
+               conversion should only happen by explicit request, not
+               automatically.
+            t2.at "Z" . value_type . should_equal Value_Type.Float
+
+        Test.specify "will not parse text columns" <|
+            t1 = table_builder [["X", ["1", "2", "3"]]]
+            c2 = t1.at "X" . auto_value_type
+            c2.value_type . should_equal Value_Type.Char
+
+        Test.specify "will 'undo' a cast to Mixed" <|
+            t1 = table_builder [["X", [1, 2, 3]], ["Y", ["a", "b", "c"]]]
+            t2 = t1.cast ["X", "Y"] Value_Type.Mixed
+            t2.at "X" . value_type . should_equal Value_Type.Mixed
+            t2.at "Y" . value_type . should_equal Value_Type.Mixed
+
+            t3 = t2.auto_value_types
+            t3.at "X" . value_type . should_equal Value_Type.Integer
+            t3.at "Y" . value_type . should_equal Value_Type.Char
+
+        Test.specify "will choose Decimal type if all values are integers but cannot fit long" <|
+            c0 = table_builder [["X", [My_Type.Value 42, 1, 2, 2^100]]] . at "X"
+            c1 = c0.drop 1
+
+            c1.value_type . should_equal Value_Type.Mixed
+            c2 = c1.auto_value_type
+            c2.value_type . should_be_a (Value_Type.Decimal ...)
+            c2.to_vector . should_equal [1, 2, 2^100]
+
+        Test.specify "will try to find the smallest integer type to fit the value (if shrink_types=True)" <|
+            [False, True].each is_mixed->
+                prefix = if is_mixed then "mixed" else 0
+                t0 = table_builder [["X", [prefix, 1, 2, 3]], ["Y", [prefix, 2^20, 2, 3]], ["Z", [prefix, 2^50, 2, 3]], ["F", [prefix, 1.0, 2.0, 3.0]]]
+                t1 = t0.drop 1
+
+                case is_mixed of
+                    True  -> t1.at "Z" . value_type . should_equal Value_Type.Mixed
+                    False -> t1.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+
+                case is_mixed of
+                    True  -> t1.at "F" . value_type . should_equal Value_Type.Mixed
+                    False -> t1.at "F" . value_type . should_equal Value_Type.Float
+
+                t2 = t1.auto_value_types shrink_types=False
+                t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+                t2.at "Y" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+                t2.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+                t2.at "F" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+
+                t3 = t1.auto_value_types shrink_types=True
+                # Even though X's values are small enough to fit in a Byte, we stick to 16-bit Integers.
+                t3.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
+                t3.at "Y" . value_type . should_equal (Value_Type.Integer Bits.Bits_32)
+                t3.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+                # Shrinking Floats also finds the smallest type that fits.
+                t3.at "F" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
+
+        Test.specify "will not return Byte columns by default, but should leave existing Byte columns intact" <|
+            c1 = table_builder [["X", [1, 2, 3]]] . at "X" . cast Value_Type.Byte
+            c1.value_type . should_equal Value_Type.Byte
+
+            [True, False].each shrink_types->
+                c2 = c1.auto_value_type shrink_types=shrink_types
+                c2.value_type . should_equal Value_Type.Byte
+
+        Test.specify "Decimal (scale=0, i.e. integer) columns should also be shrinked if possible and shrink_types=True" <|
+            t0 = table_builder [["X", [2^100, 1, 2, 3]], ["Y", [10, 20, 2^100, 30]], ["Z", [1, 2, 3, 4]]] . cast "Z" (Value_Type.Decimal scale=0)
+            t1 = t0.drop 1
+
+            t1.at "X" . value_type . should_equal (Value_Type.Decimal scale=0)
+            t1.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0)
+            t1.at "Z" . value_type . should_equal (Value_Type.Decimal scale=0)
+
+            t2 = t1.auto_value_types shrink_types=False
+
+            # Without shrinking we get an integer type, but not the smallest one - just the default 64-bit.
+            t2.at "X" . to_vector . should_equal [1, 2, 3]
+            t2.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+            t2.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0)
+            t2.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_64)
+
+            t3 = t1.auto_value_types shrink_types=True
+            t3.at "X" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
+            t3.at "Y" . value_type . should_equal (Value_Type.Decimal scale=0)
+            t3.at "Z" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
+
+        Test.specify "if all text values have the same length, will change the type to fixed-length string (if shrink_types=True)" <|
+            [False, True].each is_mixed->
+                prefix = if is_mixed then 42 else "FOOBARBAZ"
+                c0 = table_builder [["X", [prefix, "aa", "bb", "cc"]]] . at "X"
+                c1 = c0.drop 1
+                c1.to_vector . should_equal ["aa", "bb", "cc"]
+
+                case is_mixed of
+                    True  -> c1.value_type . should_equal Value_Type.Mixed
+                    False -> c1.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+
+                c2 = c1.auto_value_type shrink_types=False
+                c2.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+
+                c3 = c1.auto_value_type shrink_types=True
+                c3.value_type . should_equal (Value_Type.Char size=2 variable_length=False)
+
+            c4 = table_builder [["X", ["a", "x", "y"]]] . at "X" . cast (Value_Type.Char size=100 variable_length=True)
+            c4.to_vector . should_equal ["a", "x", "y"]
+            c4.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
+
+            c5 = c4.auto_value_type shrink_types=False
+            c5.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
+
+            c6 = c4.auto_value_type shrink_types=True
+            c6.value_type . should_equal (Value_Type.Char size=1 variable_length=False)
+
+        Test.specify "if all text values are empty string, the type will remain unchanged" <|
+            c1 = table_builder [["X", ["", ""]]] . at "X"
+            c2 = c1.cast (Value_Type.Char size=100 variable_length=True)
+
+            c1.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+            c2.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
+
+            [True, False].each shrink_types->
+                c1_b = c1.auto_value_type shrink_types=shrink_types
+                c1_b.value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+
+                c2_b = c2.auto_value_type shrink_types=shrink_types
+                c2_b.value_type . should_equal (Value_Type.Char size=100 variable_length=True)
+
+        Test.specify "if all text values fit under 255 characters, will add a 255 length limit (if shrink_types=True)" <|
+            t1 = table_builder [["short_unbounded", ["a", "bb", "ccc"]], ["long_unbounded", ["a"*100, "b"*200, "c"*300]]]
+
+            t2 = t1 . set (t1.at "short_unbounded" . cast (Value_Type.Char size=1000)) "short_1000" . set (t1.at "short_unbounded" . cast (Value_Type.Char size=10)) "short_10" . set (t1.at "long_unbounded" . cast (Value_Type.Char size=400)) "long_400" . set (t1.at "short_unbounded" . cast Value_Type.Mixed) "short_mixed"
+            t2.at "short_mixed" . value_type . should_equal Value_Type.Mixed
+
+            t3 = t2.auto_value_types shrink_types=False
+            t3.at "short_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+            t3.at "short_1000" . value_type . should_equal (Value_Type.Char size=1000 variable_length=True)
+            t3.at "short_10" . value_type . should_equal (Value_Type.Char size=10 variable_length=True)
+            # Mixed column gets to be text again.
+            t3.at "short_mixed" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+            t3.at "long_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+            t3.at "long_400" . value_type . should_equal (Value_Type.Char size=400 variable_length=True)
+
+            t4 = t2.auto_value_types shrink_types=True
+            # Short ones get shortened to 255 unless they were shorter already.
+            t4.at "short_unbounded" . value_type . should_equal (Value_Type.Char size=255 variable_length=True)
+            t4.at "short_1000" . value_type . should_equal (Value_Type.Char size=255 variable_length=True)
+            t4.at "short_10" . value_type . should_equal (Value_Type.Char size=10 variable_length=True)
+            t4.at "short_mixed" . value_type . should_equal (Value_Type.Char size=255 variable_length=True)
+            # Long ones cannot fit in 255 so they are kept as-is.
+            t4.at "long_unbounded" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
+            t4.at "long_400" . value_type . should_equal (Value_Type.Char size=400 variable_length=True)
+
+        # Builds columns whose only non-null value is in the first row and then drops that row, so
+        # "mix", "int", "str" and "float" contain only Nothing while still reporting the value
+        # types asserted on `t1` below. The "decimal" column keeps two non-null values.
+        Test.specify "can deal with all-null columns" <|
+            t0 = table_builder [["mix", [My_Type.Value 1, Nothing, Nothing]], ["int", [42, Nothing, Nothing]], ["str", ["a", Nothing, Nothing]], ["float", [1.5, Nothing, Nothing]], ["decimal", [2^100, 2^10, 2]]]
+            t1 = t0.drop 1
+
+            t1.at "mix" . value_type . should_equal Value_Type.Mixed
+            t1.at "int" . value_type . should_equal Value_Type.Integer
+            t1.at "float" . value_type . should_equal Value_Type.Float
+            t1.at "str" . value_type . should_equal Value_Type.Char
+            t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0)
+
+            t2 = t1.auto_value_types shrink_types=False
+            t2.at "mix" . value_type . should_equal Value_Type.Mixed
+            t2.at "int" . value_type . should_equal Value_Type.Integer
+            ## Technically, if there are no elements, "all of elements" are
+               whole integers (quantification over empty domain is trivially true).
+               However, that would be rather not useful, so instead we keep the
+               original type.
+            t2.at "float" . value_type . should_equal Value_Type.Float
+            # NOTE(review): this checks `t1` (already asserted above), not `t2`, so the result of
+            # `auto_value_types` for the "decimal" column is not inspected here. Its remaining
+            # values are small, so `t2` may well report an Integer type - confirm the intent.
+            t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0)
+            t2.at "str" . value_type . should_equal Value_Type.Char
+
+            t3 = t1.auto_value_types shrink_types=True
+            t3.at "mix" . value_type . should_equal Value_Type.Mixed
+            # Technically, if there are no elements, then they can be fit inside of the smallest types available:
+            t3.at "int" . value_type . should_equal (Value_Type.Integer Bits.Bits_16)
+            t3.at "float" . value_type . should_equal Value_Type.Float
+            # NOTE(review): as above, this checks `t1` rather than `t3` - confirm the intent.
+            t1.at "decimal" . value_type . should_equal (Value_Type.Decimal scale=0)
+            # But for Text we make an exception and keep the type unbounded: 0-length fixed length string simply would not make any sense.
+            t3.at "str" . value_type . should_equal (Value_Type.Char size=Nothing variable_length=True)
--- a/test/Table_Tests/src/Common_Table_Operations/Map_Spec.enso
+++ b/test/Table_Tests/src/Common_Table_Operations/Map_Spec.enso
@ -125,7 +125,7 @@ spec setup =
                k x = if x == 2 then Time_Of_Day.new 13 05 else (x+1).to_text
                r7 = c1.map k expected_value_type=Value_Type.Char
                r7.should_fail_with Invalid_Value_Type
-                r7.catch.to_display_text . should_contain "Expected type Char (variable length, max_size=Nothing), but got a value 13:05:00 of type Time"
+                r7.catch.to_display_text . should_contain "Expected type Char (variable length, max_size=unlimited), but got a value 13:05:00 of type Time"

                l x = if x == 2 then 42 else Date.new 2022 05 x
                r8 = c1.map l expected_value_type=Value_Type.Date
--- a/test/Table_Tests/src/Helpers/Value_Type_Spec.enso
+++ b/test/Table_Tests/src/Helpers/Value_Type_Spec.enso
@ -17,7 +17,7 @@ spec =
            Value_Type.Float.to_display_text . should_equal "Float (64 bits)"
            Value_Type.Decimal.to_display_text . should_equal "Decimal (precision=Nothing, scale=Nothing)"

-            Value_Type.Char.to_display_text . should_equal "Char (variable length, max_size=Nothing)"
+            Value_Type.Char.to_display_text . should_equal "Char (variable length, max_size=unlimited)"
            (Value_Type.Binary 8 False).to_display_text . should_equal "Binary (fixed length, size=8 bytes)"

            Value_Type.Date.to_display_text . should_equal "Date"
--- a/test/Table_Tests/src/In_Memory/Column_Spec.enso
+++ b/test/Table_Tests/src/In_Memory/Column_Spec.enso
@ -166,10 +166,14 @@ spec =
            c8.value_type . should_equal Value_Type.Mixed
            c8.to_vector . should_equal ["aaa", 42, Date.new 2022 08 22]

+            c9 = Column.from_vector "X" [Time_Of_Day.new 10 11 12, Time_Of_Day.new 11 30] Value_Type.Time
+            c9.value_type . should_equal Value_Type.Time
+            c9.to_vector . should_equal [Time_Of_Day.new 10 11 12, Time_Of_Day.new 11 30]
+
        Test.specify "will fail if unexpected values are encountered for the requested type" <|
            r1 = Column.from_vector "X" ["a", 2] Value_Type.Char
            r1.should_fail_with Invalid_Value_Type
-            r1.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=Nothing), but got a value 2 of type Integer (16 bits)"
+            r1.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=unlimited), but got a value 2 of type Integer (16 bits)"

            r2 = Column.from_vector "X" ["aaa", "b"] (Value_Type.Char size=3 variable_length=False)
            r2.should_fail_with Invalid_Value_Type
@ -177,7 +181,7 @@ spec =

            r3 = Column.from_vector "X" ["aaa", 42] Value_Type.Char
            r3.should_fail_with Invalid_Value_Type
-            r3.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=Nothing), but got a value 42 of type Integer (16 bits)"
+            r3.catch.to_display_text.should_contain "Expected type Char (variable length, max_size=unlimited), but got a value 42 of type Integer (16 bits)"

            r4 = Column.from_vector "X" [12, Time_Of_Day.new 10 11 12] Value_Type.Integer
            r4.should_fail_with Invalid_Value_Type
@ -199,6 +203,13 @@ spec =
            r8.should_fail_with Invalid_Value_Type
            r8.catch.to_display_text.should_contain "Expected type Byte, but got a value 1000000000 of type Integer (32 bits)"

+        Test.specify "will not allow to construct a column with Char size=0" <|
+            # A zero-size Char type must be rejected for both fixed-length and variable-length variants.
+            [False, True].each is_variable->
+                zero_size_type = Value_Type.Char size=0 variable_length=is_variable
+                empty_column = Column.from_vector "X" [] zero_size_type
+                empty_column.should_fail_with Illegal_Argument
+
    Test.group "Rounding" <|
        Test.specify "should be able to round a column of decimals" <|
            Column.from_vector "foo" [1.2, 2.3, 2.5, 3.6] . round . should_equal (Column.from_vector "round([foo])" [1, 2, 3, 4])