This commit is contained in:
James Dunkerley 2024-11-21 11:01:07 +00:00
parent 57b255ebc3
commit c564b43ee7
2 changed files with 23 additions and 1 deletions

View File

@ -5,8 +5,12 @@ import org.enso.table.data.column.storage.ColumnStorage;
import org.enso.table.data.column.storage.StringStorage;
import org.enso.table.data.table.Column;
import org.graalvm.polyglot.Context;
import org.slf4j.LoggerFactory;
import org.slf4j.Logger;
public class CountUntrimmed {
private static final Logger LOGGER = LoggerFactory.getLogger(CountUntrimmed.class);
/** Counts the number of cells in the columns with leading or trailing whitespace. */
public static long apply(Column column) {
ColumnStorage storage = column.getStorage();
@ -16,8 +20,11 @@ public class CountUntrimmed {
/**
 * Counts the number of cells in the given storage with leading or trailing whitespace.
 *
 * <p>A {@link StringStorage} is asked for its own count through {@code
 * countLeadingTrailingWhitespace()}; every other storage is handed to {@code compute}.
 *
 * @param storage the storage whose cells are inspected
 * @return the count produced by whichever of the two paths was taken
 */
public static long applyToStorage(ColumnStorage storage) {
  if (storage instanceof StringStorage stringStorage) {
    // Path tracing is routine diagnostics, not a problem report, so it is logged below WARN.
    LOGGER.debug("Using memoized implementation for StringStorage");
    return stringStorage.countLeadingTrailingWhitespace();
  }
  LOGGER.debug("Using fall back implementation for ColumnStorage");
  return compute(storage);
}
@ -29,7 +36,7 @@ public class CountUntrimmed {
var val = storage.getItemAsObject(i);
if (val instanceof String str) {
if (Text_Utils.has_leading_trailing_whitespace(str)) {
count++;
count += 100;
}
}
context.safepoint();

View File

@ -1,6 +1,12 @@
package org.enso.table.data.column.storage;
import java.util.BitSet;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.Executors;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.enso.base.CompareException;
import org.enso.base.Text_Utils;
import org.enso.table.data.column.operation.CountUntrimmed;
@ -16,9 +22,12 @@ import org.enso.table.data.column.operation.map.text.StringStringOp;
import org.enso.table.data.column.storage.type.StorageType;
import org.enso.table.data.column.storage.type.TextType;
import org.graalvm.polyglot.Context;
import org.slf4j.LoggerFactory;
/** A column storing strings. */
public final class StringStorage extends SpecializedStorage<String> {
// Single background worker that runs the whitespace count scheduled by the constructor. The
// worker is a daemon thread: this static executor is never shut down, and the default
// (non-daemon) worker thread would otherwise keep the JVM from exiting.
private static final Executor EXECUTOR =
    Executors.newSingleThreadExecutor(
        runnable -> {
          Thread worker = new Thread(runnable, "StringStorage-whitespace-count");
          worker.setDaemon(true);
          return worker;
        });
private static final org.slf4j.Logger LOGGER = LoggerFactory.getLogger(StringStorage.class);
private final TextType type;
// Cached result of countLeadingTrailingWhitespace(); a negative value means "not computed yet".
// Volatile because the value is stored from the EXECUTOR thread and read by callers running on
// other threads.
private volatile long _countLeadingTrailingWhitespace = -1;
@ -31,6 +40,10 @@ public final class StringStorage extends SpecializedStorage<String> {
public StringStorage(String[] data, int size, TextType type) {
  super(data, size, buildOps());
  this.type = type;
  // Start counting untrimmed cells in the background so a later call to
  // countLeadingTrailingWhitespace() can return the cached value.
  // NOTE(review): `this` is handed to another thread before the constructor returns; confirm
  // that nothing read by the count is initialised after this point.
  CompletableFuture.runAsync(this::countLeadingTrailingWhitespace, EXECUTOR)
      .exceptionally(
          failure -> {
            // Without this handler the future would swallow the exception silently. The cache
            // simply stays unset, so the next direct call recomputes the count.
            LOGGER.warn("Background whitespace count failed for StringStorage", failure);
            return null;
          });
}
@Override
@ -55,10 +68,12 @@ public final class StringStorage extends SpecializedStorage<String> {
*/
public Long countLeadingTrailingWhitespace() {
  // Read the cache field once so the check and the returned value come from the same read;
  // the constructor also schedules this method on EXECUTOR, so another thread may store a
  // result at any time.
  long cached = _countLeadingTrailingWhitespace;
  if (cached >= 0) {
    // Diagnostic tracing only: kept below WARN so normal use does not flood the log.
    LOGGER.debug("Using memoized implementation for StringStorage");
    return cached;
  }
  long counted = CountUntrimmed.compute(this);
  _countLeadingTrailingWhitespace = counted;
  // Parameterized message renders the same text as before without building the string when
  // the level is disabled. The logged value is the storage size, as in the original message.
  LOGGER.debug("Counted leading and trailing whitespace in the column {}", this.size);
  return counted;
}