Lazy scatterplot for Vector & Table (#3655)

First of all, this PR demonstrates how to implement _lazy visualization_:
- one needs to write/enhance the Enso visualization libraries - this PR adds two optional parameters (`bounds` and `limit`) to the `process_to_json_text` function.
- the `process_to_json_text` function can be tested with the standard Enso test harness, which this PR also does
- then one has to modify the JavaScript on the IDE side to construct a `setPreprocessor` expression using the optional parameters

The idea of _scatter plot lazy visualization_ is to limit the number of points the IDE requests. Initially the limit is set to `limit=1024`. `Scatter_Plot.enso` then processes the data and selects a subset of at most `limit` points. Right now it includes the points with the `min` and `max` values on both the `x` and `y` axes plus randomly chosen points up to the `limit`.

![Zooming In](https://user-images.githubusercontent.com/26887752/185336126-f4fbd914-7fd8-4f0b-8377-178095401f46.png)

The D3 visualization widget is capable of _zooming in_. When that happens, the JavaScript widget composes a new expression with `bounds` set to the newly visible area. After `setPreprocessor` is called, the engine recomputes the visualization data, filters out any data outside of the `bounds` and selects another `limit` points from the new data. The IDE visualization then updates itself to display this more detailed data. Users can zoom in to see the smallest detail, where the number of points gets below `limit`, or they can select _Fit all_ to see all the data without any `bounds`.

# Important Notes
Randomly selecting `limit` samples from the dataset may be misleading. Implementing _k-means clustering_ (where `k=limit`) would probably generate a more representative approximation.
This commit is contained in:
Jaroslav Tulach 2022-08-23 14:12:22 +02:00 committed by GitHub
parent 2507a2049b
commit 2b9352d2fc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 120 additions and 19 deletions

View File

@ -42,6 +42,7 @@
visualizations. (Previously, they were placed to the left of the
visualizations.)
- [Fixed histograms coloring and added a color legend.][3153]
- [Lazy visualization for scatter plot.][3655]
- [Fixed broken node whose expression contains non-ASCII characters.][3166]
- [Fixed developer console warnings about views being created but not
registered.][3181]
@ -185,6 +186,7 @@
[debug-shortcuts]:
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
[3153]: https://github.com/enso-org/enso/pull/3153
[3655]: https://github.com/enso-org/enso/pull/3655
[3166]: https://github.com/enso-org/enso/pull/3166
[3181]: https://github.com/enso-org/enso/pull/3181
[3186]: https://github.com/enso-org/enso/pull/3186

View File

@ -17,6 +17,7 @@ const LINEAR_SCALE = 'linear'
const LOGARITHMIC_SCALE = 'logarithmic'
const VISIBLE_POINTS = 'visible'
const BUTTONS_HEIGHT = 25
const DEFAULT_LIMIT = 1024
/**
* A d3.js ScatterPlot visualization.
@ -49,7 +50,9 @@ class ScatterPlot extends Visualization {
constructor(data) {
super(data)
this.setPreprocessor('process_to_json_text', 'Standard.Visualization.Scatter_Plot')
this.bounds = null
this.limit = DEFAULT_LIMIT
this.updatePreprocessor()
this.dataPoints = []
this.axis = {
x: { scale: LINEAR_SCALE },
@ -58,6 +61,14 @@ class ScatterPlot extends Visualization {
this.points = { labels: VISIBLE_POINTS }
}
/**
* Rebuilds the preprocessor expression from the current `this.limit` and
* `this.bounds` and registers it through `setPreprocessor`.
*
* The expression is a lambda of the form
* `x -> process_to_json_text x limit=<limit> [bounds=[a,b,c,d]]`, evaluated in
* the `Standard.Visualization.Scatter_Plot` module. The `bounds` argument is
* omitted while `this.bounds` is unset (null), so no filtering is requested.
*/
updatePreprocessor() {
let fn = 'x -> process_to_json_text x limit=' + this.limit
if (this.bounds) {
// The zoom handler stores bounds as [xMin, yMin, xMax, yMax]; they are
// emitted in that order as a comma-separated vector literal.
fn += ' bounds=[' + this.bounds.join(',') + ']'
}
this.setPreprocessor(fn, 'Standard.Visualization.Scatter_Plot')
}
/**
* Presents a scatterplot visualization after receiving `data`.
*/
@ -377,6 +388,9 @@ class ScatterPlot extends Visualization {
let yMin = zoom.transformedScale.yScale.invert(extent[1][1])
let yMax = zoom.transformedScale.yScale.invert(extent[0][1])
this.bounds = [xMin, yMin, xMax, yMax]
this.updatePreprocessor()
zoom.transformedScale.xScale.domain([xMin, xMax])
zoom.transformedScale.yScale.domain([yMin, yMax])
@ -806,6 +820,10 @@ class ScatterPlot extends Visualization {
zoom.transformedScale.yScale.domain(domainY)
self.zoomingHelper(zoom.transformedScale, boxWidth, scatter, points)
self.bounds = null
self.limit = DEFAULT_LIMIT
self.updatePreprocessor()
}
document.addEventListener('keydown', e => {

View File

@ -3,6 +3,7 @@ from Standard.Base import all
import Standard.Table.Data.Column
import Standard.Table.Data.Table
import Standard.Visualization.Helpers
from Standard.Base.Data.Index_Sub_Range import Sample
## PRIVATE
@ -138,18 +139,58 @@ Vector.Vector.point_data self =
Json.from_pairs [[X.name,i],[Y.name,elem]]
## PRIVATE
json_from_table : Table -> Json
json_from_table table =
data = table.point_data
# Keeps only the points of `data` that fall inside `bounds`. When `bounds` is
# `Nothing`, `data` is returned unchanged.
#
# Arguments:
# - bounds: `Nothing`, or a vector read positionally as
#   `[min_x, min_y, max_x, max_y]`.
# - data: the points to filter; each point is read via `get "x"` / `get "y"`.
bound_data bounds data = case bounds of
Nothing -> data
_ ->
# Positional layout: minimum corner first, then maximum corner.
min_x = bounds.at 0
min_y = bounds.at 1
max_x = bounds.at 2
max_y = bounds.at 3
data.filter <| datum->
x = datum.get "x" . unwrap
y = datum.get "y" . unwrap
# All four edges are inclusive.
min_x<=x && x<=max_x && min_y<=y && y<=max_y
# Accumulator for the extreme points of a data set. `limit_data` stores an
# `[index, point]` pair in each of the four fields.
type Extreme min_x max_x min_y max_y
## PRIVATE
# Reduces `data` to at most `limit` points, always keeping the points with the
# minimum and maximum `x` and `y` values.
#
# Arguments:
# - limit: `Nothing` (no limiting) or the maximum number of points to return.
# - data: the points to reduce; each point is read via `get "x"` / `get "y"`.
#
# `data` is returned unchanged when `limit` is `Nothing`, when `data` already
# has at most `limit` elements, or when `data` is empty.
limit_data limit data = case limit of
Nothing -> data
_ -> if ((data.length <= limit) || (data.length == 0)) then data else
x datum = (datum.get "x").unwrap
y datum = (datum.get "y").unwrap
# Fold step: each field of `current` is an `[index, point]` pair. The
# comparisons are strict, so on ties the earliest point is kept.
update_extreme current idx point =
new_min_x = if x current.min_x.second > x point then [idx, point] else current.min_x
new_min_y = if y current.min_y.second > y point then [idx, point] else current.min_y
new_max_x = if x current.max_x.second < x point then [idx, point] else current.max_x
new_max_y = if y current.max_y.second < y point then [idx, point] else current.max_y
Extreme new_min_x new_max_x new_min_y new_max_y
# Seed all four extremes with the first element (index 0); the empty case
# was already handled by the guard above.
first = [0, data.first]
bounds = case data.fold_with_index (Extreme first first first first) update_extreme of
Extreme min_x max_x min_y max_y -> [min_x, max_x, min_y, max_y]
_ -> []
# The pairs are keyed by index, so a point that is extreme in more than one
# respect presumably collapses to a single entry here - TODO confirm against
# `Map.from_vector`.
extreme = Map.from_vector bounds . values
if limit <= extreme.length then extreme.take (First limit) else
# NOTE(review): the sample is drawn from all of `data`, so it looks like it
# can pick an extreme point again and produce a duplicate - confirm.
extreme + data.take (Sample (limit - extreme.length))
## PRIVATE
json_from_table : Table -> [Int]|Nothing -> Int|Nothing -> Json
json_from_table table bounds limit =
data = table.point_data |> bound_data bounds |> limit_data limit
axes = table.axes
Json.from_pairs <| [[data_field,data], [axis_field, axes]]
## PRIVATE
json_from_vector : Vector Any -> Json
json_from_vector vec =
data = [data_field, vec.point_data]
json_from_vector : Vector Any -> [Int]|Nothing -> Int|Nothing -> Json
json_from_vector vec bounds limit =
data = vec.point_data |> bound_data bounds |> limit_data limit
axes = [axis_field, Nothing]
Json.from_pairs [data, axes]
Json.from_pairs <| [[data_field, data], axes]
## PRIVATE
@ -160,12 +201,12 @@ json_from_vector vec =
Arguments:
- value: the value to be visualized.
process_to_json_text : Any -> Text
process_to_json_text value =
process_to_json_text value bounds=Nothing limit=Nothing =
json = case value of
Column.Column _ -> json_from_table value.to_table
Table.Table _ -> json_from_table value
Vector.Vector _ -> json_from_vector value
_ -> json_from_vector value.to_vector
Column.Column _ -> json_from_table value.to_table bounds limit
Table.Table _ -> json_from_table value bounds limit
Vector.Vector _ -> json_from_vector value bounds limit
_ -> json_from_vector value.to_vector bounds limit
json.to_text

View File

@ -10,8 +10,7 @@ import Standard.Test
import project
spec =
expect value axis_expected_text data_expected_text =
text = Scatter_Plot.process_to_json_text value
expect_text text axis_expected_text data_expected_text =
json = Json.parse text
json.fields.keys.should_equal ['axis','data']
@ -20,6 +19,10 @@ spec =
expected_result = Json.from_pairs [expected_axis_labels, expected_data_pair]
json.should_equal expected_result
expect value axis_expected_text data_expected_text =
text = Scatter_Plot.process_to_json_text value
expect_text text axis_expected_text data_expected_text
index = Scatter_Plot.index_name
axis label = Json.from_pairs [['label',label]]
labels x y = Json.from_pairs [['x', axis x], ['y', axis y]] . to_text
@ -70,6 +73,17 @@ spec =
table = Table.from_rows header [row_1, row_2]
expect table (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]'
Test.specify "provided only recognized columns within bounds" <|
header = ['x', 'y' , 'bar' , 'size']
row_1 = [1 , 1 , '11' , 30 ]
row_2 = [11 , 10 , 'aa' , 40 ]
row_3 = [21 , 20 , 'bb' , 50 ]
row_4 = [31 , 30 , 'cc' , 60 ]
table = Table.from_rows header [row_1, row_2, row_3, row_4]
bounds = [0,5,25,25]
text = Scatter_Plot.process_to_json_text table bounds
expect_text text (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]'
Test.specify "used specified numeric index for x if missing 'x' column from table" <|
header = [ 'y' , 'foo', 'bar', 'baz' , 'size']
row_1 = [ 10 , 'aa' , 12 , 14 , 40 ]
@ -96,6 +110,30 @@ spec =
vector = [0,10,20]
expect vector no_labels '[{"x":0,"y":0},{"x":1,"y":10},{"x":2,"y":20}]'
Test.specify "limit the number of elements" <|
vector = [0,10,20,30]
text = Scatter_Plot.process_to_json_text vector limit=2
json = Json.parse text
json.fields.keys.should_equal ['axis','data']
data = json.fields.get 'data'
data.unwrap.length . should_equal 2
Test.specify "limit the number of squared elements" <|
vector = (-15).up_to 15 . map (x -> x * x)
text = Scatter_Plot.process_to_json_text vector limit=10
json = Json.parse text
json.fields.keys.should_equal ['axis','data']
data = (json.fields.get 'data') . unwrap
data.length . should_equal 10
(data.take (First 3)).to_text . should_equal '[[[\'x\', 0], [\'y\', 225]], [[\'x\', 15], [\'y\', 0]], [[\'x\', 29], [\'y\', 196]]]'
Test.specify "filter the elements" <|
vector = [0,10,20,30]
bounds = [0,5,10,25]
text = Scatter_Plot.process_to_json_text vector bounds
expect_text text no_labels '[{"x":1,"y":10},{"x":2,"y":20}]'
Test.specify "using indices for x if given a column" <|
column = Column.from_vector 'some_col' [10,2,3]
expect column (labels 'index' 'some_col') '[{"x":0,"y":10},{"x":1,"y":2},{"x":2,"y":3}]'
@ -103,3 +141,5 @@ spec =
Test.specify "using indices for x if given a range" <|
value = 2.up_to 5
expect value no_labels '[{"x":0,"y":2},{"x":1,"y":3},{"x":2,"y":4}]'
main = Test.Suite.run_main spec