mirror of
https://github.com/enso-org/enso.git
synced 2024-11-27 05:15:42 +03:00
Lazy scatterplot for Vector & Table (#3655)
First of all, this PR demonstrates how to implement _lazy visualization_: - one needs to write/enhance the Enso visualization libraries - this PR adds two optional parameters (`bounds` and `limit`) to the `process_to_json_text` function. - `process_to_json_text` can be tested with the standard Enso test harness, which this PR also does - then one has to modify the JavaScript on the IDE side to construct the `setPreprocessor` expression using the optional parameters The idea of _scatter plot lazy visualization_ is to limit the number of points the IDE requests. Initially the limit is set to `limit=1024`. `Scatter_Plot.enso` then processes the data and selects/generates a subset of at most `limit` points. Right now it includes the `min` and `max` points on both the `x` and `y` axes, plus randomly chosen points up to the `limit`. ![Zooming In](https://user-images.githubusercontent.com/26887752/185336126-f4fbd914-7fd8-4f0b-8377-178095401f46.png) The D3 visualization widget is capable of _zooming in_. When that happens, the JavaScript widget composes a new expression with `bounds` set to the newly visible area. By calling `setPreprocessor`, the engine recomputes the visualization data, filters out any data outside of the `bounds` and selects another `limit` points from the new data. The IDE visualization then updates itself to display this more detailed data. Users can zoom in to see the smallest detail, where the number of points drops below `limit`, or they can select _Fit all_ to see all the data without any `bounds`. # Important Notes Randomly selecting `limit` samples from the dataset may be misleading. Implementing _k-means clustering_ (where `k=limit`) would probably generate a more representative approximation.
This commit is contained in:
parent
2507a2049b
commit
2b9352d2fc
@ -42,6 +42,7 @@
|
||||
visualizations. (Previously, they were placed to the left of the
|
||||
visualizations.)
|
||||
- [Fixed histograms coloring and added a color legend.][3153]
|
||||
- [Lazy visualization for scatter plot.][3655]
|
||||
- [Fixed broken node whose expression contains non-ASCII characters.][3166]
|
||||
- [Fixed developer console warnings about views being created but not
|
||||
registered.][3181]
|
||||
@ -185,6 +186,7 @@
|
||||
[debug-shortcuts]:
|
||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||
[3153]: https://github.com/enso-org/enso/pull/3153
|
||||
[3655]: https://github.com/enso-org/enso/pull/3655
|
||||
[3166]: https://github.com/enso-org/enso/pull/3166
|
||||
[3181]: https://github.com/enso-org/enso/pull/3181
|
||||
[3186]: https://github.com/enso-org/enso/pull/3186
|
||||
|
@ -17,6 +17,7 @@ const LINEAR_SCALE = 'linear'
|
||||
const LOGARITHMIC_SCALE = 'logarithmic'
|
||||
const VISIBLE_POINTS = 'visible'
|
||||
const BUTTONS_HEIGHT = 25
|
||||
const DEFAULT_LIMIT = 1024
|
||||
|
||||
/**
|
||||
* A d3.js ScatterPlot visualization.
|
||||
@ -49,7 +50,9 @@ class ScatterPlot extends Visualization {
|
||||
|
||||
constructor(data) {
|
||||
super(data)
|
||||
this.setPreprocessor('process_to_json_text', 'Standard.Visualization.Scatter_Plot')
|
||||
this.bounds = null
|
||||
this.limit = DEFAULT_LIMIT
|
||||
this.updatePreprocessor()
|
||||
this.dataPoints = []
|
||||
this.axis = {
|
||||
x: { scale: LINEAR_SCALE },
|
||||
@ -58,6 +61,14 @@ class ScatterPlot extends Visualization {
|
||||
this.points = { labels: VISIBLE_POINTS }
|
||||
}
|
||||
|
||||
/**
 * Rebuild the Enso preprocessor expression from the current `this.limit` and
 * `this.bounds`, and register it through `setPreprocessor`.
 *
 * The generated expression has the form
 * `x -> process_to_json_text x limit=<limit>` with an optional
 * ` bounds=[<a>,<b>,<c>,<d>]` suffix that is appended only when `this.bounds`
 * is truthy. Elsewhere in this file `this.bounds` is either `null` or the
 * array `[xMin, yMin, xMax, yMax]` assigned on zoom.
 */
updatePreprocessor() {
|
||||
// The point limit is always passed as a named argument.
let fn = 'x -> process_to_json_text x limit=' + this.limit
|
||||
// `bounds` is omitted entirely when unset, so the Enso-side default applies.
if (this.bounds) {
|
||||
// Values are joined with ',' and wrapped in square brackets.
fn += ' bounds=[' + this.bounds.join(',') + ']'
|
||||
}
|
||||
// Second argument is the Enso module in which the expression is evaluated.
this.setPreprocessor(fn, 'Standard.Visualization.Scatter_Plot')
|
||||
}
|
||||
|
||||
/**
|
||||
* Presents a scatterplot visualization after receiving `data`.
|
||||
*/
|
||||
@ -377,6 +388,9 @@ class ScatterPlot extends Visualization {
|
||||
let yMin = zoom.transformedScale.yScale.invert(extent[1][1])
|
||||
let yMax = zoom.transformedScale.yScale.invert(extent[0][1])
|
||||
|
||||
this.bounds = [xMin, yMin, xMax, yMax]
|
||||
this.updatePreprocessor()
|
||||
|
||||
zoom.transformedScale.xScale.domain([xMin, xMax])
|
||||
zoom.transformedScale.yScale.domain([yMin, yMax])
|
||||
|
||||
@ -806,6 +820,10 @@ class ScatterPlot extends Visualization {
|
||||
zoom.transformedScale.yScale.domain(domainY)
|
||||
|
||||
self.zoomingHelper(zoom.transformedScale, boxWidth, scatter, points)
|
||||
|
||||
self.bounds = null
|
||||
self.limit = DEFAULT_LIMIT
|
||||
self.updatePreprocessor()
|
||||
}
|
||||
|
||||
document.addEventListener('keydown', e => {
|
||||
|
@ -3,6 +3,7 @@ from Standard.Base import all
|
||||
import Standard.Table.Data.Column
|
||||
import Standard.Table.Data.Table
|
||||
import Standard.Visualization.Helpers
|
||||
from Standard.Base.Data.Index_Sub_Range import Sample
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -133,23 +134,63 @@ Table.Table.axes self =
|
||||
|
||||
## PRIVATE
|
||||
Vector.Vector.point_data : Vector -> Object
|
||||
Vector.Vector.point_data self =
|
||||
self.map_with_index <| i-> elem->
|
||||
Json.from_pairs [[X.name,i],[Y.name,elem]]
|
||||
Vector.Vector.point_data self =
|
||||
self.map_with_index <| i-> elem->
|
||||
Json.from_pairs [[X.name,i],[Y.name,elem]]
|
||||
|
||||
## PRIVATE
|
||||
json_from_table : Table -> Json
|
||||
json_from_table table =
|
||||
data = table.point_data
|
||||
# Restricts `data` to the points lying inside `bounds`.
#
# Arguments:
# - bounds: `Nothing` (no filtering) or an indexable value read positionally
#   as min_x (0), min_y (1), max_x (2), max_y (3).
# - data: the points to filter; each is expected to expose "x" and "y" fields
#   through `get ... . unwrap`.
#
# Returns `data` unchanged when `bounds` is `Nothing`, otherwise only the
# points with min_x <= x <= max_x and min_y <= y <= max_y (both inclusive).
bound_data bounds data = case bounds of
|
||||
Nothing -> data
|
||||
_ ->
|
||||
min_x = bounds.at 0
|
||||
min_y = bounds.at 1
|
||||
max_x = bounds.at 2
|
||||
max_y = bounds.at 3
|
||||
|
||||
data.filter <| datum->
|
||||
x = datum.get "x" . unwrap
|
||||
y = datum.get "y" . unwrap
|
||||
|
||||
# Inclusive on all four edges.
min_x<=x && x<=max_x && min_y<=y && y<=max_y
|
||||
|
||||
type Extreme min_x max_x min_y max_y
|
||||
|
||||
## PRIVATE
|
||||
# Reduces `data` to at most `limit` points while keeping the extreme points.
#
# Arguments:
# - limit: `Nothing` (no limiting) or the maximum number of points to return.
# - data: the points to reduce; each is expected to expose "x" and "y" fields
#   through `get ... . unwrap`.
#
# Returns `data` unchanged when `limit` is `Nothing`, when `data` already has
# at most `limit` elements, or when `data` is empty. Otherwise returns the
# points with minimal/maximal x and y, followed by a `Sample` of `data` that
# fills the result up to `limit` elements.
limit_data limit data = case limit of
|
||||
Nothing -> data
|
||||
_ -> if ((data.length <= limit) || (data.length == 0)) then data else
|
||||
x datum = (datum.get "x").unwrap
|
||||
y datum = (datum.get "y").unwrap
|
||||
|
||||
# Fold step: each Extreme field holds an [index, point] pair. Comparisons
# are strict, so on ties the earlier point is kept.
update_extreme current idx point =
|
||||
new_min_x = if x current.min_x.second > x point then [idx, point] else current.min_x
|
||||
new_min_y = if y current.min_y.second > y point then [idx, point] else current.min_y
|
||||
new_max_x = if x current.max_x.second < x point then [idx, point] else current.max_x
|
||||
new_max_y = if y current.max_y.second < y point then [idx, point] else current.max_y
|
||||
Extreme new_min_x new_max_x new_min_y new_max_y
|
||||
|
||||
# Seed all four extremes with the first point (index 0).
first = [0, data.first]
|
||||
bounds = case data.fold_with_index (Extreme first first first first) update_extreme of
|
||||
Extreme min_x max_x min_y max_y -> [min_x, max_x, min_y, max_y]
|
||||
_ -> []
|
||||
# NOTE(review): building a Map keyed by index presumably collapses a point
# that is extreme on several axes into a single entry - confirm against
# Map.from_vector semantics.
extreme = Map.from_vector bounds . values
|
||||
|
||||
# If the extremes alone reach the limit, truncate them; otherwise append
# a sample of `data` sized to fill the remainder.
# NOTE(review): the sample is drawn from all of `data`, so it may repeat
# points already present in `extreme` - confirm whether that is intended.
if limit <= extreme.length then extreme.take (First limit) else
|
||||
extreme + data.take (Sample (limit - extreme.length))
|
||||
|
||||
## PRIVATE
|
||||
json_from_table : Table -> [Int]|Nothing -> Int|Nothing -> Json
|
||||
json_from_table table bounds limit =
|
||||
data = table.point_data |> bound_data bounds |> limit_data limit
|
||||
axes = table.axes
|
||||
Json.from_pairs <| [[data_field,data], [axis_field, axes]]
|
||||
|
||||
## PRIVATE
|
||||
json_from_vector : Vector Any -> Json
|
||||
json_from_vector vec =
|
||||
data = [data_field, vec.point_data]
|
||||
json_from_vector : Vector Any -> [Int]|Nothing -> Int|Nothing -> Json
|
||||
json_from_vector vec bounds limit =
|
||||
data = vec.point_data |> bound_data bounds |> limit_data limit
|
||||
axes = [axis_field, Nothing]
|
||||
Json.from_pairs [data, axes]
|
||||
Json.from_pairs <| [[data_field, data], axes]
|
||||
|
||||
## PRIVATE
|
||||
|
||||
@ -160,12 +201,12 @@ json_from_vector vec =
|
||||
Arguments:
|
||||
- value: the value to be visualized.
|
||||
process_to_json_text : Any -> Text
|
||||
process_to_json_text value =
|
||||
process_to_json_text value bounds=Nothing limit=Nothing =
|
||||
json = case value of
|
||||
Column.Column _ -> json_from_table value.to_table
|
||||
Table.Table _ -> json_from_table value
|
||||
Vector.Vector _ -> json_from_vector value
|
||||
_ -> json_from_vector value.to_vector
|
||||
Column.Column _ -> json_from_table value.to_table bounds limit
|
||||
Table.Table _ -> json_from_table value bounds limit
|
||||
Vector.Vector _ -> json_from_vector value bounds limit
|
||||
_ -> json_from_vector value.to_vector bounds limit
|
||||
|
||||
json.to_text
|
||||
|
||||
|
@ -10,8 +10,7 @@ import Standard.Test
|
||||
import project
|
||||
|
||||
spec =
|
||||
expect value axis_expected_text data_expected_text =
|
||||
text = Scatter_Plot.process_to_json_text value
|
||||
expect_text text axis_expected_text data_expected_text =
|
||||
json = Json.parse text
|
||||
json.fields.keys.should_equal ['axis','data']
|
||||
|
||||
@ -20,6 +19,10 @@ spec =
|
||||
expected_result = Json.from_pairs [expected_axis_labels, expected_data_pair]
|
||||
json.should_equal expected_result
|
||||
|
||||
expect value axis_expected_text data_expected_text =
|
||||
text = Scatter_Plot.process_to_json_text value
|
||||
expect_text text axis_expected_text data_expected_text
|
||||
|
||||
index = Scatter_Plot.index_name
|
||||
axis label = Json.from_pairs [['label',label]]
|
||||
labels x y = Json.from_pairs [['x', axis x], ['y', axis y]] . to_text
|
||||
@ -70,6 +73,17 @@ spec =
|
||||
table = Table.from_rows header [row_1, row_2]
|
||||
expect table (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]'
|
||||
|
||||
Test.specify "provided only recognized columns within bounds" <|
|
||||
header = ['x', 'y' , 'bar' , 'size']
|
||||
row_1 = [1 , 1 , '11' , 30 ]
|
||||
row_2 = [11 , 10 , 'aa' , 40 ]
|
||||
row_3 = [21 , 20 , 'bb' , 50 ]
|
||||
row_4 = [31 , 30 , 'cc' , 60 ]
|
||||
table = Table.from_rows header [row_1, row_2, row_3, row_4]
|
||||
bounds = [0,5,25,25]
|
||||
text = Scatter_Plot.process_to_json_text table bounds
|
||||
expect_text text (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]'
|
||||
|
||||
Test.specify "used specified numeric index for x if missing 'x' column from table" <|
|
||||
header = [ 'y' , 'foo', 'bar', 'baz' , 'size']
|
||||
row_1 = [ 10 , 'aa' , 12 , 14 , 40 ]
|
||||
@ -96,6 +110,30 @@ spec =
|
||||
vector = [0,10,20]
|
||||
expect vector no_labels '[{"x":0,"y":0},{"x":1,"y":10},{"x":2,"y":20}]'
|
||||
|
||||
Test.specify "limit the number of elements" <|
|
||||
vector = [0,10,20,30]
|
||||
text = Scatter_Plot.process_to_json_text vector limit=2
|
||||
json = Json.parse text
|
||||
json.fields.keys.should_equal ['axis','data']
|
||||
data = json.fields.get 'data'
|
||||
data.unwrap.length . should_equal 2
|
||||
|
||||
|
||||
Test.specify "limit the number of squared elements" <|
|
||||
vector = (-15).up_to 15 . map (x -> x * x)
|
||||
text = Scatter_Plot.process_to_json_text vector limit=10
|
||||
json = Json.parse text
|
||||
json.fields.keys.should_equal ['axis','data']
|
||||
data = (json.fields.get 'data') . unwrap
|
||||
data.length . should_equal 10
|
||||
(data.take (First 3)).to_text . should_equal '[[[\'x\', 0], [\'y\', 225]], [[\'x\', 15], [\'y\', 0]], [[\'x\', 29], [\'y\', 196]]]'
|
||||
|
||||
Test.specify "filter the elements" <|
|
||||
vector = [0,10,20,30]
|
||||
bounds = [0,5,10,25]
|
||||
text = Scatter_Plot.process_to_json_text vector bounds
|
||||
expect_text text no_labels '[{"x":1,"y":10},{"x":2,"y":20}]'
|
||||
|
||||
Test.specify "using indices for x if given a column" <|
|
||||
column = Column.from_vector 'some_col' [10,2,3]
|
||||
expect column (labels 'index' 'some_col') '[{"x":0,"y":10},{"x":1,"y":2},{"x":2,"y":3}]'
|
||||
@ -103,3 +141,5 @@ spec =
|
||||
Test.specify "using indices for x if given a range" <|
|
||||
value = 2.up_to 5
|
||||
expect value no_labels '[{"x":0,"y":2},{"x":1,"y":3},{"x":2,"y":4}]'
|
||||
|
||||
main = Test.Suite.run_main spec
|
||||
|
Loading…
Reference in New Issue
Block a user