mirror of
https://github.com/enso-org/enso.git
synced 2024-11-23 08:08:34 +03:00
Lazy scatterplot for Vector & Table (#3655)
First of all, this PR demonstrates how to implement _lazy visualization_:

- one needs to write/enhance the Enso visualization libraries
- this PR adds two optional parameters (`bounds` and `limit`) to the `process_to_json_text` function
- `process_to_json_text` can be tested by the standard Enso test harness, which this PR also does
- then one has to modify the JavaScript on the IDE side to construct the `setPreprocessor` expression using the optional parameters

The idea of _scatter plot lazy visualization_ is to limit the number of points the IDE requests. Initially the limit is set to `limit=1024`. `Scatter_Plot.enso` then processes the data and selects/generates a subset of at most `limit` points. Right now it includes the points with the `min` and `max` values along both the `x` and `y` axes, plus randomly chosen points up to the `limit`.

![Zooming In](https://user-images.githubusercontent.com/26887752/185336126-f4fbd914-7fd8-4f0b-8377-178095401f46.png)

The D3 visualization widget is capable of _zooming in_. When that happens, the JavaScript widget composes a new expression with `bounds` set to the newly visible area. Calling `setPreprocessor` makes the engine recompute the visualization data, filter out any data outside of the `bounds`, and select another `limit` points from the new data. The IDE visualization then updates itself to display this more detailed data. Users can zoom in to see the smallest detail, where the number of points drops below `limit`, or they can select _Fit all_ to see all the data without any `bounds`.

# Important Notes

Randomly selecting `limit` samples from the dataset may be misleading. Implementing _k-means clustering_ (where `k=limit`) would probably generate a more representative approximation.
This commit is contained in:
parent
2507a2049b
commit
2b9352d2fc
@ -42,6 +42,7 @@
|
|||||||
visualizations. (Previously, they were placed to the left of the
|
visualizations. (Previously, they were placed to the left of the
|
||||||
visualizations.)
|
visualizations.)
|
||||||
- [Fixed histograms coloring and added a color legend.][3153]
|
- [Fixed histograms coloring and added a color legend.][3153]
|
||||||
|
- [Lazy visualization for scatter plot.][3655]
|
||||||
- [Fixed broken node whose expression contains non-ASCII characters.][3166]
|
- [Fixed broken node whose expression contains non-ASCII characters.][3166]
|
||||||
- [Fixed developer console warnings about views being created but not
|
- [Fixed developer console warnings about views being created but not
|
||||||
registered.][3181]
|
registered.][3181]
|
||||||
@ -185,6 +186,7 @@
|
|||||||
[debug-shortcuts]:
|
[debug-shortcuts]:
|
||||||
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
https://github.com/enso-org/enso/blob/develop/app/gui/docs/product/shortcuts.md#debug
|
||||||
[3153]: https://github.com/enso-org/enso/pull/3153
|
[3153]: https://github.com/enso-org/enso/pull/3153
|
||||||
|
[3655]: https://github.com/enso-org/enso/pull/3655
|
||||||
[3166]: https://github.com/enso-org/enso/pull/3166
|
[3166]: https://github.com/enso-org/enso/pull/3166
|
||||||
[3181]: https://github.com/enso-org/enso/pull/3181
|
[3181]: https://github.com/enso-org/enso/pull/3181
|
||||||
[3186]: https://github.com/enso-org/enso/pull/3186
|
[3186]: https://github.com/enso-org/enso/pull/3186
|
||||||
|
@ -17,6 +17,7 @@ const LINEAR_SCALE = 'linear'
|
|||||||
const LOGARITHMIC_SCALE = 'logarithmic'
|
const LOGARITHMIC_SCALE = 'logarithmic'
|
||||||
const VISIBLE_POINTS = 'visible'
|
const VISIBLE_POINTS = 'visible'
|
||||||
const BUTTONS_HEIGHT = 25
|
const BUTTONS_HEIGHT = 25
|
||||||
|
const DEFAULT_LIMIT = 1024
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A d3.js ScatterPlot visualization.
|
* A d3.js ScatterPlot visualization.
|
||||||
@ -49,7 +50,9 @@ class ScatterPlot extends Visualization {
|
|||||||
|
|
||||||
constructor(data) {
|
constructor(data) {
|
||||||
super(data)
|
super(data)
|
||||||
this.setPreprocessor('process_to_json_text', 'Standard.Visualization.Scatter_Plot')
|
this.bounds = null
|
||||||
|
this.limit = DEFAULT_LIMIT
|
||||||
|
this.updatePreprocessor()
|
||||||
this.dataPoints = []
|
this.dataPoints = []
|
||||||
this.axis = {
|
this.axis = {
|
||||||
x: { scale: LINEAR_SCALE },
|
x: { scale: LINEAR_SCALE },
|
||||||
@ -58,6 +61,14 @@ class ScatterPlot extends Visualization {
|
|||||||
this.points = { labels: VISIBLE_POINTS }
|
this.points = { labels: VISIBLE_POINTS }
|
||||||
}
|
}
|
||||||
|
|
||||||
|
updatePreprocessor() {
|
||||||
|
let fn = 'x -> process_to_json_text x limit=' + this.limit
|
||||||
|
if (this.bounds) {
|
||||||
|
fn += ' bounds=[' + this.bounds.join(',') + ']'
|
||||||
|
}
|
||||||
|
this.setPreprocessor(fn, 'Standard.Visualization.Scatter_Plot')
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Presents a scatterplot visualization after receiving `data`.
|
* Presents a scatterplot visualization after receiving `data`.
|
||||||
*/
|
*/
|
||||||
@ -377,6 +388,9 @@ class ScatterPlot extends Visualization {
|
|||||||
let yMin = zoom.transformedScale.yScale.invert(extent[1][1])
|
let yMin = zoom.transformedScale.yScale.invert(extent[1][1])
|
||||||
let yMax = zoom.transformedScale.yScale.invert(extent[0][1])
|
let yMax = zoom.transformedScale.yScale.invert(extent[0][1])
|
||||||
|
|
||||||
|
this.bounds = [xMin, yMin, xMax, yMax]
|
||||||
|
this.updatePreprocessor()
|
||||||
|
|
||||||
zoom.transformedScale.xScale.domain([xMin, xMax])
|
zoom.transformedScale.xScale.domain([xMin, xMax])
|
||||||
zoom.transformedScale.yScale.domain([yMin, yMax])
|
zoom.transformedScale.yScale.domain([yMin, yMax])
|
||||||
|
|
||||||
@ -726,7 +740,7 @@ class ScatterPlot extends Visualization {
|
|||||||
addStyleToElem(
|
addStyleToElem(
|
||||||
'button',
|
'button',
|
||||||
`
|
`
|
||||||
margin-left: 5px;
|
margin-left: 5px;
|
||||||
margin-bottom: 5px;
|
margin-bottom: 5px;
|
||||||
display: inline-block;
|
display: inline-block;
|
||||||
padding: 2px 10px;
|
padding: 2px 10px;
|
||||||
@ -806,6 +820,10 @@ class ScatterPlot extends Visualization {
|
|||||||
zoom.transformedScale.yScale.domain(domainY)
|
zoom.transformedScale.yScale.domain(domainY)
|
||||||
|
|
||||||
self.zoomingHelper(zoom.transformedScale, boxWidth, scatter, points)
|
self.zoomingHelper(zoom.transformedScale, boxWidth, scatter, points)
|
||||||
|
|
||||||
|
self.bounds = null
|
||||||
|
self.limit = DEFAULT_LIMIT
|
||||||
|
self.updatePreprocessor()
|
||||||
}
|
}
|
||||||
|
|
||||||
document.addEventListener('keydown', e => {
|
document.addEventListener('keydown', e => {
|
||||||
|
@ -3,6 +3,7 @@ from Standard.Base import all
|
|||||||
import Standard.Table.Data.Column
|
import Standard.Table.Data.Column
|
||||||
import Standard.Table.Data.Table
|
import Standard.Table.Data.Table
|
||||||
import Standard.Visualization.Helpers
|
import Standard.Visualization.Helpers
|
||||||
|
from Standard.Base.Data.Index_Sub_Range import Sample
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
|
|
||||||
@ -133,23 +134,63 @@ Table.Table.axes self =
|
|||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
Vector.Vector.point_data : Vector -> Object
|
Vector.Vector.point_data : Vector -> Object
|
||||||
Vector.Vector.point_data self =
|
Vector.Vector.point_data self =
|
||||||
self.map_with_index <| i-> elem->
|
self.map_with_index <| i-> elem->
|
||||||
Json.from_pairs [[X.name,i],[Y.name,elem]]
|
Json.from_pairs [[X.name,i],[Y.name,elem]]
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
json_from_table : Table -> Json
|
bound_data bounds data = case bounds of
|
||||||
json_from_table table =
|
Nothing -> data
|
||||||
data = table.point_data
|
_ ->
|
||||||
|
min_x = bounds.at 0
|
||||||
|
min_y = bounds.at 1
|
||||||
|
max_x = bounds.at 2
|
||||||
|
max_y = bounds.at 3
|
||||||
|
|
||||||
|
data.filter <| datum->
|
||||||
|
x = datum.get "x" . unwrap
|
||||||
|
y = datum.get "y" . unwrap
|
||||||
|
|
||||||
|
min_x<=x && x<=max_x && min_y<=y && y<=max_y
|
||||||
|
|
||||||
|
type Extreme min_x max_x min_y max_y
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
limit_data limit data = case limit of
|
||||||
|
Nothing -> data
|
||||||
|
_ -> if ((data.length <= limit) || (data.length == 0)) then data else
|
||||||
|
x datum = (datum.get "x").unwrap
|
||||||
|
y datum = (datum.get "y").unwrap
|
||||||
|
|
||||||
|
update_extreme current idx point =
|
||||||
|
new_min_x = if x current.min_x.second > x point then [idx, point] else current.min_x
|
||||||
|
new_min_y = if y current.min_y.second > y point then [idx, point] else current.min_y
|
||||||
|
new_max_x = if x current.max_x.second < x point then [idx, point] else current.max_x
|
||||||
|
new_max_y = if y current.max_y.second < y point then [idx, point] else current.max_y
|
||||||
|
Extreme new_min_x new_max_x new_min_y new_max_y
|
||||||
|
|
||||||
|
first = [0, data.first]
|
||||||
|
bounds = case data.fold_with_index (Extreme first first first first) update_extreme of
|
||||||
|
Extreme min_x max_x min_y max_y -> [min_x, max_x, min_y, max_y]
|
||||||
|
_ -> []
|
||||||
|
extreme = Map.from_vector bounds . values
|
||||||
|
|
||||||
|
if limit <= extreme.length then extreme.take (First limit) else
|
||||||
|
extreme + data.take (Sample (limit - extreme.length))
|
||||||
|
|
||||||
|
## PRIVATE
|
||||||
|
json_from_table : Table -> [Int]|Nothing -> Int|Nothing -> Json
|
||||||
|
json_from_table table bounds limit =
|
||||||
|
data = table.point_data |> bound_data bounds |> limit_data limit
|
||||||
axes = table.axes
|
axes = table.axes
|
||||||
Json.from_pairs <| [[data_field,data], [axis_field, axes]]
|
Json.from_pairs <| [[data_field,data], [axis_field, axes]]
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
json_from_vector : Vector Any -> Json
|
json_from_vector : Vector Any -> [Int]|Nothing -> Int|Nothing -> Json
|
||||||
json_from_vector vec =
|
json_from_vector vec bounds limit =
|
||||||
data = [data_field, vec.point_data]
|
data = vec.point_data |> bound_data bounds |> limit_data limit
|
||||||
axes = [axis_field, Nothing]
|
axes = [axis_field, Nothing]
|
||||||
Json.from_pairs [data, axes]
|
Json.from_pairs <| [[data_field, data], axes]
|
||||||
|
|
||||||
## PRIVATE
|
## PRIVATE
|
||||||
|
|
||||||
@ -160,12 +201,12 @@ json_from_vector vec =
|
|||||||
Arguments:
|
Arguments:
|
||||||
- value: the value to be visualized.
|
- value: the value to be visualized.
|
||||||
process_to_json_text : Any -> Text
|
process_to_json_text : Any -> Text
|
||||||
process_to_json_text value =
|
process_to_json_text value bounds=Nothing limit=Nothing =
|
||||||
json = case value of
|
json = case value of
|
||||||
Column.Column _ -> json_from_table value.to_table
|
Column.Column _ -> json_from_table value.to_table bounds limit
|
||||||
Table.Table _ -> json_from_table value
|
Table.Table _ -> json_from_table value bounds limit
|
||||||
Vector.Vector _ -> json_from_vector value
|
Vector.Vector _ -> json_from_vector value bounds limit
|
||||||
_ -> json_from_vector value.to_vector
|
_ -> json_from_vector value.to_vector bounds limit
|
||||||
|
|
||||||
json.to_text
|
json.to_text
|
||||||
|
|
||||||
|
@ -10,8 +10,7 @@ import Standard.Test
|
|||||||
import project
|
import project
|
||||||
|
|
||||||
spec =
|
spec =
|
||||||
expect value axis_expected_text data_expected_text =
|
expect_text text axis_expected_text data_expected_text =
|
||||||
text = Scatter_Plot.process_to_json_text value
|
|
||||||
json = Json.parse text
|
json = Json.parse text
|
||||||
json.fields.keys.should_equal ['axis','data']
|
json.fields.keys.should_equal ['axis','data']
|
||||||
|
|
||||||
@ -20,6 +19,10 @@ spec =
|
|||||||
expected_result = Json.from_pairs [expected_axis_labels, expected_data_pair]
|
expected_result = Json.from_pairs [expected_axis_labels, expected_data_pair]
|
||||||
json.should_equal expected_result
|
json.should_equal expected_result
|
||||||
|
|
||||||
|
expect value axis_expected_text data_expected_text =
|
||||||
|
text = Scatter_Plot.process_to_json_text value
|
||||||
|
expect_text text axis_expected_text data_expected_text
|
||||||
|
|
||||||
index = Scatter_Plot.index_name
|
index = Scatter_Plot.index_name
|
||||||
axis label = Json.from_pairs [['label',label]]
|
axis label = Json.from_pairs [['label',label]]
|
||||||
labels x y = Json.from_pairs [['x', axis x], ['y', axis y]] . to_text
|
labels x y = Json.from_pairs [['x', axis x], ['y', axis y]] . to_text
|
||||||
@ -70,6 +73,17 @@ spec =
|
|||||||
table = Table.from_rows header [row_1, row_2]
|
table = Table.from_rows header [row_1, row_2]
|
||||||
expect table (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]'
|
expect table (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]'
|
||||||
|
|
||||||
|
Test.specify "provided only recognized columns within bounds" <|
|
||||||
|
header = ['x', 'y' , 'bar' , 'size']
|
||||||
|
row_1 = [1 , 1 , '11' , 30 ]
|
||||||
|
row_2 = [11 , 10 , 'aa' , 40 ]
|
||||||
|
row_3 = [21 , 20 , 'bb' , 50 ]
|
||||||
|
row_4 = [31 , 30 , 'cc' , 60 ]
|
||||||
|
table = Table.from_rows header [row_1, row_2, row_3, row_4]
|
||||||
|
bounds = [0,5,25,25]
|
||||||
|
text = Scatter_Plot.process_to_json_text table bounds
|
||||||
|
expect_text text (labels 'x' 'y') '[{"size":40,"x":11,"y":10},{"size":50,"x":21,"y":20}]'
|
||||||
|
|
||||||
Test.specify "used specified numeric index for x if missing 'x' column from table" <|
|
Test.specify "used specified numeric index for x if missing 'x' column from table" <|
|
||||||
header = [ 'y' , 'foo', 'bar', 'baz' , 'size']
|
header = [ 'y' , 'foo', 'bar', 'baz' , 'size']
|
||||||
row_1 = [ 10 , 'aa' , 12 , 14 , 40 ]
|
row_1 = [ 10 , 'aa' , 12 , 14 , 40 ]
|
||||||
@ -96,6 +110,30 @@ spec =
|
|||||||
vector = [0,10,20]
|
vector = [0,10,20]
|
||||||
expect vector no_labels '[{"x":0,"y":0},{"x":1,"y":10},{"x":2,"y":20}]'
|
expect vector no_labels '[{"x":0,"y":0},{"x":1,"y":10},{"x":2,"y":20}]'
|
||||||
|
|
||||||
|
Test.specify "limit the number of elements" <|
|
||||||
|
vector = [0,10,20,30]
|
||||||
|
text = Scatter_Plot.process_to_json_text vector limit=2
|
||||||
|
json = Json.parse text
|
||||||
|
json.fields.keys.should_equal ['axis','data']
|
||||||
|
data = json.fields.get 'data'
|
||||||
|
data.unwrap.length . should_equal 2
|
||||||
|
|
||||||
|
|
||||||
|
Test.specify "limit the number of squared elements" <|
|
||||||
|
vector = (-15).up_to 15 . map (x -> x * x)
|
||||||
|
text = Scatter_Plot.process_to_json_text vector limit=10
|
||||||
|
json = Json.parse text
|
||||||
|
json.fields.keys.should_equal ['axis','data']
|
||||||
|
data = (json.fields.get 'data') . unwrap
|
||||||
|
data.length . should_equal 10
|
||||||
|
(data.take (First 3)).to_text . should_equal '[[[\'x\', 0], [\'y\', 225]], [[\'x\', 15], [\'y\', 0]], [[\'x\', 29], [\'y\', 196]]]'
|
||||||
|
|
||||||
|
Test.specify "filter the elements" <|
|
||||||
|
vector = [0,10,20,30]
|
||||||
|
bounds = [0,5,10,25]
|
||||||
|
text = Scatter_Plot.process_to_json_text vector bounds
|
||||||
|
expect_text text no_labels '[{"x":1,"y":10},{"x":2,"y":20}]'
|
||||||
|
|
||||||
Test.specify "using indices for x if given a column" <|
|
Test.specify "using indices for x if given a column" <|
|
||||||
column = Column.from_vector 'some_col' [10,2,3]
|
column = Column.from_vector 'some_col' [10,2,3]
|
||||||
expect column (labels 'index' 'some_col') '[{"x":0,"y":10},{"x":1,"y":2},{"x":2,"y":3}]'
|
expect column (labels 'index' 'some_col') '[{"x":0,"y":10},{"x":1,"y":2},{"x":2,"y":3}]'
|
||||||
@ -103,3 +141,5 @@ spec =
|
|||||||
Test.specify "using indices for x if given a range" <|
|
Test.specify "using indices for x if given a range" <|
|
||||||
value = 2.up_to 5
|
value = 2.up_to 5
|
||||||
expect value no_labels '[{"x":0,"y":2},{"x":1,"y":3},{"x":2,"y":4}]'
|
expect value no_labels '[{"x":0,"y":2},{"x":1,"y":3},{"x":2,"y":4}]'
|
||||||
|
|
||||||
|
main = Test.Suite.run_main spec
|
||||||
|
Loading…
Reference in New Issue
Block a user