mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-12-01 03:23:09 +03:00
crunch: Add crunch library and generator
The crunch library and generator are tools to export group graph data to a CSV-format file. The purpose of crunch is to help quantify the growth of the Urbit network. The code added here is work done under the grant https://urbit.org/grants/bounties/analytics-script/
This commit is contained in:
parent
ceed4b78d0
commit
d43c579d78
27
pkg/arvo/gen/hood/crunch.hoon
Normal file
27
pkg/arvo/gen/hood/crunch.hoon
Normal file
@ -0,0 +1,27 @@
|
||||
/- ms=metadata-store
|
||||
/+ crunch
|
||||
::  +crunch: export group graph data to a CSV file in clay
::
::    required arguments:
::      csv-path  clay path handed to +note-write-csv-to-clay:crunch
::      from      start of the time window
::    optional arguments:
::      to        end of the time window; left unset (bunt) it becomes `now`
::      groups    group paths to keep; empty keeps every scried association
::      content   include post content when set to %.y; unset means %.n
::
:-  %say
|=  $:  [now=@da * bec=beak]
        [csv-path=path from=@da ~]
        [to=@da groups=(list path) content=(unit ?) ~]
    ==
=/  our=@p  p.bec
::  unset optional arguments fall back to their defaults
::
=/  until=@da  ?:(=(*@da to) now to)
=/  include-content=?  ?~(content %.n u.content)
::  all graph associations known to `our`, narrowed to `groups` if given
::
=/  known=associations:ms
  (~(scry-graph-associations crunch [our now]))
=/  selected=associations:ms
  ?~  groups  known
  (filter-associations-by-group-resources.crunch known (paths-to-resources.crunch groups))
::  walk the selected graphs; each extracted entry is one output line
::
=/  rows=wain
  (~(walk-graph-associations crunch [our now]) selected include-content from until)
[%helm-pass (note-write-csv-to-clay.crunch csv-path rows)]
|
361
pkg/arvo/lib/crunch.hoon
Normal file
361
pkg/arvo/lib/crunch.hoon
Normal file
@ -0,0 +1,361 @@
|
||||
/- c=crunch, gs=graph-store, ms=metadata-store, p=post, r=resource
|
||||
::
|
||||
=<
|
||||
|_ [our=ship now=@da]
|
||||
::  +walk-graph-associations: produce CSV lines for every given graph
::
::    associations:  graph associations to walk; each must have app-name
::                   %graph and a %graph config, or this crashes
::    content:       whether post content is appended to each line
::    from, to:      time window passed on to the graph walkers
::
::  associations whose graph is not among the keys of our graph-store,
::  or whose scry yields no graph, contribute nothing.
::
++  walk-graph-associations
  |=  [=associations:ms content=? from=@da to=@da]
  ^-  wain
  ::  graphs present in `our`; checked first so we never scry a graph
  ::  that `our` has left and can no longer access
  ::
  =/  present=(set resource:r)  (scry-graph-resources)
  %-  ~(rep by associations)
  |=  [[=md-resource:ms =association:ms] out=wain]
  ^-  wain
  ?>  ?=(%graph app-name.md-resource)
  ?>  ?=(%graph -.config.metadatum.association)
  ?.  (~(has in present) resource.md-resource)  out
  =/  found=(unit graph:gs)  (scry-graph resource.md-resource)
  ?~  found  out
  =/  kind  module.config.metadatum.association
  =/  info=channel-info:c
    [group.association resource.md-resource kind]
  ::  chats are walked flat; everything else (e.g. links & notes)
  ::  is walked as a nested graph
  ::
  %+  weld  out
  ?:  =(%chat kind)
    (walk-chat-graph u.found content info from to)
  (walk-nested-graph-for-most-recent-entries u.found content info from to)
|
||||
::
|
||||
::  +scry-graph: read one graph out of graph-store
::
::    produces ~ when the response is not an %add-graph update
::    or when the graph it carries is empty.
::
++  scry-graph
  |=  graph-resource=resource:r
  ^-  (unit graph:gs)
  =/  reply=update:gs
    .^  update:gs
      %gx
      /(scot %p our)/graph-store/(scot %da now)/graph/(scot %p entity.graph-resource)/[name.graph-resource]/noun
    ==
  =/  body  q.reply
  ?.  ?=(%add-graph -.body)  ~
  ?~  graph.body  ~
  `graph.body
|
||||
::
|
||||
::  +scry-graph-resources: resources of all graphs held by graph-store
::
::    produces the empty set when the response is not a %keys update.
::
++  scry-graph-resources
  |=  ~
  ^-  (set resource:r)
  =/  reply=update:gs
    .^(update:gs %gx /(scot %p our)/graph-store/(scot %da now)/keys/noun)
  ?:  ?=(%keys -.q.reply)
    resources.q.reply
  ~
|
||||
:: helper arm for callers to get graph associations
|
||||
:: to pass to `walk-graph-associations`
|
||||
::
|
||||
::  +scry-graph-associations: all metadata-store associations for app %graph
::
::    convenience for callers that need an argument
::    for +walk-graph-associations.
::
++  scry-graph-associations
  |=  ~
  ^-  associations:ms
  .^  associations:ms
    %gx
    /(scot %p our)/metadata-store/(scot %da now)/app-name/graph/noun
  ==
|
||||
--
|
||||
::
|
||||
|%
|
||||
::
|
||||
:: parsing and formatting
|
||||
::
|
||||
::  +concatenate-cords: append the bytes of `b` after those of `a`
::
++  concatenate-cords
  |=  [a=@ b=@]
  (cat 3 a b)
|
||||
::
|
||||
::  +resource-to-cord: render a resource as `~ship/name`
::
++  resource-to-cord
  |=  =resource:r
  ^-  @t
  =/  ship-text=@t  (scot %p entity.resource)
  =/  name-text=@t  (scot %tas name.resource)
  (rap 3 ~[ship-text '/' name-text])
|
||||
::
|
||||
::  +paths-to-resources: convert each path with +path-to-resource
::  and collect the results into a set
::
++  paths-to-resources
  |=  paxs=(list path)
  ^-  (set resource:r)
  (silt (turn paxs path-to-resource))
|
||||
::
|
||||
::  +path-to-resource: read a resource off the front of a path
::
::    the first element is parsed as a @p and the second is taken as
::    the name; any further elements are ignored. a path with fewer
::    than two elements crashes, and so presumably does a first
::    element +slav cannot parse as a @p.
::
++  path-to-resource
  |=  pax=path
  ^-  resource:r
  [`@p`(slav %p -.pax) `@tas`+<.pax]
|
||||
::
|
||||
::  +escape-characters-in-cord: make a cord safe to embed in a CSV field
::
::    commas are only allowed as delimiters in the output, so they
::    are turned into semicolons first; the result is then passed
::    through +mesc.
::
++  escape-characters-in-cord
  |=  =cord
  ^-  @t
  =/  without-commas=tape
    (replace-tape-commas-with-semicolons (trip cord))
  (crip (mesc without-commas))
|
||||
::
|
||||
::  +replace-tape-commas-with-semicolons: swap every ',' for ';'
::
::    a single pass over the tape; all other characters are kept
::    as they are and the length never changes.
::
++  replace-tape-commas-with-semicolons
  |=  string=tape
  ^-  tape
  %+  turn  string
  |=  char=@tD
  ^-  @tD
  ?:(=(',' char) ';' char)
|
||||
::
|
||||
::  +contents-to-cord: render a post's contents as one cord
::
::    each content is rendered with +content-to-cord and the pieces
::    are joined with a single space; no contents gives the empty cord.
::
++  contents-to-cord
  |=  contents=(list content:p)
  ^-  @t
  ?:  ?=(~ contents)  ''
  (join-cords ' ' (turn contents content-to-cord))
|
||||
::
|
||||
::  +content-to-cord: render a single piece of post content
::
::    every free-text variant (%text, %url, %code) goes through
::    +escape-characters-in-cord, so user-supplied commas cannot be
::    mistaken for field delimiters and the characters +mesc escapes
::    do not reach the CSV line verbatim. mentions and references
::    are rendered from ships and resources.
::
++  content-to-cord
  |=  =content:p
  ^-  @t
  ?-  -.content
    %text       (escape-characters-in-cord text.content)
    %mention    (scot %p ship.content)
    %url        (escape-characters-in-cord url.content)
    %code       (escape-characters-in-cord expression.content)  ::  TODO: also print output?
    %reference  (reference-content-to-cord reference.content)
  ==
|
||||
::
|
||||
::  +reference-content-to-cord: render a reference
::
::    %group:  the group resource
::    %graph:  the group resource, then ': ', then the graph resource
::
++  reference-content-to-cord
  |=  =reference:p
  ^-  @t
  ?:  ?=(%group -.reference)
    (resource-to-cord group.reference)
  =/  group-text=@t  (resource-to-cord group.reference)
  =/  graph-text=@t  (resource-to-cord resource.uid.reference)
  (rap 3 ~[group-text ': ' graph-text])
|
||||
::
|
||||
::  +format-post-to-comma-separated-cord: the fixed columns of a CSV line
::
::    columns, in order: time sent, author, group, channel, channel type.
::    post content is not included here; callers append it if wanted.
::
++  format-post-to-comma-separated-cord
  |=  [=post:gs =channel-info:c]
  ^-  @t
  =/  sent=@t  (scot %da time-sent.post)
  =/  who=@t   (scot %p author.post)
  =/  grp=@t   (resource-to-cord group.channel-info)
  =/  chan=@t  (resource-to-cord channel.channel-info)
  =/  kind=@t  (scot %tas channel-type.channel-info)
  (join-cords ',' ~[sent who grp chan kind])
|
||||
::
|
||||
::  +join-cords: join cords with a delimiter between neighbours
::
::    delimiter:  cord placed between each pair of neighbouring elements
::    cords:      elements to join; the empty list gives the empty cord
::
::  the first element seeds the result explicitly, so an empty first
::  element still gets its delimiter, e.g. `~['' 'a']` joined with ','
::  is ',a'. testing the accumulator against '' would drop that
::  delimiter and shift every later column.
::
++  join-cords
  |=  [delimiter=@t cords=(list @t)]
  ^-  @t
  ?~  cords  ''
  =/  out=@t  i.cords
  =/  rest=(list @t)  t.cords
  |-  ^-  @t
  ?~  rest  out
  $(out (rap 3 ~[out delimiter i.rest]), rest t.rest)
|
||||
::
|
||||
:: walking graphs
|
||||
::
|
||||
::  +walk-chat-graph: one CSV line per chat message in the time window
::
::    nodes are time-filtered with +only-nodes-newer-than and
::    +only-nodes-older-than, and lines keep the order of the filtered
::    nodes. deleted posts and nodes without contents (structural
::    nodes) produce no line. when `content` is set, the rendered
::    contents are appended as a further comma-separated field.
::
++  walk-chat-graph
  |=  [=graph:gs content=? =channel-info:c from=@da to=@da]
  ^-  wain
  =/  in-window=(list node:gs)
    (only-nodes-older-than to (only-nodes-newer-than from ~(val by graph)))
  %+  murn  in-window
  |=  =node:gs
  ^-  (unit @t)
  ::  do not output deleted posts
  ::
  ?:  ?=(%| -.post.node)  ~
  ::  do not output structural nodes
  ::
  ?~  contents.p.post.node  ~
  =/  row=@t  (format-post-to-comma-separated-cord p.post.node channel-info)
  ?.  content  `row
  `(join-cords ',' ~[row (contents-to-cord contents.p.post.node)])
|
||||
::
|
||||
::  +walk-nested-graph-for-most-recent-entries: walk a non-chat graph
::
::    at each level, nodes are time-filtered and visited in order.
::    lines from child graphs are collected by recursing, while of
::    the level's own posts only the last one visited that is neither
::    deleted nor empty is kept. the collected list is flopped before
::    it is returned.
::
++  walk-nested-graph-for-most-recent-entries
  |=  [=graph:gs content=? =channel-info:c from=@da to=@da]
  ^-  wain
  =/  rest=(list node:gs)
    (only-nodes-older-than to (only-nodes-newer-than from ~(val by graph)))
  =|  latest=@t
  =|  out=wain
  %-  flop
  |-  ^-  wain
  ?~  rest
    ::  never produce `['' ~]`: either the collected lines alone,
    ::  or the collected lines plus an actual entry
    ::
    ?:(=('' latest) out [latest out])
  ::  descend into child graphs before looking at the post itself
  ::
  =?  out  ?=(%graph -.children.i.rest)
    %+  weld  out
    (walk-nested-graph-for-most-recent-entries p.children.i.rest content channel-info from to)
  ::  do not keep deleted posts
  ::
  ?:  ?=(%| -.post.i.rest)
    $(rest t.rest)
  ::  do not keep structural nodes
  ::
  ?~  contents.p.post.i.rest
    $(rest t.rest)
  =/  row=@t  (format-post-to-comma-separated-cord p.post.i.rest channel-info)
  =?  row  content
    (join-cords ',' ~[row (contents-to-cord contents.p.post.i.rest)])
  $(rest t.rest, latest row)
|
||||
::
|
||||
:: filters
|
||||
::
|
||||
::  +filter-associations-by-group-resources: keep only the associations
::  whose group is a member of `group-resources`
::
++  filter-associations-by-group-resources
  |=  [=associations:ms group-resources=(set resource:r)]
  ^-  associations:ms
  %-  malt
  %+  skim  ~(tap by associations)
  |=  [* =association:ms]
  (~(has in group-resources) group.association)
|
||||
:: wrappers for intuitive use of `filter-nodes-by-timestamp`:
|
||||
:: pass `nodes` as given by the `graph-store` scry and no
|
||||
:: need to worry about comparators
|
||||
::
|
||||
::  +only-nodes-older-than: leading nodes sent at or before `time`
::
::    thin wrapper over +filter-nodes-by-timestamp with `lte`;
::    `nodes` is passed through in the order it was given.
::
++  only-nodes-older-than
  |=  [time=@da nodes=(list node:gs)]
  %^  filter-nodes-by-timestamp  nodes
    lte
  time
|
||||
::
|
||||
::  +only-nodes-newer-than: trailing nodes sent at or after `time`
::
::    `nodes` is reversed, filtered from the front with `gte`,
::    and the result is reversed back into the original order.
::
++  only-nodes-newer-than
  |=  [time=@da nodes=(list node:gs)]
  =/  reversed=(list node:gs)  (flop nodes)
  (flop (filter-nodes-by-timestamp reversed gte time))
|
||||
::
|
||||
::  +filter-nodes-by-timestamp: take nodes while the comparator holds
::
::    nodes:       nodes to scan, front to back
::    comparator:  called as `(comparator time-sent time)`
::    time:        the time every post is compared against
::
::  the result keeps the order of `nodes`. deleted posts are skipped.
::  scanning stops at the first post for which the comparator is %.n,
::  which assumes:
::    * time is monotonic along `nodes`
::    * `nodes` is ordered so that matching posts come first
::      (e.g. for all nodes up to a given time, `nodes` must start
::      with the oldest and the comparator is `lte`)
::
++  filter-nodes-by-timestamp
  |=  [nodes=(list node:gs) comparator=$-([@ @] ?) time=@da]
  |-  ^-  (list node:gs)
  ?~  nodes  ~
  ::  skip deleted posts
  ::
  ?:  ?=(%| -.post.i.nodes)
    $(nodes t.nodes)
  ::  first miss ends the scan
  ::
  ?.  (comparator time-sent.p.post.i.nodes time)  ~
  [i.nodes $(nodes t.nodes)]
|
||||
::
|
||||
:: io
|
||||
::
|
||||
::  +note-write-csv-to-clay: build the note that inserts a csv file
::
::    pax:           path of the file; its last element must be %csv,
::                   otherwise this crashes
::    file-content:  lines of the file
::
::  produces a %c %info note for the %home desk that inserts
::  `file-content` at `pax` under the %csv mark.
::
++  note-write-csv-to-clay
  |=  [pax=path file-content=wain]
  ?>  =(%csv (rear pax))
  =/  change  [pax %ins %csv !>(file-content)]
  [%c %info %home %& ~[change]]
|
||||
::
|
||||
--
|
15
pkg/arvo/mar/csv.hoon
Normal file
15
pkg/arvo/mar/csv.hoon
Normal file
@ -0,0 +1,15 @@
|
||||
=, format
|
||||
=, mimes:html
|
||||
::  csv mark: a file held as a list of lines
::
|_  csv=wain
::  +grab: convert from other marks
::
++  grab
  |%
  ::  the data of a mime is turned into lines with +to-wain
  ::
  ++  mime
    |=  [=mite =octs]
    (to-wain q.octs)
  ::  clam from %noun
  ::
  ++  noun  wain
  --
::  +grow: convert to other marks
::
++  grow
  |%
  ::  lines are joined with +of-wain and typed as text/csv
  ::
  ++  mime
    =/  joined  (of-wain csv)
    [/text/csv (as-octs joined)]
  --
::  +grad: defer to the %mime mark
::
++  grad  %mime
--
|
9
pkg/arvo/sur/crunch.hoon
Normal file
9
pkg/arvo/sur/crunch.hoon
Normal file
@ -0,0 +1,9 @@
|
||||
/- resource
|
||||
::
|
||||
|%
::  $channel-info: what identifies a channel in the exported rows
::
::    group:         resource of the group
::    channel:       resource of the channel
::    channel-type:  kind of channel, as a term
::
+$  channel-info
  [group=resource:resource channel=resource:resource channel-type=term]
--
|
Loading…
Reference in New Issue
Block a user