mirror of
https://github.com/ilyakooo0/urbit.git
synced 2024-12-01 11:33:41 +03:00
crunch: Add crunch library and generator
The crunch library and generator are tools to export group graph data to a CSV-format file. The purpose of crunch is to help quantify the growth of the Urbit network. The code added here is work done under the grant https://urbit.org/grants/bounties/analytics-script/
This commit is contained in:
parent
ceed4b78d0
commit
d43c579d78
27
pkg/arvo/gen/hood/crunch.hoon
Normal file
27
pkg/arvo/gen/hood/crunch.hoon
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
/- ms=metadata-store
|
||||||
|
/+ crunch
|
||||||
|
:-  %say
::  export graph activity as CSV lines and hand them to clay
::
::    .csv-path: destination path; its last segment must be %csv
::    .from: start of the time window
::    .to: end of the time window; the bunt of @da means `now`
::    .groups: group paths to restrict to; empty keeps every group
::    .content: whether to append post content; unset means no
::
|=  $:  [now=@da * bec=beak]
        [csv-path=path from=@da ~]
        [to=@da groups=(list path) content=(unit ?) ~]
    ==
=/  our=@p  p.bec
=/  lib  ~(. crunch [our now])
::  every graph association our metadata-store knows about
::
=/  associations=associations:ms  (scry-graph-associations:lib)
::  narrow to the requested groups, if any were given
::
=/  filtered-associations=associations:ms
  ?~  groups
    associations
  =/  wanted  (paths-to-resources.crunch groups)
  (filter-associations-by-group-resources.crunch associations wanted)
::  walk the selected graphs, producing one CSV line per entry
::
=/  file-content=wain
  %:  walk-graph-associations:lib
    filtered-associations
    (fall content %.n)
    from
    ?:(=(*@da to) now to)
  ==
[%helm-pass (note-write-csv-to-clay.crunch csv-path file-content)]
|
361
pkg/arvo/lib/crunch.hoon
Normal file
361
pkg/arvo/lib/crunch.hoon
Normal file
@ -0,0 +1,361 @@
|
|||||||
|
/- c=crunch, gs=graph-store, ms=metadata-store, p=post, r=resource
|
||||||
|
::
|
||||||
|
=<
|
||||||
|
|_ [our=ship now=@da]
|
||||||
|
++  walk-graph-associations
  ::  produce CSV lines for every accessible graph in .associations
  ::
  ::    .content: whether post content is appended to each line
  ::    .from, .to: time window handed to the graph walkers
  ::
  |=  [=associations:ms content=? from=@da to=@da]
  ^-  wain
  ::  graph resources reported by our graph-store; an association whose
  ::  graph is not in this set (e.g. one `our` has left) is skipped
  ::  instead of scried
  ::
  =/  accessible-graphs=(set resource:r)  (scry-graph-resources)
  %-  ~(rep by associations)
  |=  [[=md-resource:ms =association:ms] out=wain]
  ^-  wain
  ?>  ?=(%graph app-name.md-resource)
  ?>  ?=(%graph -.config.metadatum.association)
  ?.  (~(has in accessible-graphs) resource.md-resource)
    out
  =/  graph=(unit graph:gs)  (scry-graph resource.md-resource)
  ?~  graph
    out
  ::  group, channel and module, attached to every line of this graph
  ::
  =/  channel-info=channel-info:c
    [group.association resource.md-resource module.config.metadatum.association]
  ::  %chat graphs go through the chat walker; every other module
  ::  (e.g. links & notes) goes through the nested walker
  ::
  %+  weld  out
  ?:  ?=(%chat module.config.metadatum.association)
    (walk-chat-graph u.graph content channel-info from to)
  %:  walk-nested-graph-for-most-recent-entries
    u.graph
    content
    channel-info
    from
    to
  ==
|
||||||
|
::
|
||||||
|
++  scry-graph
  ::  scry graph-store for the graph at .graph-resource
  ::
  ::    produces ~ when the response is not an %add-graph update or
  ::    when the graph it carries is empty
  ::
  |=  graph-resource=resource:r
  ^-  (unit graph:gs)
  =/  response=update:gs
    .^  update:gs
      %gx
      (scot %p our)
      %graph-store
      (scot %da now)
      %graph
      (scot %p entity.graph-resource)
      name.graph-resource
      /noun
    ==
  ?.  ?=(%add-graph -.q.response)  ~
  ?~  graph.q.response  ~
  `graph.q.response
|
||||||
|
::
|
||||||
|
++  scry-graph-resources
  ::  scry graph-store for the set of graph resources it holds
  ::
  ::    produces the empty set when the response is not a %keys update
  ::
  |=  ~
  ^-  (set resource:r)
  =/  response=update:gs
    .^  update:gs
      %gx
      (scot %p our)
      %graph-store
      (scot %da now)
      /keys/noun
    ==
  ?:  ?=(%keys -.q.response)
    resources.q.response
  ~
|
||||||
|
:: helper arm for callers to get graph associations
|
||||||
|
:: to pass to `walk-graph-associations`
|
||||||
|
::
|
||||||
|
++  scry-graph-associations
  ::  scry metadata-store for all associations under app-name %graph
  ::
  |=  ~
  ^-  associations:ms
  =/  base=path  /(scot %p our)/metadata-store/(scot %da now)
  .^(associations:ms %gx (weld base /app-name/graph/noun))
|
||||||
|
--
|
||||||
|
::
|
||||||
|
|%
|
||||||
|
::
|
||||||
|
:: parsing and formatting
|
||||||
|
::
|
||||||
|
++  concatenate-cords
  ::  byte-wise concatenation: .b is appended after .a
  ::
  |=  [a=@ b=@]
  ^-  @
  (cat 3 a b)
|
||||||
|
::
|
||||||
|
++  resource-to-cord
  ::  render a resource as its ship and name joined by '/'
  ::
  |=  =resource:r
  ^-  @t
  (rap 3 ~[(scot %p entity.resource) '/' (scot %tas name.resource)])
|
||||||
|
::
|
||||||
|
++  paths-to-resources
  ::  convert each path with +path-to-resource and collect into a set
  ::
  |=  paxs=(list path)
  ^-  (set resource:r)
  (silt (turn paxs path-to-resource))
|
||||||
|
::
|
||||||
|
++  path-to-resource
  ::  read a resource from the first two segments of .pax
  ::
  ::    the first segment is parsed as a @p and the second is taken as
  ::    the name; later segments are ignored.  crashes if .pax has
  ::    fewer than two segments or the first is not a valid @p.
  ::
  |=  pax=path
  ^-  resource:r
  ?>  ?=([@ @ *] pax)
  [(slav %p i.pax) i.t.pax]
|
||||||
|
::
|
||||||
|
++  escape-characters-in-cord
  ::  make a cord safe to embed as a single CSV field
  ::
  ::    commas are only allowed as delimiters, so they are swapped for
  ::    semicolons first; the result is then escaped with +mesc
  ::
  |=  =cord
  ^-  @t
  (crip (mesc (replace-tape-commas-with-semicolons (trip cord))))
|
||||||
|
::
|
||||||
|
++  replace-tape-commas-with-semicolons
  ::  produce .string with every ',' replaced by ';'
  ::
  ::    done in one pass over the tape; length and all other
  ::    characters are unchanged
  ::
  |=  string=tape
  ^-  tape
  %+  turn  string
  |=  char=@tD
  ^-  @tD
  ?:  =(',' char)
    ';'
  char
|
||||||
|
::
|
||||||
|
++  contents-to-cord
  ::  render each content with +content-to-cord, separated by spaces
  ::
  ::    an empty list produces the empty cord
  ::
  |=  contents=(list content:p)
  ^-  @t
  ?:  =(~ contents)
    ''
  (join-cords ' ' (turn contents content-to-cord))
|
||||||
|
::
|
||||||
|
++  content-to-cord
  ::  render one piece of post content as a CSV-safe cord
  ::
  ::    every free-form cord (%text, %url, %code) goes through
  ::    +escape-characters-in-cord so that an embedded comma or
  ::    control character cannot split the CSV row; %mention and
  ::    %reference are rendered with +scot and +resource-to-cord
  ::
  |=  =content:p
  ^-  @t
  ?-  -.content
    %text       (escape-characters-in-cord text.content)
    %mention    (scot %p ship.content)
    %url        (escape-characters-in-cord url.content)
  ::  TODO: also print output?
  ::
    %code       (escape-characters-in-cord expression.content)
    %reference  (reference-content-to-cord reference.content)
  ==
|
||||||
|
::
|
||||||
|
++  reference-content-to-cord
  ::  render a reference: the group alone for %group, or the group
  ::  and the referenced graph resource separated by ': ' for %graph
  ::
  |=  =reference:p
  ^-  @t
  ?-    -.reference
      %group
    (resource-to-cord group.reference)
  ::
      %graph
    %+  rap  3
    :~  (resource-to-cord group.reference)
        ': '
        (resource-to-cord resource.uid.reference)
    ==
  ==
|
||||||
|
::
|
||||||
|
++  format-post-to-comma-separated-cord
  ::  build the content-free part of a CSV line for .post
  ::
  ::    fields, in order: time sent, author, group, channel, channel
  ::    type.  content is left out; callers append it when wanted.
  ::
  |=  [=post:gs =channel-info:c]
  ^-  @t
  =/  fields=(list @t)
    :~  (scot %da time-sent.post)
        (scot %p author.post)
        (resource-to-cord group.channel-info)
        (resource-to-cord channel.channel-info)
        (scot %tas channel-type.channel-info)
    ==
  (join-cords ',' fields)
|
||||||
|
::
|
||||||
|
++  join-cords
  ::  concatenate .cords with .delimiter between adjacent elements
  ::
  ::    the delimiter is placed between every pair of elements, even
  ::    when elements are empty, so the number of fields is always
  ::    preserved.  an empty list produces the empty cord.
  ::
  |=  [delimiter=@t cords=(list @t)]
  ^-  @t
  (rap 3 (join delimiter cords))
|
||||||
|
::
|
||||||
|
:: walking graphs
|
||||||
|
::
|
||||||
|
++  walk-chat-graph
  ::  produce one CSV line per post in a chat graph
  ::
  ::    nodes are first limited to the window bounded by .from and .to.
  ::    deleted posts and posts with no contents (structural nodes) are
  ::    left out.  when .content is set, the rendered contents are
  ::    appended to each line as a final field.
  ::
  |=  [=graph:gs content=? =channel-info:c from=@da to=@da]
  ^-  wain
  %+  murn
    %+  only-nodes-older-than  to
    (only-nodes-newer-than from ~(val by graph))
  |=  =node:gs
  ^-  (unit @t)
  ?:  ?=(%| -.post.node)
    ~
  ?~  contents.p.post.node
    ~
  =/  line=@t
    (format-post-to-comma-separated-cord p.post.node channel-info)
  ?.  content
    `line
  `(join-cords ',' ~[line (contents-to-cord contents.p.post.node)])
|
||||||
|
::
|
||||||
|
++  walk-nested-graph-for-most-recent-entries
  ::  walk a nested graph, keeping one line per nesting level
  ::
  ::    at each level the nodes are limited to the window bounded by
  ::    .from and .to and visited in the order the filters return them.
  ::    lines from child graphs are gathered recursively; of the posts
  ::    at this level that are neither deleted nor empty, only the last
  ::    one visited is kept.
  ::
  |=  [=graph:gs content=? =channel-info:c from=@da to=@da]
  ^-  wain
  =|  lines=wain
  =|  latest=@t
  =/  nodes
    %+  only-nodes-older-than  to
    (only-nodes-newer-than from ~(val by graph))
  %-  flop
  |-  ^-  wain
  ?~  nodes
    ::  an empty .latest means no post was kept at this level;
    ::  produce the gathered lines alone rather than `['' lines]`
    ::
    ?:(=('' latest) lines [latest lines])
  ::  descend into this node's children before looking at its post
  ::
  =?  lines  ?=(%graph -.children.i.nodes)
    %+  weld  lines
    %:  walk-nested-graph-for-most-recent-entries
      p.children.i.nodes
      content
      channel-info
      from
      to
    ==
  ::  deleted posts are not kept
  ::
  ?:  ?=(%| -.post.i.nodes)
    $(nodes t.nodes)
  ::  structural nodes (no contents) are not kept
  ::
  ?~  contents.p.post.i.nodes
    $(nodes t.nodes)
  =/  line=@t
    (format-post-to-comma-separated-cord p.post.i.nodes channel-info)
  %=  $
    nodes   t.nodes
    latest  ?.  content
              line
            (join-cords ',' ~[line (contents-to-cord contents.p.post.i.nodes)])
  ==
|
||||||
|
::
|
||||||
|
:: filters
|
||||||
|
::
|
||||||
|
++  filter-associations-by-group-resources
  ::  keep only the associations whose group is in .group-resources
  ::
  |=  [=associations:ms group-resources=(set resource:r)]
  ^-  associations:ms
  %-  ~(gas by *associations:ms)
  %+  skim  ~(tap by associations)
  |=  [=md-resource:ms =association:ms]
  (~(has in group-resources) group.association)
|
||||||
|
:: wrappers for intuitive use of `filter-nodes-by-timestamp`:
|
||||||
|
:: pass `nodes` as given by the `graph-store` scry and no
|
||||||
|
:: need to worry about comparators
|
||||||
|
::
|
||||||
|
++  only-nodes-older-than
  ::  keep the leading nodes sent at or before .time
  ::
  ::    .nodes is passed to +filter-nodes-by-timestamp as given, so it
  ::    must start with the nodes that should be kept
  ::
  |=  [time=@da nodes=(list node:gs)]
  %^  filter-nodes-by-timestamp  nodes
    lte
  time
|
||||||
|
::
|
||||||
|
++  only-nodes-newer-than
  ::  keep the trailing nodes sent at or after .time
  ::
  ::    .nodes is reversed before filtering and the result is reversed
  ::    again, so the output is in the same order as the input
  ::
  |=  [time=@da nodes=(list node:gs)]
  =/  reversed  (flop nodes)
  (flop (filter-nodes-by-timestamp reversed gte time))
|
||||||
|
::
|
||||||
|
++  filter-nodes-by-timestamp
  ::  take nodes from the front of .nodes while they pass .comparator
  ::
  ::    each undeleted node is tested with
  ::    `(comparator time-sent time)`.  deleted posts are dropped
  ::    without being tested.  the walk stops at the first node that
  ::    fails, so this assumes time is monotonic along .nodes and that
  ::    .nodes begins with the nodes to keep (e.g. for "sent at or
  ::    before .time", oldest first with `lte`).  output order matches
  ::    input order.
  ::
  |=  [nodes=(list node:gs) comparator=$-([@ @] ?) time=@da]
  |-  ^-  (list node:gs)
  ?~  nodes
    ~
  ::  skip deleted posts
  ::
  ?:  ?=(%| -.post.i.nodes)
    $(nodes t.nodes)
  ::  first failure: everything further on is assumed to fail too
  ::
  ?.  (comparator time-sent.p.post.i.nodes time)
    ~
  [i.nodes $(nodes t.nodes)]
|
||||||
|
::
|
||||||
|
:: io
|
||||||
|
::
|
||||||
|
++  note-write-csv-to-clay
  ::  build the clay %info note that inserts .file-content at .pax
  ::
  ::    crashes unless the last segment of .pax is %csv.  the content
  ::    is inserted into %home under the %csv mark.
  ::
  |=  [pax=path file-content=wain]
  ?>  =(%csv (rear pax))
  [%c %info %home %& [pax %ins %csv !>(file-content)]~]
|
||||||
|
::
|
||||||
|
--
|
15
pkg/arvo/mar/csv.hoon
Normal file
15
pkg/arvo/mar/csv.hoon
Normal file
@ -0,0 +1,15 @@
|
|||||||
|
::  %csv mark: a comma-separated file held as a list of lines
::
=,  format
=,  mimes:html
|_  csv=wain
::  +grab: convert from other marks
::
++  grab
  |%
  ::  from %mime: run the body of the octs through +to-wain
  ::
  ++  mime
    |=  (pair mite octs)
    (to-wain q.q)
  ::  from %noun: clam with the wain mold
  ::
  ++  noun  wain
  --
::  +grow: convert to other marks
::
++  grow
  |%
  ::  to %mime: run the lines through +of-wain, typed text/csv
  ::
  ++  mime
    :-  /text/csv
    (as-octs (of-wain csv))
  --
++  grad  %mime
--
|
9
pkg/arvo/sur/crunch.hoon
Normal file
9
pkg/arvo/sur/crunch.hoon
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
/- resource
|
||||||
|
::
|
||||||
|
|%
::  $channel-info: the channel a line of crunch output came from
::
::    .group: resource of the group
::    .channel: resource of the channel
::    .channel-type: kind of channel, as a term
::
+$  channel-info
  [group=resource:resource channel=resource:resource channel-type=term]
--
|
Loading…
Reference in New Issue
Block a user