mirror of
https://github.com/tloncorp/landscape.git
synced 2024-11-24 09:42:26 +03:00
vitals: initial commit of connectivity check agent
This commit is contained in:
parent
2d6e599d04
commit
ff3c55e6a2
47
desk/app/vitals-tester.hoon
Normal file
47
desk/app/vitals-tester.hoon
Normal file
@ -0,0 +1,47 @@
|
||||
/- v=vitals
|
||||
/+ default-agent
|
||||
^- agent:gall
|
||||
=>
|
||||
|%
|
||||
+$ card card:agent:gall
|
||||
--
|
||||
|_ =bowl:gall
|
||||
+* this .
|
||||
def ~(. (default-agent this %|) bowl)
|
||||
++ on-init on-init:def
|
||||
++ on-save on-save:def
|
||||
++ on-load on-load:def
|
||||
++ on-poke
|
||||
|= [=mark =vase]
|
||||
^- (quip card _this)
|
||||
?+ mark (on-poke:def mark vase)
|
||||
%sub
|
||||
?> =(our.bowl src.bowl)
|
||||
=+ !<(=ship vase)
|
||||
:_ this
|
||||
:~ :* %pass /updates
|
||||
%agent [our.bowl %vitals]
|
||||
%watch /status/(scot %p ship)
|
||||
== ==
|
||||
==
|
||||
++ on-watch on-watch:def
|
||||
++ on-leave on-leave:def
|
||||
++ on-peek on-peek:def
|
||||
++ on-agent
|
||||
|= [=wire =sign:agent:gall]
|
||||
^- (quip card _this)
|
||||
?+ wire (on-agent:def wire sign)
|
||||
[%updates ~]
|
||||
?+ -.sign (on-agent:def wire sign)
|
||||
%fact
|
||||
?+ p.cage.sign (on-agent:def wire sign)
|
||||
%vitals-result
|
||||
=/ =result:v !<(result:v q.cage.sign)
|
||||
~& result
|
||||
`this
|
||||
==
|
||||
==
|
||||
==
|
||||
++ on-arvo on-arvo:def
|
||||
++ on-fail on-fail:def
|
||||
--
|
234
desk/app/vitals.hoon
Normal file
234
desk/app/vitals.hoon
Normal file
@ -0,0 +1,234 @@
|
||||
:: This app provides a batteries-included view of your connectivity with any
|
||||
:: ship. An interface should be able to get all relevant information directly
|
||||
:: from this app and display it to the user. For this reason, it includes many
|
||||
:: "pending" states and reasons for failure.
|
||||
::
|
||||
:: To access these statuses, subscribe to /ship/~sampel-palnet for each ship
|
||||
:: you care about, and then send pokes of the form [%run-check ~sampel-palnet]
|
||||
:: to kick off the connectivity check.
|
||||
::
|
||||
:: You can also scry to /ship/~sampel-palnet to get the latest information
|
||||
:: (with timestamp), but you must poke to actively test the state. The special
|
||||
:: scry paths /sponsor and /galaxy give immediate access to connectivity state
|
||||
:: with our direct sponsor & galaxy, which will always be up-to-date (within
|
||||
:: the last minute).
|
||||
::
|
||||
:: Internally, when we receive a %run-check poke, we start a thread to attempt
|
||||
:: to contact that ship, and if it is taking too long, we will investigate and
|
||||
:: report possible reasons. As the thread progresses, it will give updates on
|
||||
:: its investigation by poking the app with `%update-status`, and these updates
|
||||
:: will immediately go out to subscribers.
|
||||
::
|
||||
:: TODO:
|
||||
:: V: Replace foreign sponsor ack/nack with remote scry
|
||||
:: W: Remote scry foreign sponsor child connection from Ames directly
|
||||
:: X: Add ping to Ames connection state and surface to %vitals
|
||||
:: Y: Register ships for regularly scheduled connectivity checks
|
||||
:: Z: Replace legacy subscription with SSS
|
||||
::
|
||||
/- v=vitals
|
||||
/+ lib=vitals
|
||||
/+ dbug, default-agent, verb
|
||||
^- agent:gall
|
||||
=>
|
||||
|%
|
||||
+$ card card:agent:gall
|
||||
+$ versioned-state current-state
|
||||
+$ current-state
|
||||
$: %0
|
||||
connections=(map ship result:v)
|
||||
==
|
||||
--
|
||||
=| current-state
|
||||
=* state -
|
||||
=<
|
||||
%+ verb |
|
||||
%- agent:dbug
|
||||
|_ =bowl:gall
|
||||
+* this .
|
||||
def ~(. (default-agent this %|) bowl)
|
||||
cor ~(. +> [bowl ~])
|
||||
++ on-init on-init:def
|
||||
::
|
||||
++ on-save
|
||||
!>(state)
|
||||
::
|
||||
++ on-load
|
||||
|= =vase
|
||||
^- (quip card _this)
|
||||
=^ cards state
|
||||
abet:(load:cor vase)
|
||||
[cards this]
|
||||
::
|
||||
++ on-poke
|
||||
|= [=mark =vase]
|
||||
^- (quip card _this)
|
||||
=^ cards state
|
||||
abet:(poke:cor mark vase)
|
||||
[cards this]
|
||||
++ on-watch
|
||||
|= =path
|
||||
^- (quip card _this)
|
||||
=^ cards state
|
||||
abet:(watch:cor path)
|
||||
[cards this]
|
||||
::
|
||||
++ on-peek peek:cor
|
||||
::
|
||||
++ on-leave on-leave:def
|
||||
::
|
||||
++ on-fail on-fail:def
|
||||
::
|
||||
++ on-agent on-agent:def
|
||||
::
|
||||
++ on-arvo
|
||||
|= [=wire sign=sign-arvo]
|
||||
^- (quip card _this)
|
||||
=^ cards state
|
||||
abet:(arvo:cor wire sign)
|
||||
[cards this]
|
||||
--
|
||||
|_ [=bowl:gall cards=(list card)]
|
||||
+* our our.bowl
|
||||
now now.bowl
|
||||
src src.bowl
|
||||
++ cor .
|
||||
++ abet [(flop cards) state]
|
||||
++ emit |=(=card cor(cards [card cards]))
|
||||
++ emil |=(caz=(list card) cor(cards (welp (flop caz) cards)))
|
||||
++ give |=(=gift:agent:gall (emit %give gift))
|
||||
++ load
|
||||
|= =vase
|
||||
=/ loaded-state !<(versioned-state vase)
|
||||
?- -.loaded-state
|
||||
%0 cor(connections connections.loaded-state)
|
||||
==
|
||||
++ poke
|
||||
|= [=mark =vase]
|
||||
^+ cor
|
||||
?+ mark ~|([%bad-mark mark] !!)
|
||||
::
|
||||
:: private pokes
|
||||
::
|
||||
%update-status
|
||||
?> =(our src)
|
||||
=+ !<(=update:v vase)
|
||||
=. connections (~(put by connections) update)
|
||||
%- give
|
||||
:* %fact
|
||||
~[/status/(scot %p -.update)]
|
||||
%vitals-result
|
||||
!>(+.update)
|
||||
==
|
||||
::
|
||||
%run-check
|
||||
?> =(our src)
|
||||
=+ !<(=ship vase)
|
||||
?> =(~ (find ~[our] (saxo:title our now ship)))
|
||||
=/ stat=(unit result:v) (~(get by connections) ship)
|
||||
:: XX: code duplicated because of annoying type issue
|
||||
?~ stat
|
||||
=. connections (~(put by connections) [ship now %pending %setting-up ~])
|
||||
%- emit
|
||||
:* %pass
|
||||
/check-result/(scot %p ship)
|
||||
%arvo
|
||||
%k
|
||||
%fard
|
||||
%garden :: XX: %landscape?
|
||||
%vitals-connection-check
|
||||
%noun
|
||||
!>(ship)
|
||||
==
|
||||
?: ?=(%pending -.status.u.stat)
|
||||
cor
|
||||
=. connections (~(put by connections) [ship now %pending %setting-up ~])
|
||||
%- emit
|
||||
:: [%pass [wire] %arvo %k %fard [beak or desk] [thread-name] [mark] [vase]]
|
||||
:* %pass
|
||||
/check-result/(scot %p ship)
|
||||
%arvo
|
||||
%k
|
||||
%fard
|
||||
%garden :: XX: %landscape?
|
||||
%vitals-connection-check
|
||||
%noun
|
||||
!>(ship)
|
||||
==
|
||||
::
|
||||
:: public pokes
|
||||
::
|
||||
%ship
|
||||
=+ !<(=ship vase)
|
||||
?< =(our ship)
|
||||
?< =(~ (find ~[our] (saxo:title our now ship)))
|
||||
?. ?=([%live *] (scry-qos:lib our now ship))
|
||||
!!
|
||||
cor
|
||||
==
|
||||
++ watch
|
||||
|= =path
|
||||
^+ cor
|
||||
?+ path ~|([%evil-watch path] !!)
|
||||
::
|
||||
[%status @ta ~]
|
||||
%- give
|
||||
:* %fact
|
||||
~[path]
|
||||
%vitals-result
|
||||
!> %+ fall
|
||||
(~(get by connections) (slav %p i.t.path))
|
||||
[*@da %complete %no-data ~]
|
||||
==
|
||||
==
|
||||
++ peek
|
||||
|= =path
|
||||
^- (unit (unit cage))
|
||||
?+ path [~ ~]
|
||||
::
|
||||
[%x %sponsor ~]
|
||||
?< ?=(%czar (clan:title our))
|
||||
%- some
|
||||
%- some
|
||||
:- %vitals-qos
|
||||
!> (scry-qos:lib our now (sein:title our now our))
|
||||
::
|
||||
[%x %galaxy ~]
|
||||
?< ?=(%czar (clan:title our))
|
||||
%- some
|
||||
%- some
|
||||
:- %vitals-qos
|
||||
!> (scry-qos:lib our now (rear (saxo:title our now our)))
|
||||
::
|
||||
[%x %ship @tas ~]
|
||||
%- some
|
||||
%- some
|
||||
:- %vitals-result
|
||||
!>
|
||||
%+ fall (~(get by connections) (slav %p i.t.t.path))
|
||||
[*@da %complete %no-data ~]
|
||||
==
|
||||
++ arvo
|
||||
|= [=wire sign=sign-arvo]
|
||||
^+ cor
|
||||
?+ wire ~|([%bad-arvo-take wire] !!)
|
||||
[%check-result @ta ~]
|
||||
?> ?=(%khan -.sign)
|
||||
?> ?=(%arow +<.sign)
|
||||
=/ =ship (slav %p i.t.wire)
|
||||
=/ =result:v
|
||||
:- now
|
||||
:- %complete
|
||||
?- -.p.sign
|
||||
%& !<(complete:v q.p.p.sign)
|
||||
%| [%crash tang.p.p.sign]
|
||||
==
|
||||
=. connections (~(put by connections) [ship result])
|
||||
%- give
|
||||
:* %fact
|
||||
~[/status/(scot %p ship)]
|
||||
%vitals-result
|
||||
!>(result)
|
||||
==
|
||||
==
|
||||
--
|
@ -7,4 +7,5 @@
|
||||
%storage
|
||||
%reel
|
||||
%bait
|
||||
%vitals
|
||||
==
|
||||
|
24
desk/lib/vitals.hoon
Normal file
24
desk/lib/vitals.hoon
Normal file
@ -0,0 +1,24 @@
|
||||
/- vitals
|
||||
|%
|
||||
++ simplify-qos
|
||||
|= =ship-state:ames
|
||||
^- qos:ames
|
||||
?- -.ship-state
|
||||
%alien [%dead *@da]
|
||||
%known ?+ -.qos.ship-state qos.ship-state
|
||||
%unborn [%dead last-contact.qos.ship-state]
|
||||
== ==
|
||||
++ scry-qos
|
||||
|= [=ship =time peer=ship]
|
||||
^- qos:ames
|
||||
%- simplify-qos
|
||||
.^ ship-state:ames
|
||||
%ax
|
||||
(scot %p ship)
|
||||
%$
|
||||
(scot %da time)
|
||||
%peers
|
||||
(scot %p peer)
|
||||
~
|
||||
==
|
||||
--
|
19
desk/mar/vitals/qos.hoon
Normal file
19
desk/mar/vitals/qos.hoon
Normal file
@ -0,0 +1,19 @@
|
||||
/- *vitals
|
||||
=, format
|
||||
|_ =qos:ames
|
||||
++ grab
|
||||
|%
|
||||
++ noun qos
|
||||
--
|
||||
++ grow
|
||||
|%
|
||||
++ noun qos
|
||||
++ json
|
||||
%- frond:enjs
|
||||
:- 'status'
|
||||
?+ -.qos [%s -.qos]
|
||||
%unborn !!
|
||||
==
|
||||
--
|
||||
++ grad %noun
|
||||
--
|
40
desk/mar/vitals/result.hoon
Normal file
40
desk/mar/vitals/result.hoon
Normal file
@ -0,0 +1,40 @@
|
||||
/- *vitals
|
||||
=, format
|
||||
|_ =result
|
||||
++ grab
|
||||
|%
|
||||
++ noun result
|
||||
--
|
||||
++ grow
|
||||
|%
|
||||
++ noun result
|
||||
++ json
|
||||
%- pairs:enjs
|
||||
:~
|
||||
['timestamp' (time:enjs timestamp.result)]
|
||||
::
|
||||
:- 'status'
|
||||
%- pairs:enjs
|
||||
?- -.status.result
|
||||
%pending
|
||||
:- ['pending' [%s -.p.status.result]]
|
||||
?+ -.p.status.result ~
|
||||
%trying-sponsor ['ship' (ship:enjs ship.p.status.result)]~
|
||||
==
|
||||
::
|
||||
%complete
|
||||
:- ['complete' [%s -.p.status.result]]
|
||||
?+ -.p.status.result ~
|
||||
%no-our-planet ['last-contact' (time:enjs last-contact.p.status.result)]~
|
||||
%no-our-sponsor ['last-contact' (time:enjs last-contact.p.status.result)]~
|
||||
%no-our-galaxy ['last-contact' (time:enjs last-contact.p.status.result)]~
|
||||
%no-sponsor-hit ['ship' (ship:enjs ship.p.status.result)]~
|
||||
%no-sponsor-miss ['ship' (ship:enjs ship.p.status.result)]~
|
||||
%no-their-galaxy ['last-contact' (time:enjs last-contact.p.status.result)]~
|
||||
%crash ['crash' (tang:enjs tang.p.status.result)]~
|
||||
==
|
||||
==
|
||||
==
|
||||
--
|
||||
++ grad %noun
|
||||
--
|
44
desk/sur/vitals.hoon
Normal file
44
desk/sur/vitals.hoon
Normal file
@ -0,0 +1,44 @@
|
||||
|%
|
||||
+| %constants
|
||||
::
|
||||
++ info-timeout ~s30
|
||||
::
|
||||
++ target-timeout ~s10
|
||||
::
|
||||
+| %types
|
||||
::
|
||||
+$ result
|
||||
$: timestamp=@da
|
||||
=status
|
||||
==
|
||||
::
|
||||
+$ status
|
||||
$% [%complete p=complete]
|
||||
[%pending p=pending]
|
||||
==
|
||||
::
|
||||
+$ complete
|
||||
$% [%yes ~]
|
||||
[%no-data ~] :: yet to test connectivity for ship
|
||||
[%no-dns ~] :: can't even talk to example.com
|
||||
[%no-our-planet last-contact=@da] :: can't reach our own planet (moon only)
|
||||
[%no-our-galaxy last-contact=@da] :: can't reach our own galaxy
|
||||
[%no-sponsor-hit =ship] :: their sponsor can reach their ship
|
||||
[%no-sponsor-miss =ship] :: their sponsor can't reach their ship
|
||||
[%no-their-galaxy last-contact=@da] :: can't reach their galaxy
|
||||
[%crash =tang] :: check crashed
|
||||
==
|
||||
::
|
||||
+$ pending
|
||||
$% [%setting-up ~]
|
||||
[%trying-dns ~]
|
||||
[%trying-local ~]
|
||||
[%trying-target ~]
|
||||
[%trying-sponsor =ship]
|
||||
==
|
||||
::
|
||||
+$ update
|
||||
$: =ship
|
||||
=result
|
||||
==
|
||||
--
|
177
desk/ted/vitals/connection-check.hoon
Normal file
177
desk/ted/vitals/connection-check.hoon
Normal file
@ -0,0 +1,177 @@
|
||||
:: XX: As an alternative implementation, we could perform these checks (mostly)
|
||||
:: in parallel. In that case, we shouldn't return immediately when a check
|
||||
:: completes, but instead record the results and then check them after some
|
||||
:: timeout (e.g. 30s).
|
||||
::
|
||||
/- spider, vitals
|
||||
/+ io=strandio, lib-vitals=vitals
|
||||
=, strand=strand:spider
|
||||
^- thread:spider
|
||||
|= arg=vase
|
||||
=/ m (strand ,vase)
|
||||
^- form:m
|
||||
=+ !<(target=ship arg)
|
||||
;< our=@p bind:m get-our:io
|
||||
|^
|
||||
:: early exit; check if we have live path to target
|
||||
;< now=@da bind:m get-time:io
|
||||
;< tqos=qos:ames bind:m (get-qos target)
|
||||
?: ?& ?=(%live -.tqos)
|
||||
(gth last-contact.tqos (sub now info-timeout:vitals))
|
||||
==
|
||||
(post-result [%yes ~])
|
||||
:: set pending to %trying-dns
|
||||
:: XX: can we use the strand cards for these?
|
||||
;< ~ bind:m (update-status [%trying-dns ~])
|
||||
:: check if we can fetch example.com
|
||||
;< ~ bind:m (send-request:io [%'GET' 'http://example.com' ~ ~])
|
||||
;< =client-response:iris bind:m take-client-response:io
|
||||
?. ?& ?=(%finished -.client-response)
|
||||
=(200 status-code.response-header.client-response)
|
||||
==
|
||||
(post-result [%no-dns ~])
|
||||
:: set pending to %trying-local
|
||||
;< ~ bind:m (update-status [%trying-local ~])
|
||||
:: check if we can contact our own galaxy
|
||||
;< gqos=qos:ames bind:m (scry:io qos:ames ~[%gx %vitals %galaxy %vitals-qos])
|
||||
?. ?=(%live -.gqos)
|
||||
(post-result [%no-our-galaxy last-contact.gqos])
|
||||
:: set pending to %trying-target
|
||||
;< ~ bind:m (update-status [%trying-target ~])
|
||||
:: check if we can contact target (with timeout)
|
||||
;< chek=(unit) bind:m (check-online target target-timeout:vitals)
|
||||
?: ?=([%$ %$] chek)
|
||||
(post-result [%yes ~])
|
||||
:: if we're a moon, check if we can contact our planet
|
||||
::
|
||||
:: NN: failing to contact our sponsor is only a failure condition for moons,
|
||||
:: since currently only moons receive additional routing help from their
|
||||
:: sponsors
|
||||
:: NN: we do this after the initial target check because if we're a moon and
|
||||
:: our planet is down, it's useful to talk to ships that still have live
|
||||
:: wires (e.g. for troubleshooting); thus, by waiting to perform this
|
||||
:: check, we don't report %no-our-planet for every connectivity check
|
||||
:: when attempting to track down a live peer from whom to seek help
|
||||
::
|
||||
;< sqos=qos:ames
|
||||
bind:m
|
||||
=/ mm (strand ,qos:ames)
|
||||
^- form:mm
|
||||
?. ?=(%earl (clan:title our))
|
||||
(pure:mm [%live *@da])
|
||||
(scry:io qos:ames ~[%gx %vitals %sponsor %vitals-qos])
|
||||
?. ?=(%live -.sqos)
|
||||
(post-result [%no-our-planet last-contact.sqos])
|
||||
:: early exit; if target is a galaxy, there's nothing more we can check
|
||||
?: ?=(%czar (clan:title target))
|
||||
(galaxy-down target)
|
||||
:: check if target sponsors can reach target
|
||||
;< saxo=(list ship) bind:m (scry:io (list ship) ~[%j %saxo (scot %p target)])
|
||||
=/ sponsors
|
||||
?~ saxo ~
|
||||
t.saxo
|
||||
|-
|
||||
:: case impossible:
|
||||
:: - early exit for target = galaxy
|
||||
:: - base case is sponsor = galaxy
|
||||
?~ sponsors !!
|
||||
:: set pending to %trying-sponsor
|
||||
;< ~ bind:m (update-status [%trying-sponsor i.sponsors])
|
||||
:: ask sponsor if he has live wire to target
|
||||
;< live=(unit ?) bind:m (ask-sponsor i.sponsors)
|
||||
:: if timeout...
|
||||
?~ live
|
||||
:: ... and sponsor is galaxy ...
|
||||
?: ?=(%czar (clan:title i.sponsors))
|
||||
:: ... it's so over
|
||||
(galaxy-down i.sponsors)
|
||||
:: ... otherwise, check next sponsor
|
||||
$(sponsors t.sponsors)
|
||||
:: report whether sponsor can reach target
|
||||
%- post-result
|
||||
?: u.live
|
||||
[%no-sponsor-hit i.sponsors]
|
||||
[%no-sponsor-miss i.sponsors]
|
||||
++ update-status
|
||||
|= =pending:vitals
|
||||
=/ m (strand ,~)
|
||||
^- form:m
|
||||
;< now=@da bind:m get-time:io
|
||||
%+ poke-our:io
|
||||
%vitals
|
||||
:- %update-status
|
||||
!>
|
||||
^- update:vitals
|
||||
[target now %pending pending]
|
||||
++ get-qos
|
||||
|= =ship
|
||||
=/ m (strand ,qos:ames)
|
||||
^- form:m
|
||||
;< state=ship-state:ames
|
||||
bind:m
|
||||
(scry:io ship-state:ames ~[%ax %$ %peers (scot %p ship)])
|
||||
(pure:m (simplify-qos:lib-vitals state))
|
||||
++ galaxy-down
|
||||
|= galaxy=ship
|
||||
=/ m (strand ,vase)
|
||||
^- form:m
|
||||
;< =qos:ames bind:m (get-qos galaxy)
|
||||
(post-result [%no-their-galaxy last-contact.qos])
|
||||
++ post-result
|
||||
|= =complete:vitals
|
||||
=/ m (strand ,vase)
|
||||
^- form:m
|
||||
(pure:m !>(complete))
|
||||
++ ask-sponsor
|
||||
|= sponsor=ship
|
||||
=/ m (strand ,(unit ?))
|
||||
^- form:m
|
||||
%- (handle-err ,?)
|
||||
%+ (set-timeout:io ,?) target-timeout:vitals
|
||||
:: XX: currently returns [~ |] if the sponsor doesn't have %vitals running
|
||||
;< ~
|
||||
bind:(strand ,?)
|
||||
%- send-raw-card:io
|
||||
:* %pass
|
||||
/poke
|
||||
%agent
|
||||
[sponsor %vitals]
|
||||
%poke
|
||||
%ship
|
||||
!>(target)
|
||||
==
|
||||
|= tin=strand-input:strand
|
||||
?+ in.tin `[%skip ~]
|
||||
~ `[%wait ~]
|
||||
::
|
||||
[~ %agent * %poke-ack *]
|
||||
?. =(/poke wire.u.in.tin)
|
||||
`[%skip ~]
|
||||
?~ p.sign.u.in.tin
|
||||
`[%done &]
|
||||
`[%done |]
|
||||
==
|
||||
++ check-online
|
||||
|= [who=ship lag=@dr]
|
||||
=/ m (strand ,(unit))
|
||||
^- form:m
|
||||
%- (handle-err ,~)
|
||||
%+ (set-timeout:io ,~) lag
|
||||
=/ n (strand ,~)
|
||||
;< ~ bind:n (poke:io [who %hood] %helm-hi !>(~))
|
||||
(pure:n ~)
|
||||
++ handle-err
|
||||
|* computation-result=mold
|
||||
=/ m (strand ,(unit computation-result))
|
||||
=/ n (strand ,computation-result)
|
||||
|= computation=form:n
|
||||
^- form:m
|
||||
|= tin=strand-input:strand
|
||||
=* loop $
|
||||
=/ c-res (computation tin)
|
||||
?+ -.next.c-res c-res
|
||||
%cont c-res(self.next ..loop(computation self.next.c-res))
|
||||
%fail c-res(next [%done ~])
|
||||
%done c-res(value.next (some value.next.c-res))
|
||||
==
|
||||
--
|
Loading…
Reference in New Issue
Block a user