From 47eac6dbc73172e30c5f0c739861759ff03340c3 Mon Sep 17 00:00:00 2001 From: Anton Dyudin Date: Tue, 15 Mar 2016 15:09:40 -0700 Subject: [PATCH] better handling of identity in web scrapers --- app/dojo.hoon | 25 ++++++++++++++----------- gen/curl-hiss.hoon | 4 ++-- gen/curl.hoon | 1 + sur/sole.hoon | 8 +++++--- 4 files changed, 22 insertions(+), 16 deletions(-) diff --git a/app/dojo.hoon b/app/dojo.hoon index b6cb57f45..cf67692dd 100644 --- a/app/dojo.hoon +++ b/app/dojo.hoon @@ -32,7 +32,7 @@ {$pill p/path} :: noun to unix pill :: {$tree p/path} :: noun to unix tree {$file p/beam} :: save to clay - {$http p/?($post $put) q/iden r/purl} :: http outbound + {$http p/?($post $put) q/(unit iden) r/purl} :: http outbound {$poke p/goal} :: poke app {$show p/?($0 $1 $2 $3)} :: print val+span+twig {$verb p/term} :: store variable @@ -42,7 +42,7 @@ q/dojo-build :: general build == :: ++ dojo-build :: one arvo step - $% {$ur p/iden q/purl} :: http GET request + $% {$ur p/(unit iden) q/purl} :: http GET request {$ge p/dojo-model} :: generator {$dv p/path} :: core from source {$ex p/twig} :: hoon expression @@ -87,7 +87,7 @@ ++ card :: general card $% {$diff $sole-effect sole-effect} :: {$send wire {ship term} clap} :: - {$hiss wire {$~ iden} mark {$hiss hiss}} :: + {$hiss wire (unit iden) mark {$hiss hiss}} :: {$exec wire @p (unit {beak silk})} :: {$deal wire sock term club} :: {$info wire @p toro} :: @@ -167,8 +167,8 @@ ;~(plug (cold %file tar) dp-beam) ;~(plug (cold %flat pat) (most fas qut)) ;~(plug (cold %pill dot) (most fas sym)) - ;~(plug (cold %http lus) (easy %post) dp-iden-url) - ;~(plug (cold %http hep) (easy %put) dp-iden-url) + ;~(plug (cold %http lus) (stag %post dp-iden-url)) + ;~(plug (cold %http hep) (stag %put dp-iden-url)) (stag %show (cook $?($1 $2 $3) (cook lent (stun [1 3] wut)))) == ++ dp-hooves :: hoof list @@ -221,7 +221,7 @@ (sear plex:vez (stag %conl poor:vez)) :: ++ dp-iden-url - (cook |=({a/(unit iden) b/purl} [(fall a *iden) b]) auru:epur) + (cook |=({a/(unit iden) b/purl} [`(fall a *iden) b]) auru:epur) :: ++ dp-model ;~(plug dp-server dp-config) :: ++dojo-model ++ dp-path (tope he-beam) :: ++path @@ -269,10 +269,10 @@ (he-card(poy `+>+<(pux `way)) %exec way our.hid `[he-beak kas]) :: ++ dy-eyre :: send work to eyre - |= {way/wire usr/iden req/hiss} + |= {way/wire usr/(unit iden) req/hiss} ^+ +>+> ?> ?=($~ pux) - (he-card(poy `+>+<(pux `way)) %hiss way `usr %httr %hiss req) + (he-card(poy `+>+<(pux `way)) %hiss way usr %httr %hiss req) :: ++ dy-stop :: stop work ^+ +> @@ -415,6 +415,7 @@ ++ dy-cast |* {typ/_* bun/vase} |= a/vase ^- typ + ~| [p.bun p.a] ?> (~(nest ut p.bun) & p.a) ;;(typ q.a) :: @@ -537,7 +538,7 @@ ++ dy-shown $? twig $^ {dy-shown dy-shown} - $% {$ur iden purl} + $% {$ur (unit iden) purl} {$dv path} {$as mark dy-shown} {$do twig dy-shown} @@ -676,9 +677,11 @@ (dy-meal (slot 7 vax)) :: $| - =+ hiz=;;(hiss +<.q.vax) + => .(vax (slap vax !,(*twig ?>(?=($| -) .)))) :: XX working spec #72 + =+ typ={$| (unit iden) hiss *} + =+ [~ usr hiz ~]=((dy-cast typ !>(*typ)) vax) =. ..dy (he-diff %tan leaf+"< {(earn p.hiz)}" ~) - (dy-eyre(pro `(slap (slot 7 vax) limb+%q)) /scar ~. hiz) + (dy-eyre(pro `(slap (slot 15 vax) limb+%r)) /scar usr hiz) == :: ++ dy-sigh-scar :: scraper result diff --git a/gen/curl-hiss.hoon b/gen/curl-hiss.hoon index 413b080d4..ff54f1408 100644 --- a/gen/curl-hiss.hoon +++ b/gen/curl-hiss.hoon @@ -4,11 +4,11 @@ /? 310 /- sole [sole] -:- %get |= {^ {a/hiss $~} $~} +:- %get |= {^ {a/hiss $~} usr/iden} ^- (sole-request (cask httr)) ?. ?=($get p.q.a) ~| %only-get-requests-supported-in-generators :: XX enforced? !! :- *tang -:+ %| `hiss`a +:^ %| `usr `hiss`a |=(hit/httr (sole-so %httr hit)) diff --git a/gen/curl.hoon b/gen/curl.hoon index e131cb1aa..7cd15555a 100644 --- a/gen/curl.hoon +++ b/gen/curl.hoon @@ -5,6 +5,7 @@ /- sole [sole] :- %get |= {^ {a/tape $~} $~} +^- (sole-request (cask httr)) %+ sole-at (scan a auri:epur) |= hit/httr (sole-so %httr hit) diff --git a/sur/sole.hoon b/sur/sole.hoon index f5144b859..a1f258d16 100644 --- a/sur/sole.hoon +++ b/sur/sole.hoon @@ -73,7 +73,7 @@ |* out/$-(* *) :: output structure %+ pair (list tank) :: %+ each (unit out) :: ~ is abort - (pair hiss $-(httr (sole-request out))) :: fetch and continue + (trel (unit iden) hiss $-(httr (sole-request out))) :: fetch and continue :: :: ++ sole-gen :: XX virtual type $% {$say $-((sole-args) (cask))} :: direct noun @@ -99,8 +99,10 @@ [p=*(list tank) q=[%| p=pom q=mor]] :: :: :: ++ sole-at :: fetch url - |* {pul/_purl fun/$-(httr *)} :: - [p=*(list tank) q=[%| p=[pul %get ~ ~] q=fun]] :: + =| usr/iden :: + |* {pul/_purl fun/$-(httr *)} :: + :- p=*(list tank) :: + q=[%| p=`usr q=[pul %get ~ ~] r=fun] :: :: :: ++ sole-no :: empty result [p=*(list tank) q=[%& ~]] ::