Better handling of identity in web scrapers: carry the request identity as an optional (unit iden) through dojo and sole-request

This commit is contained in:
Anton Dyudin 2016-03-15 15:09:40 -07:00
parent ce7e970c68
commit 47eac6dbc7
4 changed files with 22 additions and 16 deletions

View File

@ -32,7 +32,7 @@
{$pill p/path} :: noun to unix pill {$pill p/path} :: noun to unix pill
:: {$tree p/path} :: noun to unix tree :: {$tree p/path} :: noun to unix tree
{$file p/beam} :: save to clay {$file p/beam} :: save to clay
{$http p/?($post $put) q/iden r/purl} :: http outbound {$http p/?($post $put) q/(unit iden) r/purl} :: http outbound
{$poke p/goal} :: poke app {$poke p/goal} :: poke app
{$show p/?($0 $1 $2 $3)} :: print val+span+twig {$show p/?($0 $1 $2 $3)} :: print val+span+twig
{$verb p/term} :: store variable {$verb p/term} :: store variable
@ -42,7 +42,7 @@
q/dojo-build :: general build q/dojo-build :: general build
== :: == ::
++ dojo-build :: one arvo step ++ dojo-build :: one arvo step
$% {$ur p/iden q/purl} :: http GET request $% {$ur p/(unit iden) q/purl} :: http GET request
{$ge p/dojo-model} :: generator {$ge p/dojo-model} :: generator
{$dv p/path} :: core from source {$dv p/path} :: core from source
{$ex p/twig} :: hoon expression {$ex p/twig} :: hoon expression
@ -87,7 +87,7 @@
++ card :: general card ++ card :: general card
$% {$diff $sole-effect sole-effect} :: $% {$diff $sole-effect sole-effect} ::
{$send wire {ship term} clap} :: {$send wire {ship term} clap} ::
{$hiss wire {$~ iden} mark {$hiss hiss}} :: {$hiss wire (unit iden) mark {$hiss hiss}} ::
{$exec wire @p (unit {beak silk})} :: {$exec wire @p (unit {beak silk})} ::
{$deal wire sock term club} :: {$deal wire sock term club} ::
{$info wire @p toro} :: {$info wire @p toro} ::
@ -167,8 +167,8 @@
;~(plug (cold %file tar) dp-beam) ;~(plug (cold %file tar) dp-beam)
;~(plug (cold %flat pat) (most fas qut)) ;~(plug (cold %flat pat) (most fas qut))
;~(plug (cold %pill dot) (most fas sym)) ;~(plug (cold %pill dot) (most fas sym))
;~(plug (cold %http lus) (easy %post) dp-iden-url) ;~(plug (cold %http lus) (stag %post dp-iden-url))
;~(plug (cold %http hep) (easy %put) dp-iden-url) ;~(plug (cold %http hep) (stag %put dp-iden-url))
(stag %show (cook $?($1 $2 $3) (cook lent (stun [1 3] wut)))) (stag %show (cook $?($1 $2 $3) (cook lent (stun [1 3] wut))))
== ==
++ dp-hooves :: hoof list ++ dp-hooves :: hoof list
@ -221,7 +221,7 @@
(sear plex:vez (stag %conl poor:vez)) (sear plex:vez (stag %conl poor:vez))
:: ::
:: dp-iden-url: parse with auru:epur, then reshape the result.
:: (Side-by-side diff rows: pre-commit text first, post-commit text second.)
:: The cooked gate receives a/(unit iden) and b/purl.
:: Before: produced the bare identity, (fall a *iden), paired with b.
:: After: produces that same defaulted identity wrapped with a backtick,
:: so the head of the result is always a non-null unit.
++ dp-iden-url ++ dp-iden-url
(cook |=({a/(unit iden) b/purl} [(fall a *iden) b]) auru:epur) (cook |=({a/(unit iden) b/purl} [`(fall a *iden) b]) auru:epur)
:: ::
++ dp-model ;~(plug dp-server dp-config) :: ++dojo-model ++ dp-model ;~(plug dp-server dp-config) :: ++dojo-model
++ dp-path (tope he-beam) :: ++path ++ dp-path (tope he-beam) :: ++path
@ -269,10 +269,10 @@
(he-card(poy `+>+<(pux `way)) %exec way our.hid `[he-beak kas]) (he-card(poy `+>+<(pux `way)) %exec way our.hid `[he-beak kas])
:: ::
:: dy-eyre: emit a %hiss card carrying the request req under mark %httr.
:: (Side-by-side diff rows: pre-commit text first, post-commit text second.)
:: Sample: way (wire), usr (identity), req (hiss).
:: Before: usr was a bare iden and was wrapped with a backtick in the card.
:: After: usr is already a (unit iden) and is passed through unchanged.
:: Asserts that pux is ~ before proceeding, then records way in pux.
++ dy-eyre :: send work to eyre ++ dy-eyre :: send work to eyre
|= {way/wire usr/iden req/hiss} |= {way/wire usr/(unit iden) req/hiss}
^+ +>+> ^+ +>+>
?> ?=($~ pux) ?> ?=($~ pux)
(he-card(poy `+>+<(pux `way)) %hiss way `usr %httr %hiss req) (he-card(poy `+>+<(pux `way)) %hiss way usr %httr %hiss req)
:: ::
++ dy-stop :: stop work ++ dy-stop :: stop work
^+ +> ^+ +>
@ -415,6 +415,7 @@
:: dy-cast: wet gate taking a mold typ and a vase bun; produces a gate
:: that takes a vase a and returns its value q.a clammed to typ.
:: (Side-by-side diff rows: pre-commit text first, post-commit text second.)
:: The produced gate asserts the nest check between p.bun and p.a first.
:: The single-column row below exists only post-commit: it adds a ~| trace
:: of [p.bun p.a] so both types are reported if the assertion crashes.
++ dy-cast ++ dy-cast
|* {typ/_* bun/vase} |* {typ/_* bun/vase}
|= a/vase ^- typ |= a/vase ^- typ
~| [p.bun p.a]
?> (~(nest ut p.bun) & p.a) ?> (~(nest ut p.bun) & p.a)
;;(typ q.a) ;;(typ q.a)
:: ::
@ -537,7 +538,7 @@
++ dy-shown ++ dy-shown
$? twig $? twig
$^ {dy-shown dy-shown} $^ {dy-shown dy-shown}
$% {$ur iden purl} $% {$ur (unit iden) purl}
{$dv path} {$dv path}
{$as mark dy-shown} {$as mark dy-shown}
{$do twig dy-shown} {$do twig dy-shown}
@ -676,9 +677,11 @@
(dy-meal (slot 7 vax)) (dy-meal (slot 7 vax))
:: ::
$| $|
=+ hiz=;;(hiss +<.q.vax) => .(vax (slap vax !,(*twig ?>(?=($| -) .)))) :: XX working spec #72
=+ typ={$| (unit iden) hiss *}
=+ [~ usr hiz ~]=((dy-cast typ !>(*typ)) vax)
=. ..dy (he-diff %tan leaf+"< {(earn p.hiz)}" ~) =. ..dy (he-diff %tan leaf+"< {(earn p.hiz)}" ~)
(dy-eyre(pro `(slap (slot 7 vax) limb+%q)) /scar ~. hiz) (dy-eyre(pro `(slap (slot 15 vax) limb+%r)) /scar usr hiz)
== ==
:: ::
++ dy-sigh-scar :: scraper result ++ dy-sigh-scar :: scraper result

View File

@ -4,11 +4,11 @@
/? 310 /? 310
/- sole /- sole
[sole] [sole]
:: %get generator taking a hiss a.
:: (Side-by-side diff rows: pre-commit text first, post-commit text second.)
:: Before: the last part of the sample was $~ and the %| result was a
:: pair of the hiss and the continuation.
:: After: the last part of the sample is usr/iden, and the %| result
:: is led by `usr, followed by the hiss and the continuation.
:: Crashes with a %only-get-requests-supported-in-generators trace unless
:: p.q.a is $get. The continuation wraps the httr response via sole-so.
:- %get |= {^ {a/hiss $~} $~} :- %get |= {^ {a/hiss $~} usr/iden}
^- (sole-request (cask httr)) ^- (sole-request (cask httr))
?. ?=($get p.q.a) ?. ?=($get p.q.a)
~| %only-get-requests-supported-in-generators :: XX enforced? ~| %only-get-requests-supported-in-generators :: XX enforced?
!! !!
:- *tang :- *tang
:+ %| `hiss`a :^ %| `usr `hiss`a
|=(hit/httr (sole-so %httr hit)) |=(hit/httr (sole-so %httr hit))

View File

@ -5,6 +5,7 @@
/- sole /- sole
[sole] [sole]
:: %get generator taking a tape a: scans a with auri:epur and hands the
:: result to sole-at with a continuation that wraps the httr response
:: via sole-so under %httr.
:: (Side-by-side diff rows: pre-commit text first, post-commit text second.)
:: The single-column row below exists only post-commit: it adds an
:: explicit cast of the product to (sole-request (cask httr)).
:- %get |= {^ {a/tape $~} $~} :- %get |= {^ {a/tape $~} $~}
^- (sole-request (cask httr))
%+ sole-at (scan a auri:epur) %+ sole-at (scan a auri:epur)
|= hit/httr |= hit/httr
(sole-so %httr hit) (sole-so %httr hit)

View File

@ -73,7 +73,7 @@
|* out/$-(* *) :: output structure |* out/$-(* *) :: output structure
%+ pair (list tank) :: %+ pair (list tank) ::
%+ each (unit out) :: ~ is abort %+ each (unit out) :: ~ is abort
(pair hiss $-(httr (sole-request out))) :: fetch and continue (trel (unit iden) hiss $-(httr (sole-request out))) :: fetch and continue
:: :: :: ::
++ sole-gen :: XX virtual type ++ sole-gen :: XX virtual type
$% {$say $-((sole-args) (cask))} :: direct noun $% {$say $-((sole-args) (cask))} :: direct noun
@ -99,8 +99,10 @@
[p=*(list tank) q=[%| p=pom q=mor]] :: [p=*(list tank) q=[%| p=pom q=mor]] ::
:: :: :: ::
:: sole-at: build a fetch-and-continue result for a %get of pul, with fun
:: as the continuation over the httr response.
:: (Side-by-side diff rows: pre-commit text first, post-commit text second;
:: the two columns are offset by one row here because the post-commit
:: version gained a line, and the last two rows are post-commit only.)
:: Before: the %| payload was the pair p=[pul %get ~ ~] q=fun.
:: After: a default usr/iden is pinned with =| and the %| payload becomes
:: the triple p=`usr q=[pul %get ~ ~] r=fun.
++ sole-at :: fetch url ++ sole-at :: fetch url
|* {pul/_purl fun/$-(httr *)} :: =| usr/iden ::
[p=*(list tank) q=[%| p=[pul %get ~ ~] q=fun]] :: |* {pul/_purl fun/$-(httr *)} ::
:- p=*(list tank) ::
q=[%| p=`usr q=[pul %get ~ ~] r=fun] ::
:: :: :: ::
++ sole-no :: empty result ++ sole-no :: empty result
[p=*(list tank) q=[%& ~]] :: [p=*(list tank) q=[%& ~]] ::