"
""
"Site List Examples |
"
//""
//""
,TABLE_STYLE );//, DARK_BLUE);
sb->safePrintf(
//"*"
//" | "
//"Spider all urls encountered. If you just submit "
//"this by itself, then Gigablast will initiate spidering "
//"automatically at dmoz.org, an internet "
//"directory of good sites. | "
//"
"
""
"goodstuff.com | "
""
"Spider the url goodstuff.com/ and spider "
"any links we harvest that have the domain "
"goodstuff.com"
" | "
"
"
// protocol and subdomain match
""
"http://www.goodstuff.com/ | "
""
"Spider the url "
"http://www.goodstuff.com/ and spider "
"any links we harvest that start with "
"http://www.goodstuff.com/. NOTE: if the url "
"www.goodstuff.com redirects to foo.goodstuff.com then "
"foo.goodstuff.com still gets spidered "
"because it is considered to be manually added, but "
"no other urls from foo.goodstuff.com will be spidered."
" | "
"
"
// protocol and subdomain match
""
"http://justdomain.com/foo/ | "
""
"Spider the url "
"http://justdomain.com/foo/ and spider "
"any links we harvest that start with "
"http://justdomain.com/foo/. "
"Urls that start with "
"http://www.justdomain.com/, for example, "
"will NOT match this."
" | "
"
"
""
"seed:www.goodstuff.com/myurl.html | "
""
"Spider the url www.goodstuff.com/myurl.html. "
"Add any outlinks we find into the "
"spider queue, but those outlinks will only be "
"spidered if they "
"match ANOTHER line in this site list."
" | "
"
"
// protocol and subdomain match
""
"site:http://www.goodstuff.com/ | "
""
"Allow any urls starting with "
"http://www.goodstuff.com/ to be spidered "
"if encountered."
" | "
"
"
// subdomain match
""
"site:www.goodstuff.com | "
""
"Allow any urls starting with "
"www.goodstuff.com/ to be spidered "
"if encountered."
" | "
"
"
""
"-site:bad.goodstuff.com | "
""
"Do not spider any urls starting with "
"bad.goodstuff.com/ to be spidered "
"if encountered."
" | "
"
"
// domain match
""
"site:goodstuff.com | "
""
"Allow any urls starting with "
"goodstuff.com/ to be spidered "
"if encountered."
" | "
"
"
// spider this subdir
""
"site:"
"http://www.goodstuff.com/goodir/anotherdir/ | "
""
"Allow any urls starting with "
"http://www.goodstuff.com/goodir/anotherdir/ "
"to be spidered "
"if encountered."
" | "
"
"
// exact match
//""
//"exact:http://xyz.goodstuff.com/myurl.html | "
//""
//"Allow this specific url."
//" | "
//"
"
/*
// local subdir match
""
"file://C/mydir/mysubdir/"
" | "
"Spider all files in the given subdirectory or lower. "
" | "
"
"
""
"-file://C/mydir/mysubdir/baddir/"
" | "
"Do not spider files in this subdirectory."
" | "
"
"
*/
// connect to a device and index it as a stream
//""
//"stream:/dev/eth0"
//" | "
//"Connect to a device and index it as a stream. "
//"It will be treated like a single huge document for "
//"searching purposes with chunks being indexed in "
//"realtime. Or chunk it up into individual document "
//"chunks, but proximity term searching will have to "
//"be adjusted to compute query term distances "
//"inter-document."
//" | "
//"
"
// negative subdomain match
""
"contains:goodtuff | "
"Spider any url containing goodstuff."
" | "
"
"
""
"-contains:badstuff | "
"Do not spider any url containing badstuff."
" | "
"
"
/*
""
"regexp:-pid=[0-9A-Z]+/ | "
"Url must match this regular expression. "
"Try to avoid using these if possible; they can slow "
"things down and are confusing to use."
" | "
"
"
*/
// tag match
""
//" | tag:boots contains:boots "
"tag:boots site:www.westernfootwear."
"com "
"tag:boots cowboyshop.com "
"tag:boots contains:/boots "
"tag:boots site:www.moreboots.com "
"tag:boots http://lotsoffootwear.com/"
" "
//" | t:boots -contains:www.cowboyshop.com/shoes/ | "
""
"Advance users only. "
"Tag any urls matching these 5 url patterns "
"so we can use "
"the expression tag:boots in the "
"url filters and perhaps "
"give such urls higher spider priority. "
"For more "
"precise spidering control over url subsets. "
"Preceed any pattern with the tagname followed by "
"space to tag it."
" | "
"
"
""
"# This line is a comment. | "
"Empty lines and lines starting with # are "
"ignored."
" | "
"
"
"
"
);
return true;
}
// Render a self-contained "scrolling widget" into *sb: a fixed-size div
// whose contents are fetched via an AJAX /search request against the
// collection's index, with infinite-scroll appending near the bottom.
//
// NOTE(review): the string literals throughout this copy of the file
// appear to have been stripped/garbled (several safePrintf calls below
// pass arguments but no surviving %-specifiers in their format string,
// e.g. the calls at the "div is being rendered" and "WIDGET MASTER"
// comments). Confirm every format string against pristine source before
// building — as shown, the format/argument counts do not match.
//
// @param sb  output buffer the widget HTML/JS is appended to
// @param cr  collection whose m_coll name is embedded in the search URL
// @return    always true
bool printScrollingWidget ( SafeBuf *sb , CollectionRec *cr ) {
sb->safePrintf("\n\n" );
// fixed pixel dimensions of the widget viewport
long widgetWidth = 300;
long widgetHeight = 500;
// make the ajax url that gets the search results
SafeBuf ub;
ub.safePrintf("/search"
//"format=ajax"
"?c=%s"
//"&prepend=gbsortbyint%%3Agbspiderdate"
// newest-indexed first; exclude docs with gbstatus:0
"&q=-gbstatus:0+gbsortbyint%%3Agbindexdate"
"&sc=0" // no site clustering
"&dr=0" // no deduping
// 10 results at a time
"&n=10"
"&widgetheight=%li"
"&widgetwidth=%li"
, cr->m_coll
, widgetHeight
, widgetWidth
);
//ub.safePrintf("&topdocid="
// );
// get the search results from neo as soon as this div is
// being rendered, and set its contents to them
// NOTE(review): format string for this call is missing in this copy —
// two arguments are passed (a scroll threshold and the ajax url) with
// no visible %-specifiers to consume them.
sb->safePrintf(//"\n\n"
// if (pos < (sd.scrollHeight-%li)) return...
// once user scrolls down to within last 5
// results then try to append to the results.
, widgetHeight +5*((long)RESULT_HEIGHT+2*PADDING)
, ub.getBufStart()
//,widgetHeight +5*((long)RESULT_HEIGHT+2*PADDING
);
// then the WIDGET MASTER div. set the "id" so that the
// style tag the user sets can control its appearance.
// when the browser loads this the ajax sets the contents
// to the reply from neo.
// on scroll call widget123_append() which will append
// more search results if we are near the bottom of the
// widget.
// NOTE(review): empty format string but widgetWidth/widgetHeight are
// passed — the original literal (with %li specifiers) was lost.
sb->safePrintf(""
, widgetWidth
, widgetHeight
);
//sb->safePrintf("");
// placeholder text shown until the AJAX reply overwrites the div
sb->safePrintf("Waiting for Server Response...");
// end the containing div
// NOTE(review): this literal spans two source lines with an embedded
// newline in this copy — likely a closing </div> originally.
sb->safePrintf("
");
return true;
}
bool sendPageWidgets ( TcpSocket *socket , HttpRequest *hr ) {
// true = usedefault coll?
CollectionRec *cr = g_collectiondb.getRec ( hr , true );
if ( ! cr ) {
g_httpServer.sendErrorReply(socket,500,"invalid collection");
return true;
}
char buf [ 128000 ];
SafeBuf sb(buf,128000);
printFrontPageShell ( &sb, "widgets", cr );
sb.safePrintf("