mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
support base64 generated thumbnails in serps.
This commit is contained in:
parent
08058d4f69
commit
82726879a2
@ -169,6 +169,7 @@ case EWAITINGTOSYNCHOSTSCONF: return "Wait to ensure hosts.conf in sync";
|
||||
case EDOCNONCANONICAL: return "Url was dup of canonical page";
|
||||
case ECUSTOMCRAWLMISMATCH: return "Job name/type mismatch. Job name has already been used for a crawl or bulk job.";
|
||||
case ENOTOKEN: return "Missing token";
|
||||
case EBADIMG: return "Bad image";
|
||||
}
|
||||
// if the remote error bit is clear it must be a regular errno
|
||||
//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );
|
||||
|
3
Errno.h
3
Errno.h
@ -172,6 +172,7 @@ enum {
|
||||
EWAITINGTOSYNCHOSTSCONF,
|
||||
EDOCNONCANONICAL,
|
||||
ECUSTOMCRAWLMISMATCH, // a crawl request was made with a name that already existed for bulk request (or the other way around)
|
||||
ENOTOKEN
|
||||
ENOTOKEN,
|
||||
EBADIMG
|
||||
};
|
||||
#endif
|
||||
|
197
Images.cpp
197
Images.cpp
@ -12,10 +12,7 @@
|
||||
|
||||
//static void gotTermFreqWrapper ( void *state ) ;
|
||||
static void gotTermListWrapper ( void *state ) ;
|
||||
static void gotImgIpWrapper ( void *state , long ip ) ;
|
||||
static void gotImageWrapper ( void *state ) ;
|
||||
static void *thumbStartWrapper_r ( void *state , ThreadEntry *te );
|
||||
static void thumbDoneWrapper ( void *state , ThreadEntry *te );
|
||||
static void getImageInfo ( char *buf, long size, long *dx, long *dy, long *it);
|
||||
|
||||
Images::Images ( ) {
|
||||
@ -31,6 +28,8 @@ void Images::reset() {
|
||||
m_imgReplyLen = 0;
|
||||
m_imgReplyMaxLen = 0;
|
||||
m_numImages = 0;
|
||||
m_imageBufValid = false;
|
||||
m_phase = 0;
|
||||
}
|
||||
|
||||
/*
|
||||
@ -242,6 +241,7 @@ bool Images::getThumbnail ( char *pageSite ,
|
||||
// reset here now
|
||||
m_i = 0;
|
||||
m_j = 0;
|
||||
m_phase = 0;
|
||||
|
||||
// sanity check
|
||||
if ( ! m_pageUrl ) { char *xx=NULL;*xx=0; }
|
||||
@ -481,16 +481,11 @@ void Images::gotTermList ( ) {
|
||||
|
||||
bool Images::downloadImages () {
|
||||
// all done if we got a valid thumbnail
|
||||
if ( m_thumbnailValid ) return true;
|
||||
// if not valid free old image
|
||||
if ( m_imgReply ) {
|
||||
mfree ( m_imgReply , m_imgReplyMaxLen , "Image" );
|
||||
m_imgReply = NULL;
|
||||
}
|
||||
//if ( m_thumbnailValid ) return true;
|
||||
|
||||
long srcLen;
|
||||
char *src = NULL;
|
||||
long i = 0;
|
||||
long node;
|
||||
|
||||
// downloading an image from diffbot json reply?
|
||||
if ( m_xd->m_isDiffbotJSONObject ) {
|
||||
@ -506,45 +501,98 @@ bool Images::downloadImages () {
|
||||
goto insertionPoint;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// . download each leftover image
|
||||
// . stop as soon as we get one with good dimensions
|
||||
// . make a thumbnail of that one
|
||||
for ( i = m_j ; i < m_numImages ; i++ ) {
|
||||
// advance now
|
||||
m_j++;
|
||||
// if we should stop, stop
|
||||
if ( m_stopDownloading ) break;
|
||||
// skip if bad or not unique
|
||||
if ( m_errors[i] ) continue;
|
||||
// set status msg
|
||||
sprintf ( m_statusBuf ,"downloading image %li",i);
|
||||
// point to it
|
||||
if ( m_xd ) m_xd->setStatus ( m_statusBuf );
|
||||
// get the url of the image
|
||||
src = m_xml->getString(i,i+1,"src",&srcLen);
|
||||
// construct the url to download
|
||||
insertionPoint:
|
||||
// set it to the full url
|
||||
//Url iu;
|
||||
// use "pageUrl" as the baseUrl
|
||||
m_imageUrl.set ( m_pageUrl , src , srcLen );
|
||||
for ( ; m_j < m_numImages ; m_j++ , m_phase = 0 ) {
|
||||
|
||||
// get the image ip. will also download the image.
|
||||
if ( ! downloadImage () )
|
||||
return false;
|
||||
if ( m_phase == 0 ) {
|
||||
// advance
|
||||
m_phase++;
|
||||
// get img tag node
|
||||
node = m_imageNodes[m_j];
|
||||
// get the url of the image
|
||||
src = m_xml->getString(node,"src",&srcLen);
|
||||
// construct the url to download
|
||||
insertionPoint:
|
||||
// if we should stop, stop
|
||||
if ( m_stopDownloading ) break;
|
||||
// skip if bad or not unique
|
||||
if ( m_errors[m_j] ) continue;
|
||||
// set status msg
|
||||
sprintf ( m_statusBuf ,"downloading image %li",m_j);
|
||||
// point to it
|
||||
if ( m_xd ) m_xd->setStatus ( m_statusBuf );
|
||||
// use "pageUrl" as the baseUrl
|
||||
m_imageUrl.set ( m_pageUrl , src , srcLen );
|
||||
}
|
||||
|
||||
// process the image we downloaded in case did not block,
|
||||
// maybe it was in the html cache
|
||||
gotImage();
|
||||
// get image ip
|
||||
if ( m_phase == 1 ) {
|
||||
// advance
|
||||
m_phase++;
|
||||
// this increments phase if it should
|
||||
if ( ! getImageIp() ) return false;
|
||||
// error?
|
||||
if ( g_errno ) continue;
|
||||
}
|
||||
|
||||
// download the actual image
|
||||
if ( m_phase == 2 ) {
|
||||
// advance
|
||||
m_phase++;
|
||||
// download image data
|
||||
if ( ! downloadImage() ) return false;
|
||||
// error downloading?
|
||||
if ( g_errno ) continue;
|
||||
}
|
||||
|
||||
// get thumbnail using threaded call to netpbm stuff
|
||||
if ( m_phase == 3 ) {
|
||||
// advance
|
||||
m_phase++;
|
||||
// download image data
|
||||
if ( ! makeThumb() ) return false;
|
||||
// error downloading?
|
||||
if ( g_errno ) continue;
|
||||
}
|
||||
|
||||
// error making thumb or just not a good thumb size?
|
||||
if ( ! m_thumbnailValid ) {
|
||||
// free old image we downloaded, if any
|
||||
m_msg13.reset();
|
||||
// i guess do this too, it was pointing at it in msg13
|
||||
m_imgReply = NULL;
|
||||
}
|
||||
|
||||
// it's a keeper
|
||||
m_imageBuf.safeStrcpy ( m_imageUrl.getUrl() );
|
||||
m_imageBuf.pushChar('\0');
|
||||
m_imageBuf.pushLong(m_tdx);
|
||||
m_imageBuf.pushLong(m_tdy);
|
||||
m_imageBuf.safeMemcpy ( m_imgData , m_thumbnailSize );
|
||||
m_imageBufValid = true;
|
||||
|
||||
// save mem. do this after because m_imgData uses m_msg13's
|
||||
// reply buf to store the thumbnail for now...
|
||||
m_msg13.reset();
|
||||
m_imgReply = NULL;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
return gotImage();
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Images::downloadImage ( ) {
|
||||
static void gotImgIpWrapper ( void *state , long ip ) {
|
||||
Images *THIS = (Images *)state;
|
||||
// control loop
|
||||
if ( ! THIS->downloadImages() ) return;
|
||||
// call callback at this point, we are done with the download loop
|
||||
THIS->m_callback ( THIS->m_state );
|
||||
}
|
||||
|
||||
bool Images::getImageIp ( ) {
|
||||
if ( ! m_msgc.getIp ( m_imageUrl.getHost () ,
|
||||
m_imageUrl.getHostLen() ,
|
||||
&m_latestIp ,
|
||||
@ -552,21 +600,18 @@ bool Images::downloadImage ( ) {
|
||||
gotImgIpWrapper ))
|
||||
// we blocked
|
||||
return false;
|
||||
|
||||
return downloadImage2 ( );
|
||||
return true;
|
||||
}
|
||||
|
||||
void gotImgIpWrapper ( void *state , long ip ) {
|
||||
static void downloadImageWrapper ( void *state ) {
|
||||
Images *THIS = (Images *)state;
|
||||
if ( ! THIS->downloadImage2 ( ) ) return;
|
||||
// if did not block return control to loop
|
||||
// control loop
|
||||
if ( ! THIS->downloadImages() ) return;
|
||||
// call callback at this point, we are done with the download loop
|
||||
// all done
|
||||
THIS->m_callback ( THIS->m_state );
|
||||
}
|
||||
|
||||
bool Images::downloadImage2 ( ) {
|
||||
|
||||
bool Images::downloadImage ( ) {
|
||||
// error?
|
||||
if ( m_latestIp == 0 || m_latestIp == -1 ) {
|
||||
log(LOG_DEBUG,"images: ip of %s is %li (%s)",
|
||||
@ -575,9 +620,7 @@ bool Images::downloadImage2 ( ) {
|
||||
g_errno = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
CollectionRec *cr = g_collectiondb.getRec(m_collnum);
|
||||
|
||||
// assume success
|
||||
m_httpStatus = 200;
|
||||
// set the request
|
||||
@ -594,24 +637,21 @@ bool Images::downloadImage2 ( ) {
|
||||
strcpy(r->m_url,m_imageUrl.getUrl());
|
||||
// . try to download it
|
||||
// . i guess we are ignoring hammers at this point
|
||||
if ( ! m_msg13.getDoc(r,false,this,gotImageWrapper))
|
||||
if ( ! m_msg13.getDoc(r,false,this,downloadImageWrapper))
|
||||
return false;
|
||||
// make thumbnail. this can return false if blocks, true otherwise
|
||||
// because it uses a thread
|
||||
return gotImage ( );
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
void gotImageWrapper ( void *state ) {
|
||||
static void makeThumbWrapper ( void *state , ThreadEntry *t ) {
|
||||
Images *THIS = (Images *)state;
|
||||
// process/store the reply
|
||||
if ( ! THIS->gotImage ( ) ) return;
|
||||
// download the images. will set m_stopDownloading when we get one
|
||||
// control loop
|
||||
if ( ! THIS->downloadImages() ) return;
|
||||
// all done
|
||||
THIS->m_callback ( THIS->m_state );
|
||||
}
|
||||
|
||||
bool Images::gotImage ( ) {
|
||||
bool Images::makeThumb ( ) {
|
||||
// did it have an error?
|
||||
if ( g_errno ) {
|
||||
// just give up on all of them if one has an error
|
||||
@ -633,7 +673,7 @@ bool Images::gotImage ( ) {
|
||||
// the real page.
|
||||
if ( g_errno ) {
|
||||
log( "ERROR? g_errno puked: %s", mstrerror(g_errno) );
|
||||
g_errno = 0;
|
||||
//g_errno = 0;
|
||||
return true;
|
||||
}
|
||||
//if ( ! slot ) return true;
|
||||
@ -642,12 +682,17 @@ bool Images::gotImage ( ) {
|
||||
bufLen = m_msg13.m_replyBufSize;
|
||||
bufMaxLen = m_msg13.m_replyBufAllocSize;
|
||||
// no image?
|
||||
if ( ! buf || bufLen <= 0 ) return true;
|
||||
if ( ! buf || bufLen <= 0 ) {
|
||||
g_errno = EBADIMG;
|
||||
return true;
|
||||
}
|
||||
// we are image candidate #i
|
||||
long i = m_j - 1;
|
||||
//long i = m_j - 1;
|
||||
// get img tag node
|
||||
long node = m_imageNodes[m_j];
|
||||
// get the url of the image
|
||||
long srcLen;
|
||||
char *src = m_xml->getString(i,i+1,"src",&srcLen);
|
||||
char *src = m_xml->getString(node,"src",&srcLen);
|
||||
// set it to the full url
|
||||
Url iu;
|
||||
// use "pageUrl" as the baseUrl
|
||||
@ -657,6 +702,7 @@ bool Images::gotImage ( ) {
|
||||
log ( "image: MIME.set() failed in gotImage()" );
|
||||
// give up on the remaining images then
|
||||
m_stopDownloading = true;
|
||||
g_errno = EBADIMG;
|
||||
return true;
|
||||
}
|
||||
// set the status so caller can see
|
||||
@ -667,6 +713,7 @@ bool Images::gotImage ( ) {
|
||||
m_httpStatus);
|
||||
// give up on the remaining images then
|
||||
m_stopDownloading = true;
|
||||
g_errno = EBADIMG;
|
||||
return true;
|
||||
}
|
||||
// make sure this is an image
|
||||
@ -675,6 +722,7 @@ bool Images::gotImage ( ) {
|
||||
log( LOG_DEBUG, "image: gotImage() states that this image is "
|
||||
"not in a format we currently handle." );
|
||||
// try the next image if any
|
||||
g_errno = EBADIMG;
|
||||
return true;
|
||||
}
|
||||
// get the content
|
||||
@ -690,11 +738,12 @@ bool Images::gotImage ( ) {
|
||||
|
||||
if ( ! m_imgReply || m_imgReplyLen == 0 ) {
|
||||
log( LOG_DEBUG, "image: Returned empty image reply!" );
|
||||
g_errno = EBADIMG;
|
||||
return true;
|
||||
}
|
||||
|
||||
// get next if too small
|
||||
if ( m_imgDataSize < 20 ) return true;
|
||||
if ( m_imgDataSize < 20 ) { g_errno = EBADIMG; return true; }
|
||||
|
||||
long imageType;
|
||||
getImageInfo ( m_imgData, m_imgDataSize, &m_dx, &m_dy, &imageType );
|
||||
@ -710,6 +759,7 @@ bool Images::gotImage ( ) {
|
||||
// skip if bad dimensions
|
||||
if( ((m_dx < 50) || (m_dy < 50)) && ((m_dx > 0) && (m_dy > 0)) ) {
|
||||
log( "image: Image is too small to represent a news article." );
|
||||
g_errno = EBADIMG;
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -726,23 +776,14 @@ bool Images::gotImage ( ) {
|
||||
if ( g_threads.call ( FILTER_THREAD ,
|
||||
MAX_NICENESS ,
|
||||
this ,
|
||||
thumbDoneWrapper ,
|
||||
makeThumbWrapper ,
|
||||
thumbStartWrapper_r ) ) return false;
|
||||
// threads might be off
|
||||
logf ( LOG_DEBUG, "image: Calling thumbnail gen without thread.");
|
||||
thumbStartWrapper_r ( NULL , NULL );
|
||||
thumbStartWrapper_r ( this , NULL );
|
||||
return true;
|
||||
}
|
||||
|
||||
void thumbDoneWrapper ( void *state , ThreadEntry *t ) {
|
||||
Images *THIS = (Images *)state;
|
||||
// . download another image if we ! m_thumbnailValid
|
||||
// . should also free m_imgReply if ! m_thumbnailValid
|
||||
if ( ! THIS->downloadImages() ) return;
|
||||
// all done
|
||||
THIS->m_callback ( THIS->m_state );
|
||||
}
|
||||
|
||||
void *thumbStartWrapper_r ( void *state , ThreadEntry *t ) {
|
||||
Images *THIS = (Images *)state;
|
||||
THIS->thumbStart_r ( true /* am thread?*/ );
|
||||
@ -855,7 +896,8 @@ void Images::thumbStart_r ( bool amThread ) {
|
||||
|
||||
// Call clone function for the shell to execute command
|
||||
// This call WILL BLOCK . timeout is 30 seconds.
|
||||
int err = my_system_r( cmd, 30 ); // m_thmbconvTimeout );
|
||||
//int err = my_system_r( cmd, 30 ); // m_thmbconvTimeout );
|
||||
int err = system( cmd ); // m_thmbconvTimeout );
|
||||
|
||||
//if( (m_dx != 0) && (m_dy != 0) )
|
||||
// unlink( in );
|
||||
@ -936,6 +978,11 @@ void Images::thumbStart_r ( bool amThread ) {
|
||||
// MDW: this was m_imgReply
|
||||
getImageInfo ( m_imgData , m_thumbnailSize , &m_tdx , &m_tdy , NULL );
|
||||
|
||||
// now make the meta data struct
|
||||
// <imageUrl>\0<width><height><thumbnailData>
|
||||
|
||||
|
||||
|
||||
log( LOG_DEBUG, "image: Thumbnail size: %li bytes.", m_imgDataSize );
|
||||
log( LOG_DEBUG, "image: Thumbnail dx=%li dy=%li.", m_tdx,m_tdy );
|
||||
log( LOG_DEBUG, "image: Thumbnail generated in %lldms.", stop-start );
|
||||
|
17
Images.h
17
Images.h
@ -49,19 +49,26 @@ class Images {
|
||||
void *state ,
|
||||
void (*callback)(void *state) );
|
||||
|
||||
char *getImageData () { return m_imgData; };
|
||||
long getImageDataSize() { return m_imgDataSize; };
|
||||
//char *getImageData () { return m_imgData; };
|
||||
//long getImageDataSize() { return m_imgDataSize; };
|
||||
//long getImageType () { return m_imageType; };
|
||||
|
||||
SafeBuf m_imageBuf;
|
||||
bool m_imageBufValid;
|
||||
long m_phase;
|
||||
|
||||
bool gotTermFreq();
|
||||
bool launchRequests();
|
||||
void gotTermList();
|
||||
bool downloadImages();
|
||||
|
||||
bool downloadImage ( ) ;
|
||||
bool downloadImage2 ( ) ;
|
||||
|
||||
bool gotImage ( );
|
||||
|
||||
bool getImageIp();
|
||||
bool downloadImage();
|
||||
bool makeThumb();
|
||||
|
||||
//bool gotImage ( );
|
||||
void thumbStart_r ( bool amThread );
|
||||
|
||||
long m_i;
|
||||
|
@ -2567,9 +2567,34 @@ bool printResult ( State0 *st, long ix ) {
|
||||
// http://img.youtube.com/vi/auQbi_fkdGE/2.jpg
|
||||
// get the thumbnail url
|
||||
if ( mr->ptr_imgUrl && si->m_format == FORMAT_HTML )
|
||||
sb->safePrintf ("<a href=%s><image src=%s></a>",
|
||||
sb->safePrintf ("<a href=%s><img src=%s></a>",
|
||||
url,mr->ptr_imgUrl);
|
||||
|
||||
// if we have a thumbnail show it next to the search result
|
||||
if ( si->m_format == FORMAT_HTML &&
|
||||
! mr->ptr_imgUrl &&
|
||||
mr->ptr_imgData ) {
|
||||
char *p = mr->ptr_imgData; // orig img url
|
||||
p += gbstrlen(p) + 1; // dx of thumb
|
||||
long tdx = *(long *)p;
|
||||
p += 4;
|
||||
long tdy = *(long *)p;
|
||||
p += 4;
|
||||
char *imgData = p;
|
||||
char *pend = mr->ptr_imgData + mr->size_imgData;
|
||||
long thumbBytes = pend - p;
|
||||
sb->safePrintf("<a href=%s>"
|
||||
"<img width=%li height=%li "
|
||||
"src=\""
|
||||
"data:image/jpg;base64,"
|
||||
,url
|
||||
,tdx
|
||||
,tdy);
|
||||
// encode image in base 64
|
||||
sb->base64Encode ( imgData , thumbBytes , 0 ); // 0 niceness
|
||||
sb->safePrintf("\"></a>");
|
||||
}
|
||||
|
||||
|
||||
// print image for widget
|
||||
if ( //mr->ptr_imgUrl &&
|
||||
@ -2593,12 +2618,33 @@ bool printResult ( State0 *st, long ix ) {
|
||||
, (long)RESULT_HEIGHT
|
||||
, (long)PADDING
|
||||
);
|
||||
if ( mr->ptr_imgUrl )
|
||||
sb->safePrintf("background-repeat:no-repeat;"
|
||||
"background-size:%lipx 140px;"
|
||||
"background-image:url('%s');"
|
||||
, widgetwidth - 2*8 // padding is 8px
|
||||
, mr->ptr_imgUrl);
|
||||
// if ( mr->ptr_imgUrl )
|
||||
// sb->safePrintf("background-repeat:no-repeat;"
|
||||
// "background-size:%lipx 140px;"
|
||||
// "background-image:url('%s');"
|
||||
// , widgetwidth - 2*8 // padding is 8px
|
||||
// , mr->ptr_imgUrl);
|
||||
if ( mr->ptr_imgData ) {
|
||||
char *p = mr->ptr_imgData; // orig img url
|
||||
p += gbstrlen(p) + 1; // dx of thumb
|
||||
//long tdx = *(long *)p;
|
||||
p += 4;
|
||||
//long tdy = *(long *)p;
|
||||
p += 4;
|
||||
char *imgData = p;
|
||||
char *pend = mr->ptr_imgData + mr->size_imgData;
|
||||
long thumbBytes = pend - p;
|
||||
sb->safePrintf("background-repeat:no-repeat;"
|
||||
"background-size:%lipx 140px;"
|
||||
"background-image:url('data:image/"
|
||||
"jpg;base64,"
|
||||
, widgetwidth - 2*8); // padding is 8px
|
||||
// encode image in base 64
|
||||
sb->base64Encode (imgData,thumbBytes,0); // 0 niceness
|
||||
sb->safePrintf("');");
|
||||
}
|
||||
|
||||
|
||||
// end the div style attribute and div tag
|
||||
sb->safePrintf("\">");
|
||||
sb->safePrintf ( "<a "
|
||||
|
89
SafeBuf.cpp
89
SafeBuf.cpp
@ -3285,3 +3285,92 @@ bool SafeBuf::csvEncode ( char *s , long len , long niceness ) {
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SafeBuf::base64Encode ( char *sx , long len , long niceness ) {
|
||||
|
||||
unsigned char *s = (unsigned char *)sx;
|
||||
|
||||
if ( ! s ) return true;
|
||||
|
||||
// assume all chars are double quotes and will have to be encoded
|
||||
long need = len * 2 + 1 +3; // +3 for = padding
|
||||
|
||||
if ( ! reserve ( need ) ) return false;
|
||||
|
||||
// tmp vars
|
||||
char *dst = m_buf + m_length;
|
||||
|
||||
long round = 0;
|
||||
|
||||
// the table of 64 entities
|
||||
static char tab[] = {
|
||||
'A','B','C','D','E','F','G','H','I','J','K','L','M',
|
||||
'N','O','P','Q','R','S','T','U','V','W','X','Y','Z',
|
||||
'a','b','c','d','e','f','g','h','i','j','k','l','m',
|
||||
'n','o','p','q','r','s','t','u','v','w','x','y','z',
|
||||
'0','1','2','3','4','5','6','7','8','9','+','/'
|
||||
};
|
||||
|
||||
unsigned char val;
|
||||
// scan through all
|
||||
unsigned char *send = s + len;
|
||||
for ( ; s < send ; ) {
|
||||
// breathe
|
||||
QUICKPOLL ( niceness );
|
||||
|
||||
unsigned char c1 = s[0];
|
||||
unsigned char c2 = 0;
|
||||
//unsigned char c3 = 0;
|
||||
|
||||
if ( s+1 < send ) c2 = s[1];
|
||||
else c2 = 0;
|
||||
|
||||
if ( round == 0 ) {
|
||||
val = c1 >>2;
|
||||
}
|
||||
else if ( round == 1 ) {
|
||||
val = (c1 & 0x03) << 4;
|
||||
val |= c2 >> 4;
|
||||
// time for this
|
||||
s++;
|
||||
}
|
||||
else if ( round == 2 ) {
|
||||
val = ((c1 & 0x0f) << 2);
|
||||
val |= ((c2 & 0xc0) >> 6);
|
||||
s++;
|
||||
}
|
||||
else if ( round == 3 ) {
|
||||
val = (c1 & 0x3f);
|
||||
s++;
|
||||
}
|
||||
// add '0'
|
||||
*dst = tab[val];
|
||||
// point to next char
|
||||
dst++;
|
||||
// keep going if more left
|
||||
if ( s < send ) {
|
||||
// repeat every 4 cycles since it is aligned then
|
||||
if ( ++round == 4 ) round = 0;
|
||||
continue;
|
||||
}
|
||||
// if we are done do padding
|
||||
if ( round == 0 ) {
|
||||
*dst++ = '=';
|
||||
}
|
||||
if ( round == 1 ) {
|
||||
*dst++ = '=';
|
||||
*dst++ = '=';
|
||||
}
|
||||
if ( round == 2 ) {
|
||||
*dst++ = '=';
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
m_length += dst - (m_buf + m_length);
|
||||
|
||||
nullTerm();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -110,6 +110,8 @@ struct SafeBuf {
|
||||
|
||||
bool csvEncode ( char *s , long len , long niceness = 0 );
|
||||
|
||||
bool base64Encode ( char *s , long len , long niceness = 0 );
|
||||
|
||||
//bool pushLong ( long val ) { return safeMemcpy((char *)&val,4); }
|
||||
bool cat(SafeBuf& c);
|
||||
// . only cat the sections/tag that start with "tagFilter"
|
||||
|
40
XmlDoc.cpp
40
XmlDoc.cpp
@ -3340,7 +3340,7 @@ char *XmlDoc::prepareToMakeTitleRec ( ) {
|
||||
//Images *images = getImages();
|
||||
//if ( ! images || images == (Images *)-1 ) return (char *)images;
|
||||
|
||||
char **id = getImageData();
|
||||
char **id = getThumbnailData();
|
||||
if ( ! id || id == (void *)-1 ) return (char *)id;
|
||||
|
||||
int8_t *hopCount = getHopCount();
|
||||
@ -17387,13 +17387,19 @@ long XmlDoc::getDomHash32( ) {
|
||||
// . you can inline it in an image tag like
|
||||
// <img src="data:image/png;base64,iVBORw0...."/>
|
||||
// background-image:url(data:image/png;base64,iVBORw0...);
|
||||
char **XmlDoc::getImageData ( ) {
|
||||
// . FORMAT of ptr_imageData:
|
||||
// <origimageUrl>\0<4bytethumbwidth><4bytethumbheight><thumbnaildatajpg>
|
||||
char **XmlDoc::getThumbnailData ( ) {
|
||||
if ( m_imageDataValid ) return &ptr_imageData;
|
||||
Images *images = getImages();
|
||||
if ( ! images || images == (Images *)-1 ) return (char **)images;
|
||||
ptr_imageData = images->m_imgData;
|
||||
size_imageData = images->m_thumbnailSize; // size of image in bytes
|
||||
ptr_imageData = NULL;
|
||||
size_imageData = 0;
|
||||
m_imageDataValid = true;
|
||||
if ( ! images || ! images->m_imageBufValid ) return &ptr_imageData;
|
||||
if ( images->m_imageBuf.length() <= 0 ) return &ptr_imageData;
|
||||
ptr_imageData = images->m_imageBuf.getBufStart();
|
||||
size_imageData = images->m_imageBuf.length();
|
||||
return &ptr_imageData;
|
||||
}
|
||||
|
||||
@ -18548,6 +18554,24 @@ bool XmlDoc::logIt ( ) {
|
||||
else
|
||||
sb.safePrintf("addlistsize=%05li ",(long)0);
|
||||
|
||||
|
||||
if ( size_imageData && m_imageDataValid ) {
|
||||
// url is in data now
|
||||
char *imgUrl = ptr_imageData;
|
||||
long imgUrlLen = gbstrlen(imgUrl);
|
||||
char *p = imgUrl + imgUrlLen + 1;
|
||||
long tdx = *(long *)p; p += 4; // thumb width
|
||||
long tdy = *(long *)p; p += 4; // thumb height
|
||||
long used = p - ptr_imageData;
|
||||
long remain = size_imageData - used;
|
||||
//char *imgData = imgUrl + imgUrlLen + 1;
|
||||
sb.safePrintf("thumbnail=%s,%libytes,%lix%li ",
|
||||
imgUrl,remain,tdx,tdy);
|
||||
}
|
||||
else
|
||||
sb.safePrintf("thumbnail=none ");
|
||||
|
||||
|
||||
/*
|
||||
if ( m_hasAddressValid && m_addressesValid )
|
||||
sb.safePrintf("numaddr=%li ",(long)m_addresses.m_numValid);
|
||||
@ -27574,6 +27598,12 @@ Msg20Reply *XmlDoc::getMsg20Reply ( ) {
|
||||
if ( *iu ) reply->size_imgUrl = gbstrlen(*iu)+1;
|
||||
}
|
||||
|
||||
// get thumbnail image DATA
|
||||
if ( ! reply->ptr_imgData ) { // && m_req->m_getImageUrl ) {
|
||||
reply-> ptr_imgData = ptr_imageData;
|
||||
reply->size_imgData = size_imageData;
|
||||
}
|
||||
|
||||
// . adids contained in the doc
|
||||
// . get from title rec rather than generating
|
||||
// . but we need to generate to store in titleRec at index time
|
||||
@ -28178,6 +28208,7 @@ char **XmlDoc::getImageUrl() {
|
||||
// diffbot often extracts an image in the json. but even if pure
|
||||
// json it might be diffbot json that was injected and we don't know
|
||||
// it so check contentType...
|
||||
/*
|
||||
if ( m_isDiffbotJSONObject || m_contentType == CT_JSON ) {
|
||||
char *iu = strstr(ptr_utf8Content,"\"images\":[{");
|
||||
if ( ! iu ) return &m_imageUrl;
|
||||
@ -28211,6 +28242,7 @@ char **XmlDoc::getImageUrl() {
|
||||
m_imageUrl = m_imageUrlBuf.getBufStart();
|
||||
return &m_imageUrl;
|
||||
}
|
||||
*/
|
||||
|
||||
// all done if not youtube or meta cafe
|
||||
char *host = f->getHost();
|
||||
|
2
XmlDoc.h
2
XmlDoc.h
@ -670,7 +670,7 @@ class XmlDoc {
|
||||
long getHostHash32a ( ) ;
|
||||
long getHostHash32b ( ) ;
|
||||
long getDomHash32 ( );
|
||||
char **getImageData();
|
||||
char **getThumbnailData();
|
||||
class Images *getImages ( ) ;
|
||||
int8_t *getNextSpiderPriority ( ) ;
|
||||
long *getPriorityQueueNum ( ) ;
|
||||
|
6
main.cpp
6
main.cpp
@ -947,6 +947,11 @@ int main2 ( int argc , char *argv[] ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
//SafeBuf tt;
|
||||
//tt.base64Encode("any carnal pleas",16);
|
||||
//fprintf(stderr,"%s\n",tt.getBufStart());
|
||||
//exit(0);
|
||||
|
||||
// get hosts.conf file
|
||||
//char *hostsConf = "./hosts.conf";
|
||||
long hostId = 0;
|
||||
@ -1061,7 +1066,6 @@ int main2 ( int argc , char *argv[] ) {
|
||||
// return 0;
|
||||
//}
|
||||
|
||||
|
||||
// these tests do not need a hosts.conf
|
||||
/*
|
||||
if ( strcmp ( cmd , "trietest" ) == 0 ) {
|
||||
|
Loading…
Reference in New Issue
Block a user