open-source-search-engine/Images.h

198 lines
4.6 KiB
C
Raw Normal View History

2013-08-03 00:12:24 +04:00
// Matt Wells, copyright Nov 2008
#ifndef _IMAGES_H_
#define _IMAGES_H_
#include "Msg0.h"
#include "Msg36.h"
#include "Msg13.h"
#include "IndexList.h"
2014-04-24 21:13:45 +04:00
#include "MsgC.h"
2014-04-27 22:05:30 +04:00
#include "SafeBuf.h"
#include "HttpRequest.h" // FORMAT_HTML
2013-08-03 00:12:24 +04:00
// Capacity of the fixed-size per-image arrays declared in class Images
// (m_imageNodes, m_termIds and m_errors are all dimensioned with it).
#define MAX_IMAGES 500
2014-04-27 22:05:30 +04:00
// a single serialized thumbnail:
class ThumbnailInfo {
public:
2014-11-11 01:45:11 +03:00
int32_t m_origDX;
int32_t m_origDY;
int32_t m_dx;
int32_t m_dy;
int32_t m_urlSize;
int32_t m_dataSize;
2014-04-27 22:05:30 +04:00
char m_buf[];
char *getUrl() { return m_buf; };
char *getData() { return m_buf + m_urlSize; };
2014-11-11 01:45:11 +03:00
int32_t getDataSize() { return m_dataSize; };
int32_t getSize () { return sizeof(ThumbnailInfo)+m_urlSize+m_dataSize;};
2014-04-27 22:05:30 +04:00
2014-05-11 01:24:13 +04:00
// make sure neither the x or y side is > maxSize
bool printThumbnailInHtml ( SafeBuf *sb ,
2014-11-11 01:45:11 +03:00
int32_t maxWidth,
int32_t maxHeight,
2014-05-11 01:24:13 +04:00
bool printLink ,
2014-11-11 01:45:11 +03:00
int32_t *newdx ,
char *style = NULL ,
char format = FORMAT_HTML ) ;
2014-04-27 22:05:30 +04:00
};
// XmlDoc::ptr_imgData is a ThumbnailArray
class ThumbnailArray {
public:
// 1st byte if format version
char m_version;
// # of thumbs
2014-11-11 01:45:11 +03:00
int32_t m_numThumbnails;
2014-04-27 22:05:30 +04:00
// list of ThumbnailInfos
char m_buf[];
2014-11-11 01:45:11 +03:00
int32_t getNumThumbnails() { return m_numThumbnails;};
2014-04-27 22:05:30 +04:00
2014-11-11 01:45:11 +03:00
ThumbnailInfo *getThumbnailInfo ( int32_t x ) {
2014-04-27 22:05:30 +04:00
if ( x >= m_numThumbnails ) return NULL;
char *p = m_buf;
2014-11-11 01:45:11 +03:00
for ( int32_t i = 0 ; i < m_numThumbnails ; i++ ) {
2014-04-27 22:05:30 +04:00
if ( i == x ) return (ThumbnailInfo *)p;
ThumbnailInfo *ti = (ThumbnailInfo *)p;
p += ti->getSize();
}
return NULL;
};
};
2013-08-03 00:12:24 +04:00
class Images {
public:
Images();
void reset();
// . hash the candidates with a gbimage: prefix
// . called by XmlDoc.cpp
// . used by Image.cpp for determining uniqueness of image
2014-11-11 01:45:11 +03:00
//bool hash ( int32_t trVersion ,
2013-08-03 00:12:24 +04:00
// class Xml *xml ,
// class Url *url ,
// class TermTable *table ,
2014-11-11 01:45:11 +03:00
// int32_t score );
2013-08-03 00:12:24 +04:00
// set the m_imageNodes[] array to the potential thumbnails
void setCandidates ( class Url *pageUrl ,
class Words *words ,
class Xml *xml ,
2014-04-24 21:13:45 +04:00
class Sections *sections ,
class XmlDoc *xd );
2013-08-03 00:12:24 +04:00
// . returns false if blocked, true otherwise
// . sets errno on error
// . "termFreq" should NOT be on the stack in case we block
// . sets *termFreq to UPPER BOUND on # of records with that "termId"
bool getThumbnail ( char *pageSite ,
2014-11-11 01:45:11 +03:00
int32_t siteLen ,
2014-10-30 22:36:39 +03:00
int64_t docId ,
2013-08-03 00:12:24 +04:00
class XmlDoc *xd ,
collnum_t collnum,
2014-04-24 21:13:45 +04:00
//char **statusPtr ,
2014-11-11 01:45:11 +03:00
int32_t hopCount,
2013-08-03 00:12:24 +04:00
void *state ,
void (*callback)(void *state) );
//char *getImageData () { return m_imgData; };
2014-11-11 01:45:11 +03:00
//int32_t getImageDataSize() { return m_imgDataSize; };
//int32_t getImageType () { return m_imageType; };
2013-08-03 00:12:24 +04:00
SafeBuf m_imageBuf;
bool m_imageBufValid;
2014-11-11 01:45:11 +03:00
int32_t m_phase;
2013-08-03 00:12:24 +04:00
bool gotTermFreq();
bool launchRequests();
void gotTermList();
bool downloadImages();
2014-04-24 21:13:45 +04:00
bool getImageIp();
bool downloadImage();
bool makeThumb();
2014-11-11 01:45:11 +03:00
char *getImageUrl ( int32_t j , int32_t *urlLen ) ;
//bool gotImage ( );
2013-08-03 00:12:24 +04:00
void thumbStart_r ( bool amThread );
2014-11-11 01:45:11 +03:00
int32_t m_i;
int32_t m_j;
2013-08-03 00:12:24 +04:00
2014-04-24 21:13:45 +04:00
class XmlDoc *m_xd;
2013-08-03 00:12:24 +04:00
// callback information
void *m_state ;
void (* m_callback)(void *state );
2014-11-11 01:45:11 +03:00
int32_t m_xysize;
2013-08-03 00:12:24 +04:00
bool m_setCalled;
2014-11-11 01:45:11 +03:00
int32_t m_errno;
int32_t m_hadError;
2013-08-03 00:12:24 +04:00
bool m_stopDownloading;
2014-04-24 21:13:45 +04:00
//char **m_statusPtr;
2013-08-03 00:12:24 +04:00
char m_statusBuf[128];
collnum_t m_collnum;
2013-08-03 00:12:24 +04:00
2014-10-30 22:36:39 +03:00
int64_t m_docId;
2013-08-03 00:12:24 +04:00
IndexList m_list;
2014-11-11 01:45:11 +03:00
int32_t m_latestIp;
2014-04-24 21:13:45 +04:00
MsgC m_msgc;
Url m_imageUrl;
2014-11-11 01:45:11 +03:00
int32_t m_numImages;
int32_t m_imageNodes[MAX_IMAGES];
2013-08-03 00:12:24 +04:00
// termids for doing gbimage:<url> lookups for uniqueness
2014-10-30 22:36:39 +03:00
int64_t m_termIds [MAX_IMAGES];
2013-08-03 00:12:24 +04:00
// for the msg0 lookup, did we have an error?
2014-11-11 01:45:11 +03:00
int32_t m_errors [MAX_IMAGES];
2013-08-03 00:12:24 +04:00
class Url *m_pageUrl;
class Xml *m_xml;
Msg13Request m_msg13Request;
// . for getting # of permalinks from same hopcount/site
// . we need at least 10 for the uniqueness test to be effective
Msg36 m_msg36;
// . for getting docids that have the image
// . for the uniqueness test
Msg0 m_msg0;
Msg13 m_msg13;
// download status
2014-11-11 01:45:11 +03:00
int32_t m_httpStatus;
2013-08-03 00:12:24 +04:00
// ptr to the image as downloaded
char *m_imgData;
2014-11-11 01:45:11 +03:00
int32_t m_imgDataSize;
int32_t m_imgType;
2013-08-03 00:12:24 +04:00
// udp slot buffer
2014-04-24 21:13:45 +04:00
char *m_imgReply;
2014-11-11 01:45:11 +03:00
int32_t m_imgReplyLen; // how many bytes the image is
int32_t m_imgReplyMaxLen; // allocated for the image
int32_t m_dx; // width of image in pixels
int32_t m_dy; // height of image in pixels
2013-08-03 00:12:24 +04:00
bool m_thumbnailValid; // is it a valid thumbnail image
// we store the thumbnail into m_imgBuf, overwriting the original img
2014-11-11 01:45:11 +03:00
int32_t m_thumbnailSize;
2013-08-03 00:12:24 +04:00
// the thumbnail dimensions
2014-11-11 01:45:11 +03:00
int32_t m_tdx;
int32_t m_tdy;
2013-08-03 00:12:24 +04:00
};
#endif