// open-source-search-engine/HttpServer.h
// Copyright Matt Wells Nov 2000
// . built on top of TcpServer (originally described as "derived from
//   TcpServer"; the HttpServer class below holds its own TcpServer members)
// . fill in our own getMsgSize () -- looks for Content-Length:xxx
// . fill in our own getMsgPiece() -- looks on disk
// . fill in our own putMsgPiece() -- ??? for spidering big files!
// . everything here is just a generic non-blocking i/o system
// . move data from one file/mem to another file/mem that might be remote
//
// TODO: handle SIGPIPE!! use sigaction() ...
// TODO: first packet should have some file data in it, not just the MIME hdr
//       (avoid TCP delayed ACKs)
// TODO: investigate TCP_CORK: it delays sending a packet until it's full,
//       which improves performance quite a bit. unsetting TCP_CORK flushes it.
// TODO: investigate sendfile() (copies data between file descriptors)
#ifndef _HTTPSERVER_H_
#define _HTTPSERVER_H_

// page background color as a hex RGB string; alternatives kept for reference
//#define BGCOLOR "89e3A9" // green
#define BGCOLOR "ffffff" // white
//#define BGCOLOR "d0cfc0" // gray
//#define BGCOLOR "d0d0d9" // blue gray
//#define BGCOLOR "d0cfd0" // gray
//#define BGCOLOR "d6ced6" // bluish gray

// . size of the HttpServer::states[] and HttpServer::callbacks[] arrays
// . MAX_TCP_SOCKS is not defined in this file -- presumably it comes from
//   TcpServer.h (TODO confirm); being a macro, this only has to be
//   resolvable at the point of use, which is after the #include below
#define MAX_DOWNLOADS (MAX_TCP_SOCKS-50)

#include "TcpServer.h"
#include "Url.h"
#include "HttpRequest.h" // for parsing/forming HTTP requests
#include "HttpMime.h"

// default value of the "proto" argument of HttpServer::getDoc()
#define DEFAULT_HTTP_PROTO "HTTP/1.0"

//this is for low priority requests which come in while we are
//in a quickpoll
#define MAX_REQUEST_QUEUE 128
// . one entry of the low priority request queue (requests which come in
//   while we are in a quickpoll)
// . the fields mirror the (socket, request, page) arguments of
//   HttpServer::addToQueue()
struct QueuedRequest {
	// the request itself, stored by value
	HttpRequest  m_r;
	// the socket the request is associated with
	TcpSocket   *m_s;
	// page number -- presumably identifies which page handler to call
	// (TODO confirm against addToQueue()/callQueuedPages())
	int32_t      m_page;
};
typedef void (*tcp_callback_t)(void *, TcpSocket *);
2014-11-11 01:45:11 +03:00
int32_t getMsgSize ( char *buf , int32_t bufSize , TcpSocket *s );
2013-08-03 00:12:24 +04:00
bool sendPageAddEvent ( TcpSocket *s , HttpRequest *r );
class HttpServer {
public:
// reset the tcp server
void reset();
// returns false if initialization was unsuccessful
2014-11-11 01:45:11 +03:00
bool init ( int16_t port,
int16_t sslPort ,
2013-08-03 00:12:24 +04:00
void handlerWrapper ( TcpSocket *s) = NULL);
// . returns false if blocked, true otherwise
// . sets errno on error
// . supports partial gets with "offset" and "size"
// . IMPORTANT: we free read/send bufs of TcpSocket after callback
// . IMPORTANT: if you don't like this set s->m_read/sendBuf to NULL
// in your callback function
// . NOTE: this should always block unless errno is set
// . the TcpSocket's callbackData is a file ptr
// . replies MUST fit in memory (we have NOT implemented putMsgPiece())
// . uses the HTTP partial GET command if size is > 0
// . uses regular GET if size is -1
// . otherwise uses the HTTP HEAD command
// . the document will be in the s->m_readBuf/s->m_bytesRead of "s"
// . use Mime class to help parse the readBuf
// . timeout is in milliseconds since last read OR write
// . this now ensures that the read content is NULL terminated!
bool getDoc ( char *url , // Url *url ,
2014-11-11 01:45:11 +03:00
int32_t ip ,
int32_t offset ,
int32_t size ,
2013-08-03 00:12:24 +04:00
time_t ifModifiedSince ,
void *state ,
void (* callback) ( void *state , TcpSocket *s ) ,
2014-11-11 01:45:11 +03:00
int32_t timeout , // 60*1000
int32_t proxyIp ,
int16_t proxyPort,
int32_t maxTextDocLen ,
int32_t maxOtherDocLen ,
2013-08-03 00:12:24 +04:00
char *userAgent = NULL ,
//bool respectDownloadLimit = false ,
// . say HTTP/1.1 instead of 1.0 so we can communicate
// with room alert...
// . we do not support 1.1 that is why you should always
// use 1.0
2014-06-09 23:42:05 +04:00
char *proto = DEFAULT_HTTP_PROTO , // "HTTP/1.0" ,
2013-08-03 00:12:24 +04:00
bool doPost = false ,
char *cookie = NULL ,
char *additionalHeader = NULL , // does not include \r\n
// specify your own mime and post data here...
char *fullRequest = NULL ,
char *postContent = NULL ,
char *proxyUsernamePwdAuth = NULL );
2013-08-03 00:12:24 +04:00
2014-11-11 01:45:11 +03:00
bool getDoc ( int32_t ip,
int32_t port,
2013-08-03 00:12:24 +04:00
char *request,
2014-11-11 01:45:11 +03:00
int32_t requestLen,
2013-08-03 00:12:24 +04:00
void *state ,
void (* callback)( void *state , TcpSocket *s ) ,
2014-11-11 01:45:11 +03:00
int32_t timeout ,
int32_t maxTextDocLen ,
int32_t maxOtherDocLen );
2013-08-03 00:12:24 +04:00
//bool respectDownloadLimit = false );
2014-11-11 01:45:11 +03:00
bool gotDoc ( int32_t n , TcpSocket *s );
2013-08-03 00:12:24 +04:00
// just make a request with size set to 0 and it'll do a HEAD request
/*
bool getMime ( char *url ,
2014-11-11 01:45:11 +03:00
int32_t timeout ,
int32_t proxyIp ,
int16_t proxyPort ,
2013-08-03 00:12:24 +04:00
void *state ,
void (* callback) ( void *state , TcpSocket *s )) {
return getDoc (url,0,0,0,state,callback,
timeout,proxyIp,proxyPort,-1,-1); };
*/
// . this is public so requestHandlerWrapper() can call it
// . if it returns false "s" will be destroyed w/o a reply
void requestHandler ( TcpSocket *s );
// send an error reply, like "HTTP/1.1 404 Not Found"
2014-11-11 01:45:11 +03:00
bool sendErrorReply ( TcpSocket *s , int32_t error , char *errmsg ,
int32_t *bytesSent = NULL );
bool sendErrorReply ( class GigablastRequest *gr );
2014-07-07 01:13:00 +04:00
// xml and json uses this
bool sendSuccessReply ( class GigablastRequest *gr,char *addMsg=NULL);
2014-07-07 01:13:00 +04:00
bool sendSuccessReply (TcpSocket *s , char format , char *addMsg=NULL);
2013-08-03 00:12:24 +04:00
// send a "prettier" error reply, formatted in XML if necessary
2014-11-11 01:45:11 +03:00
bool sendQueryErrorReply ( TcpSocket *s , int32_t error , char *errmsg,
// FORMAT_HTML=0,FORMAT_XML,FORMAT_JSON
char format, int errnum,
2013-08-03 00:12:24 +04:00
char *content=NULL);
// these are for stopping annoying seo bots
2014-11-11 01:45:11 +03:00
void getKey ( int32_t *key, char *kname,
char *q , int32_t qlen , int32_t now , int32_t s , int32_t n ) ;
void getKeys ( int32_t *key1, int32_t *key2, char *kname1, char *kname2,
char *q , int32_t qlen , int32_t now , int32_t s , int32_t n ) ;
bool hasPermission ( int32_t ip , HttpRequest *r ,
char *q , int32_t qlen , int32_t s , int32_t n ) ;
2013-08-03 00:12:24 +04:00
// . used by the HttpPageX.h classes after making their dynamic content
// . returns false if blocked, true otherwise
// . sets errno on error
// . a cacheTime of -2 means browser should not cache when user
// is clicking forward or hitting back button OR anytime -- no cache!
// . a cacheTime of -1 means browser should not cache when user
// is clicking forward, but caching when clicking back button is ok
// . a cacheTime of 0 tells browser to use local caching rules
2014-11-11 01:45:11 +03:00
bool sendDynamicPage ( TcpSocket *s , char *page , int32_t pageLen ,
int32_t cacheTime = -1 , bool POSTReply = false ,
2013-08-03 00:12:24 +04:00
char *contentType = NULL,
2014-11-11 01:45:11 +03:00
int32_t httpStatus = -1,
2013-08-03 00:12:24 +04:00
char *cookie = NULL,
char *charset = NULL ,
HttpRequest *hr = NULL );
// for PageSockets
TcpServer *getTcp() { return &m_tcp; };
TcpServer *getSSLTcp() { return &m_ssltcp; };
// we contain our own tcp server
TcpServer m_tcp;
TcpServer m_ssltcp;
// cancel the transaction that had this state
void cancel ( void *state ) {
//void (*callback)(void *state, TcpSocket *s) ) {
m_tcp.cancel ( state );//, callback );
};
2014-11-11 01:45:11 +03:00
int32_t m_maxOpenSockets;
2013-08-03 00:12:24 +04:00
//for content-encoding: gzip, we unzip the reply and edit the
//header to reflect the new size and encoding
TcpSocket *unzipReply(TcpSocket* s);
float getCompressionRatio() {
if ( m_bytesDownloaded )
return (float)m_uncompressedBytes/m_bytesDownloaded;
else
return 0.0;
};
2013-08-03 00:12:24 +04:00
//this is for low priority requests which come in while we are
//in a quickpoll
2014-11-11 01:45:11 +03:00
bool addToQueue(TcpSocket *s, HttpRequest *r, int32_t page);
2013-08-03 00:12:24 +04:00
bool callQueuedPages();
2014-06-09 23:42:05 +04:00
bool processSquidProxyRequest ( TcpSocket *sock, HttpRequest *hr);
2013-08-03 00:12:24 +04:00
// private:
// like above but you supply the ip
2014-11-11 01:45:11 +03:00
bool sendRequest ( int32_t ip ,
int16_t port ,
2013-08-03 00:12:24 +04:00
char *request ,
void *state ,
void (* callback) ( void *state , TcpSocket *s ));
// go ahead and start sending the file ("path") over the socket
bool sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin);
bool sendReply2 ( char *mime,
2014-11-11 01:45:11 +03:00
int32_t mimeLen ,
2013-08-03 00:12:24 +04:00
char *content ,
2014-11-11 01:45:11 +03:00
int32_t contentLen ,
2013-08-03 00:12:24 +04:00
TcpSocket *s ,
bool alreadyCompressed = false ,
HttpRequest *hr = NULL) ;
void *states[MAX_DOWNLOADS];
tcp_callback_t callbacks[MAX_DOWNLOADS];
2014-10-30 22:36:39 +03:00
int64_t m_bytesDownloaded;
int64_t m_uncompressedBytes;
2013-08-03 00:12:24 +04:00
//QueuedRequest m_requestQueue[MAX_REQUEST_QUEUE];
2014-11-11 01:45:11 +03:00
//int32_t m_lastSlotUsed;
2013-08-03 00:12:24 +04:00
};
// the global HttpServer instance (declaration only; not defined in this header)
extern class HttpServer g_httpServer;
#endif