// open-source-search-engine/File.h
// Matt Wells, Copyright May 2001
// . TODO: can't close() calls block us? with many fds open our closes
//   might block!!
// . TODO: must we create a separate fd for each non-blocking read even if
//   on the same file? that would save us...
// . this class simulates having far more file descriptors than the OS
//   actually provides.
// . by using its open/write/read/close it will seem like you have up to
//   MAX_NUM_VFDS (virtual) file descriptors
// . minimizes the # of open/closes it does.
// . On my solaris ultra 1 i could do 28,000 open/close pairs per second.
//   my 400mhz pentium linux box was 2.5 times faster! it only had 256 file
//   descriptors to work with, while the sun box had 1024.
// . the sockets must share real descriptors with these files, so we'd like
//   to set a maximum for each.
#ifndef _FILE_H_
#define _FILE_H_
// . size in bytes of the File::m_filename buffer
#define MAX_FILENAME_LEN 128
// . max # of VIRTUAL file descriptors
// . man, chris has 958 files, so this was cranked up from 2k to 5k
// . then boosted to 50k (50*1024 = 51200) since we were hitting the limit
//   with crawlbot
#define MAX_NUM_VFDS (50*1024)
#include <sys/types.h> // for open/lseek
#include <sys/stat.h> // for open
#include <fcntl.h> // for open
#include <sys/stat.h> // for stat
#include "Mem.h" // for g_mem
#include "Loop.h" // for g_loop.setNonBlocking(int fd)
// . file-size lookup by name, for callers that do not have a File object
// . NOTE(review): presumably follows the same convention as
//   File::getFileSize() (-1 on error, 0 if the file does not exist) --
//   confirm against File.cpp
int64_t getFileSize ( char *filename ) ;

// . for avoiding unlink/opens that mess up our threaded read
// . takes a real descriptor ("fd"), not a virtual one
// . NOTE(review): presumably a threaded reader compares the count before
//   and after its read to detect that the fd was closed underneath it --
//   confirm against File.cpp
int32_t getCloseCount_r ( int fd );

// . prevent fd from being closed on us when we are writing
// . NOTE(review): presumably every enterWriteMode() must be paired with an
//   exitWriteMode() on the same vfd -- confirm against File.cpp
void enterWriteMode ( int32_t vfd ) ;
void exitWriteMode  ( int32_t vfd ) ;

// . error correction routines used by BigFile.cpp
// . both take a VIRTUAL descriptor
void releaseVfd   ( int32_t vfd ) ;
int  getfdFromVfd ( int32_t vfd ) ;
// . one file, accessed through a VIRTUAL file descriptor (m_vfd)
// . the flags and permissions given to open() are saved so that getfd()
//   can REopen the file when it needs a real descriptor again
// . almost everything is public (see the "private:" note below) because
//   BigFile, a friend, drives this class directly, including from threads
class File {

	friend class BigFile;

 public:

	File ( );
	~File ( );

	// . if you don't need to do a full open then just set the filename
	// . useful for unlink/rename/reserve/...
	// . IMPORTANT: if bytes were already reserved can only increase the
	//   reserve, not decrease
	void set ( char *dir , char *filename );
	void set ( char *filename );

	// . returns false and sets errno on error, returns true on success
	bool rename ( char *newFilename );

	// . if m_vfd is negative it's never been opened
	bool isOpen () { return ( m_vfd >= 0 ); }

	bool isNonBlocking () ;

	// . get the file extension of this file
	// . return NULL if none
	char *getExtension ( ) ;

	// . uses lseek to get file's current position
	// . NOTE(review): 32-bit result, so positions of 2 GiB and above
	//   cannot be represented -- confirm callers never get that far
	int32_t getCurrentPos ( ) ;

	// . open() returns true on success, false on failure, errno is set.
	// . opens for reading/writing only
	// . returns false if does not exist
	// . default permissions are rw for user and group, read-only for others
	bool open ( int flags ,
		    int permissions =
		    S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );

	// . a negative offset (e.g. -1) means use the current read position
	// . returns what ::read returns
	// . returns -1 on lseek failure (if offset is valid)
	// . returns 0 on EOF
	// . returns numBytesRead if not error
	int read ( void *buf , int32_t size , int32_t offset );

	// . a negative offset (e.g. -1) means use the current write position
	// . returns what ::write returns
	// . returns -1 on lseek failure (if offset is valid)
	// . returns numBytesWritten if not error
	// . this is non-blocking so may return < "size"
	int write ( void *buf , int32_t size , int32_t offset );

	// . this will really close this file
	bool close ( );

	// . flush the output
	bool flush ( );

	// . used by threaded unlinks and renames by BigFile.cpp
	bool m_closedIt;
	void close1_r ();
	void close2 ();

	// . returns -1 on error
	// . otherwise returns file size in bytes
	// . returns 0 if does not exist
	int64_t getFileSize ( );

	// . when was it last touched?
	time_t getLastModifiedTime ( );

	// . returns -1 on error and sets errno
	// . returns 0 if does not exist
	// . returns 1 if it exists
	// . a simple stat check
	int32_t doesExist ( );

	// . this is a regular (non-static) member, so it needs an instance
	// . NOTE(review): presumably removes the file named by m_filename --
	//   confirm against File.cpp
	// . returns false and sets errno on error
	bool unlink ( );

	// . file position seeking -- just a wrapper for lseek
	// . returns -1 on error
	// . used by reserve/write/read/getFileSize()
	// . NOTE(review): offset and result are 32-bit, see getCurrentPos()
	int32_t lseek ( int32_t offset , int whence = SEEK_SET );

	// . will try to REopen the file to get the fd if necessary
	// . used by BigFile
	// . returns -2 if we've never been officially opened
	// . returns -1 on error getting the fd or opening this file
	// . must call open() before calling this
	int getfd ( ) ;

	// . return -1 if not opened, otherwise, return the opened fd
	int getfdNoOpen ( ) ;

	// . hands out a pointer to our internal buffer, not a copy
	char *getFilename ( ) { return m_filename; }

	// . our filename, stored inline in a fixed-size buffer
	// . we publicize for ease of use
	char m_filename [ MAX_FILENAME_LEN ];

	// . BigFile uses these when passing us to a thread for unlink/rename
	//   so it can store its THIS ptr and the i in BigFile::m_files[i]
	void    *m_this;
	int32_t  m_i;

	// . NOTE(review): no description in this header; presumably tied to
	//   the close counting done by getCloseCount_r() -- confirm against
	//   File.cpp
	int32_t m_closeCount;

	// private:
	// (everything below is meant to be private but is left public)

	// . initializes the fd pool
	bool initialize ();

	// . free the least-used file.
	bool closeLeastUsed ( );

	// . THIS file's VIRTUAL descriptor
	int m_vfd;

	// . save the permission and flag sets in case of re-opening
	int m_flags;
	int m_permissions;

	time_t  m_st_mtime; // file last mod date
	// . NOTE(review): 32-bit while getFileSize() returns int64_t, so a
	//   size of 2 GiB or more would not fit here -- confirm usage
	int32_t m_st_size;  // file size

	// . NOTE(review): near-duplicate of getLastModifiedTime() above;
	//   how the two differ is not visible from this header
	time_t getLastModifiedDate ( ) ;
};
#endif