mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
Merge branch 'ia-zak' of https://github.com/gigablast/open-source-search-engine into ia-zak
This commit is contained in:
commit
b199c67355
521
BigFile.cpp
521
BigFile.cpp
@ -36,12 +36,12 @@ BigFile::BigFile () {
|
||||
m_permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
|
||||
m_flags = O_RDWR ; // | O_DIRECT;
|
||||
// NULLify all ptrs to files
|
||||
for ( int32_t i = 0 ; i < MAX_PART_FILES ; i++ ) m_files[i] = NULL;
|
||||
//for ( int32_t i = 0 ; i < MAX_PART_FILES ; i++ ) m_files[i] = NULL;
|
||||
m_maxParts = 0;
|
||||
m_numParts = 0;
|
||||
m_pc = NULL;
|
||||
m_vfd = -1;
|
||||
m_vfdAllowed = false;
|
||||
//m_vfdAllowed = false;
|
||||
m_fileSize = -1;
|
||||
m_lastModified = -1;
|
||||
m_numThreads = 0;
|
||||
@ -49,29 +49,57 @@ BigFile::BigFile () {
|
||||
g_lastDiskReadStarted = 0;
|
||||
g_lastDiskReadCompleted = 0;
|
||||
g_diskIsStuck = false;
|
||||
//memset ( m_littleBuf , 0 , LITTLEBUFSIZE );
|
||||
// avoid a malloc for small files.
|
||||
// this way we can save in memory RdbMaps upon a core, even malloc/free
|
||||
// related cores, cuz we won't have to do a malloc to save!
|
||||
//m_fileBuf.setBuf ( m_littleBuf,LITTLEBUFSIZE,0,false);
|
||||
// for this make the length always equal the capacity so when we
|
||||
// call reserve it builds on the whole thing
|
||||
//m_fileBuf.setLength ( m_fileBuf.getCapacity() );
|
||||
}
|
||||
|
||||
// we alternate parts into "dirname" and "stripeDir"
|
||||
// . return false and set g_errno on error
|
||||
bool BigFile::set ( char *dir , char *baseFilename , char *stripeDir ) {
|
||||
// reset filsize
|
||||
m_fileSize = -1;
|
||||
m_lastModified = -1;
|
||||
// m_baseFilename contains the "dir" in it
|
||||
//sprintf(m_baseFilename ,"%s/%s", dirname , baseFilename );
|
||||
strcpy ( m_baseFilename , baseFilename );
|
||||
strcpy ( m_dir , dir );
|
||||
if ( stripeDir ) strcpy ( m_stripeDir , stripeDir );
|
||||
else m_stripeDir[0] = '\0';
|
||||
|
||||
m_dir.reset();
|
||||
m_baseFilename.reset();
|
||||
|
||||
m_dir .setLabel("bfd");
|
||||
m_baseFilename.setLabel("bfbf");
|
||||
|
||||
// use this 32 byte char buf to avoid a malloc if possible
|
||||
m_baseFilename.setBuf (m_tmpBaseBuf,32,0,false);
|
||||
|
||||
if ( ! m_dir.safeStrcpy ( dir ) ) return false;
|
||||
if ( ! m_baseFilename.safeStrcpy ( baseFilename ) ) return false;
|
||||
|
||||
//strcpy ( m_baseFilename , baseFilename );
|
||||
//strcpy ( m_dir , dir );
|
||||
//if ( stripeDir ) strcpy ( m_stripeDir , stripeDir );
|
||||
//else m_stripeDir[0] = '\0';
|
||||
// reset # of parts
|
||||
m_numParts = 0;
|
||||
m_maxParts = 0;
|
||||
|
||||
m_filePtrsBuf.reset();
|
||||
|
||||
// now add parts from both directories
|
||||
if ( ! addParts ( m_dir ) ) return false;
|
||||
if ( ! addParts ( m_stripeDir ) ) return false;
|
||||
if ( ! addParts ( dir ) ) return false;
|
||||
//if ( ! addParts ( m_stripeDir ) ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool BigFile::reset ( ) {
|
||||
// RdbMap calls BigFile (m_file)::reset() so we need to free
|
||||
// the files and their safebufs for their filename and dir.
|
||||
close ();
|
||||
// reset filsize
|
||||
m_fileSize = -1;
|
||||
m_lastModified = -1;
|
||||
@ -82,18 +110,19 @@ bool BigFile::reset ( ) {
|
||||
//if ( stripeDir ) strcpy ( m_stripeDir , stripeDir );
|
||||
//else m_stripeDir[0] = '\0';
|
||||
// reset # of parts
|
||||
m_numParts = 0;
|
||||
m_maxParts = 0;
|
||||
//m_numParts = 0;
|
||||
//m_maxParts = 0;
|
||||
// now add parts from both directories
|
||||
if ( ! addParts ( m_dir ) ) return false;
|
||||
if ( ! addParts ( m_stripeDir ) ) return false;
|
||||
// MDW: why is this in reset() function? remove...
|
||||
//if ( ! addParts ( m_dir.getBufStart() ) ) return false;
|
||||
//if ( ! addParts ( m_stripeDir ) ) return false;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool BigFile::addParts ( char *dirname ) {
|
||||
// if dirname is NULL return true
|
||||
if ( ! dirname[0] ) return true;
|
||||
if ( ! dirname || ! dirname[0] ) return true;
|
||||
// . now set the names of all the Files that we consist of
|
||||
// . get the directory entry and find out what parts we have
|
||||
Dir dir;
|
||||
@ -102,9 +131,9 @@ bool BigFile::addParts ( char *dirname ) {
|
||||
if (!dir.open()) return log("disk: openDir (\"%s\") failed",dirname);
|
||||
// match files with this pattern in the directory
|
||||
char pattern[256];
|
||||
sprintf(pattern,"%s*", m_baseFilename );
|
||||
sprintf(pattern,"%s*", m_baseFilename.getBufStart() );
|
||||
// length of the base filename
|
||||
int32_t blen = gbstrlen ( m_baseFilename );
|
||||
int32_t blen = gbstrlen ( m_baseFilename.getBufStart() );
|
||||
// . set our m_files array
|
||||
// . addFile() will return false on problems
|
||||
// . the lower the fileId the older the file (w/ exception of #0)
|
||||
@ -127,12 +156,12 @@ bool BigFile::addParts ( char *dirname ) {
|
||||
}
|
||||
else part = atoi ( filename + blen + 5 );
|
||||
// ensure not too big
|
||||
if ( part >= MAX_PART_FILES ) {
|
||||
log ("disk: Part number of %"INT32" is too big for "
|
||||
"\"%s\". Should be less than %"INT32".",
|
||||
(int32_t)part,filename,(int32_t)MAX_PART_FILES);
|
||||
continue;
|
||||
}
|
||||
// if ( part >= MAX_PART_FILES ) {
|
||||
// log ("disk: Part number of %"INT32" is too big for "
|
||||
// "\"%s\". Should be less than %"INT32".",
|
||||
// (int32_t)part,filename,(int32_t)MAX_PART_FILES);
|
||||
// continue;
|
||||
// }
|
||||
// make this part file
|
||||
if ( ! addPart ( part ) ) return false;
|
||||
}
|
||||
@ -142,23 +171,78 @@ bool BigFile::addParts ( char *dirname ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// WE CAN'T REALLOC the safebuf because there might be a thread
|
||||
// referencing the file ptr. so let's just keep the m_filePtrs[] array
|
||||
// and realloc on that.
|
||||
bool BigFile::addPart ( int32_t n ) {
|
||||
if ( n >= MAX_PART_FILES )
|
||||
return log("disk: Part number %"INT32" > %"INT32".",
|
||||
n,(int32_t)MAX_PART_FILES);
|
||||
// if ( n >= MAX_PART_FILES )
|
||||
// return log("disk: Part number %"INT32" > %"INT32".",
|
||||
// n,(int32_t)MAX_PART_FILES);
|
||||
// . grow our dynamic array and return ptr to last element
|
||||
// . n's come in NOT necessarily in order!!!
|
||||
int32_t need = (n+1) * sizeof(File *);
|
||||
// capacity must be length always for this
|
||||
if ( m_filePtrsBuf.getCapacity() != m_filePtrsBuf.getLength() ) {
|
||||
char *xx=NULL;*xx=0;}
|
||||
|
||||
File *f ;
|
||||
try { f = new (File); }
|
||||
catch ( ... ) {
|
||||
g_errno = ENOMEM;
|
||||
return log("BigFile: new(%i): %s",(int)sizeof(File),
|
||||
mstrerror(g_errno));
|
||||
// init using tiny buf to save a malloc for small files
|
||||
if ( m_filePtrsBuf.getCapacity() == 0 ) {
|
||||
memset (m_tinyBuf,0,8);
|
||||
m_filePtrsBuf.setBuf ( m_tinyBuf,8,0,false);
|
||||
m_filePtrsBuf.setLength ( m_filePtrsBuf.getCapacity() );
|
||||
}
|
||||
|
||||
// how much more mem do we need?
|
||||
int32_t delta = need - m_filePtrsBuf.getLength();
|
||||
// . make sure our CAPACITY is increased by what we need
|
||||
// . SafeBuf::reserve() ADDS this much to current capacity
|
||||
// . true = clear new mem new new file ptrs are null because
|
||||
// there may be gaps or not exist because the BigFile was being
|
||||
// merged.
|
||||
if ( delta > 0 && ! m_filePtrsBuf.reserve ( delta ,"bfbuf",true ) ) {
|
||||
log("file: failed to reserve %i more mem for part",delta);
|
||||
return false;
|
||||
}
|
||||
// make length the capacity. so if buf is resized in call to
|
||||
// SafeBuf::reserve() it will copy over all of the old buf to new buf
|
||||
m_filePtrsBuf.setLength ( m_filePtrsBuf.getCapacity() );
|
||||
|
||||
File **filePtrs = (File **)m_filePtrsBuf.getBufStart();
|
||||
|
||||
//File *f = filesPtrs[n];
|
||||
// sanity to ensure we do not breach the buffer
|
||||
//char *fend = ((char *)f) + sizeof(File);
|
||||
//if ( fend > m_fileBuf.getBuf() ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// we have to call constructor ourself then
|
||||
//f->constructor();
|
||||
|
||||
File *f = NULL;
|
||||
|
||||
if ( m_numParts == 0 ) {
|
||||
f = (File *)m_littleBuf;
|
||||
if ( LITTLEBUFSIZE < sizeof(File) ) {
|
||||
log("file: littlebufsize too small.");
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
f->constructor();
|
||||
}
|
||||
else {
|
||||
try { f = new (File); }
|
||||
catch ( ... ) {
|
||||
g_errno = ENOMEM;
|
||||
return log("BigFile: new(%i): %s",(int)sizeof(File),
|
||||
mstrerror(g_errno));
|
||||
}
|
||||
mnew ( f , sizeof(File) , "BigFile" );
|
||||
}
|
||||
mnew ( f , sizeof(File) , "BigFile" );
|
||||
char buf[1024];
|
||||
makeFilename_r ( m_baseFilename , NULL, n , buf );
|
||||
// make the filename for this new File class
|
||||
makeFilename_r ( m_baseFilename.getBufStart() , NULL, n , buf , 1024 );
|
||||
// and set it with that
|
||||
f->set ( buf );
|
||||
m_files [ n ] = f;
|
||||
// store the ptr to it in m_filePtrs
|
||||
filePtrs [ n ] = f;
|
||||
m_numParts++;
|
||||
// set maxPart
|
||||
if ( n+1 > m_maxParts ) m_maxParts = n+1;
|
||||
@ -171,11 +255,16 @@ bool BigFile::doesExist ( ) {
|
||||
|
||||
// if we can open it with a valid fd, then it exists
|
||||
bool BigFile::doesPartExist ( int32_t n ) {
|
||||
if ( n >= MAX_PART_FILES ) return false;
|
||||
bool exists = (bool)m_files[n];
|
||||
return exists;
|
||||
//if ( n >= MAX_PART_FILES ) return false;
|
||||
if ( n >= m_maxParts ) return false;
|
||||
// f will be null if part does not exist
|
||||
File *f = getFile2(n);
|
||||
if ( f ) return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
static int64_t s_vfd = 0;
|
||||
|
||||
// . overide File::open so we can set m_numParts
|
||||
// . set maxFileSize when opening a new file for writing and using
|
||||
// DiskPageCache
|
||||
@ -192,10 +281,10 @@ bool BigFile::open ( int flags , class DiskPageCache *pc ,
|
||||
// . this returns our "virtual fd", not the same as File::m_vfd
|
||||
// . returns -1 and sets g_errno on failure
|
||||
// . we pass m_vfd to getPages() and addPages()
|
||||
if ( m_pc ) {
|
||||
if ( maxFileSize == -1 ) maxFileSize = getFileSize();
|
||||
m_vfd = m_pc->getVfd ( maxFileSize, m_vfdAllowed );
|
||||
g_errno = 0;
|
||||
if ( m_pc && m_vfd == -1 ) {
|
||||
//if ( maxFileSize == -1 ) maxFileSize = getFileSize();
|
||||
m_vfd = ++s_vfd;
|
||||
//g_errno = 0;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -204,15 +293,31 @@ bool BigFile::open ( int flags , class DiskPageCache *pc ,
|
||||
void BigFile::makeFilename_r ( char *baseFilename ,
|
||||
char *baseFilenameDir ,
|
||||
int32_t n ,
|
||||
char *buf ) {
|
||||
char *dir = m_dir;
|
||||
char *buf ,
|
||||
int32_t bufSize ) {
|
||||
char *dir = m_dir.getBufStart();
|
||||
if ( baseFilenameDir && baseFilenameDir[0] ) dir = baseFilenameDir;
|
||||
int32_t r;
|
||||
// ensure we do not breach the buffer
|
||||
// int32_t dirLen = gbstrlen(dir);
|
||||
// int32_t baseLen = gbstrlen(baseFilename);
|
||||
// int32_t need = dirLen + 1 + baseLen + 1;
|
||||
// if ( need < bufSize ) { char *xx=NULL;*xx=0; }
|
||||
//static char s[1024];
|
||||
if ( (n % 2) == 0 || ! m_stripeDir[0] )
|
||||
sprintf ( buf, "%s/%s", dir , baseFilename );
|
||||
else sprintf ( buf, "%s/%s", m_stripeDir, baseFilename );
|
||||
if ( n == 0 ) return ;
|
||||
sprintf ( buf + gbstrlen(buf) , ".part%"INT32"", n );
|
||||
// if ( (n % 2) == 0 || ! m_stripeDir[0] )
|
||||
// sprintf ( buf, "%s/%s", dir , baseFilename );
|
||||
// else sprintf ( buf, "%s/%s", m_stripeDir, baseFilename );
|
||||
if ( n == 0 ) {
|
||||
r = snprintf ( buf, bufSize, "%s/%s",dir,baseFilename);
|
||||
if ( r < bufSize ) return;
|
||||
// truncation is bad
|
||||
char *xx=NULL; *xx=0;
|
||||
}
|
||||
// return if it fit into "buf"
|
||||
r = snprintf ( buf, bufSize, "%s/%s.part%"INT32,dir,baseFilename,n);
|
||||
if ( r < bufSize ) return;
|
||||
// truncation is bad
|
||||
char *xx=NULL; *xx=0;
|
||||
}
|
||||
|
||||
//int BigFile::getfdByOffset ( int64_t offset ) {
|
||||
@ -221,30 +326,34 @@ void BigFile::makeFilename_r ( char *baseFilename ,
|
||||
|
||||
// . get the fd of the nth file
|
||||
// . will try to open the file if it hasn't yet been opened
|
||||
int BigFile::getfd ( int32_t n , bool forReading , int32_t *vfd ) {
|
||||
int BigFile::getfd ( int32_t n , bool forReading ) { // , int64_t *vfd ) {
|
||||
|
||||
// boundary check
|
||||
if ( n >= MAX_PART_FILES )
|
||||
return log("disk: Part number %"INT32" > %"INT32". fd not available.",
|
||||
n,(int32_t)MAX_PART_FILES) - 1;
|
||||
if ( n >= m_maxParts && ! addPart ( n ) ) {
|
||||
log("disk: Part number %"INT32" > %"INT32". fd "
|
||||
"not available.",
|
||||
n,m_maxParts);
|
||||
// return -1 to indicate can't do it
|
||||
return -1;
|
||||
}
|
||||
|
||||
// get the File ptr from the table
|
||||
File *f = m_files[n];
|
||||
// if part does not exist then create it!
|
||||
File *f = getFile2(n);
|
||||
// if part does not exist then create it! addPart(n) will do that?
|
||||
if ( ! f ) {
|
||||
// don't create File if we're getting it for reading
|
||||
if ( forReading ) return -1;
|
||||
if ( ! addPart (n) ) return -1;
|
||||
f = m_files[n];
|
||||
}
|
||||
// open it if not opened
|
||||
if ( ! f->isOpen() ) {
|
||||
if ( ! f->calledOpen() ) {
|
||||
if ( ! f->open ( m_flags , m_permissions ) ) {
|
||||
log("disk: Failed to open file part #%"INT32".",n);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
// set it virtual fd, too
|
||||
if ( vfd ) *vfd = f->m_vfd;
|
||||
//if ( vfd ) *vfd = f->m_vfd;
|
||||
// get it's file descriptor
|
||||
int fd = f->getfd ( ) ;
|
||||
if ( fd >= -1 ) return fd;
|
||||
@ -264,11 +373,18 @@ int64_t BigFile::getFileSize ( ) {
|
||||
// add up the sizes of each file
|
||||
int64_t totalSize = 0;
|
||||
for ( int32_t n = 0 ; n < m_maxParts ; n++ ) {
|
||||
// we can have headless big files... count the heads
|
||||
if ( ! m_files[n] ) { totalSize += MAX_PART_SIZE; continue; }
|
||||
// shortcut
|
||||
File *f = getFile2(n);
|
||||
// we can have headless big files... count the heads.
|
||||
// this can happen if the first Files were deleted because
|
||||
// of an ongoing merge operation.
|
||||
if ( ! f ) {
|
||||
totalSize += MAX_PART_SIZE;
|
||||
continue;
|
||||
}
|
||||
// . returns -2 on error, -1 if does not exist
|
||||
// . TODO: it returns 0 if does not exist! FIX...
|
||||
int32_t size = m_files[n]->getFileSize();
|
||||
int32_t size = f->getFileSize();
|
||||
if ( size == -2 ) return -2;
|
||||
if ( size == -1 ) break;
|
||||
totalSize += size;
|
||||
@ -288,10 +404,12 @@ time_t BigFile::getLastModifiedTime ( ) {
|
||||
// add up the sizes of each file
|
||||
time_t min = -1;
|
||||
for ( int32_t n = 0 ; n < m_maxParts ; n++ ) {
|
||||
// shortcut
|
||||
File *f = getFile2(n);
|
||||
// we can have headless big files... count the heads
|
||||
if ( ! m_files[n] ) continue;
|
||||
if ( ! f ) continue;
|
||||
// returns -1 on error, 0 if file does not exist
|
||||
time_t date = m_files[n]->getLastModifiedTime();
|
||||
time_t date = f->getLastModifiedTime();
|
||||
if ( date == -1 ) return -2;
|
||||
if ( date == 0 ) break;
|
||||
// check min
|
||||
@ -388,7 +506,7 @@ bool BigFile::readwrite ( void *buf ,
|
||||
log(LOG_LOGIC,"disk: readwrite() offset is %"INT64" "
|
||||
"< 0. filename=%s/%s. dumping core. try deleting "
|
||||
"the .map file for it and restarting.",offset,
|
||||
m_dir,m_baseFilename);
|
||||
m_dir.getBufStart(),m_baseFilename.getBufStart());
|
||||
char *xx = NULL; *xx = 0;
|
||||
}
|
||||
// if we're not blocking use a fake fstate
|
||||
@ -406,29 +524,32 @@ bool BigFile::readwrite ( void *buf ,
|
||||
int32_t allocSize;
|
||||
// reset this
|
||||
fstate->m_errno = 0;
|
||||
fstate->m_inPageCache = false;
|
||||
// . try to get as much as we can from page cache first
|
||||
// . the vfd of the big file will be the vfd of its last File class
|
||||
if ( ! doWrite && m_pc && allowPageCache ) {
|
||||
int32_t oldOff = offset;
|
||||
//int32_t oldOff = offset;
|
||||
// we have to set these so RdbScan doesn't freak out if we
|
||||
// have it all cached and return without hitting disk
|
||||
fstate->m_bytesDone = size;
|
||||
fstate->m_bytesToGo = size;
|
||||
// sanity
|
||||
if ( m_vfd == -1 ) { char *xx=NULL;*xx=0; }
|
||||
//log("getting pages off=%"INT64" size=%"INT32"",offset,size);
|
||||
// now we pass in a ptr to the buf ptr, because if buf is NULL
|
||||
// this will allocate one for us if it has some pages in the
|
||||
// cache that we can use.
|
||||
m_pc->getPages (m_vfd,(char **)&buf,size,offset,&size,&offset,
|
||||
&allocBuf,&allocSize,allocOff);
|
||||
char *readBuf = m_pc->getPages ( m_vfd, offset, size );
|
||||
//log("got pages off=%"INT64" size=%"INT32"",offset,size);
|
||||
bufOff = offset - oldOff;
|
||||
//bufOff = offset - oldOff;
|
||||
// comment out for test
|
||||
if ( size == 0 ) {
|
||||
if ( readBuf ) {
|
||||
// let caller/RdbScan know about the newly alloc'd buf
|
||||
fstate->m_buf = (char *)buf;
|
||||
fstate->m_allocBuf = allocBuf;
|
||||
fstate->m_allocSize = allocSize;
|
||||
fstate->m_allocOff = allocOff;
|
||||
fstate->m_buf = (char *)readBuf;
|
||||
fstate->m_allocBuf = readBuf;
|
||||
fstate->m_allocSize = size;
|
||||
fstate->m_allocOff = 0;
|
||||
fstate->m_inPageCache = true;
|
||||
return true;
|
||||
}
|
||||
// check
|
||||
@ -494,8 +615,8 @@ bool BigFile::readwrite ( void *buf ,
|
||||
// &fstate->m_vfd2);
|
||||
fstate->m_fd1 = -3;
|
||||
fstate->m_fd2 = -3;
|
||||
fstate->m_vfd1 = -3;
|
||||
fstate->m_vfd2 = -3;
|
||||
// fstate->m_vfd1 = -3;
|
||||
// fstate->m_vfd2 = -3;
|
||||
// . if we are writing, prevent these fds from being closed on us
|
||||
// by File::closedLeastUsed(), because the fd could then be re-opened
|
||||
// by someone else doing a write and we end up writing to THAT FILE!
|
||||
@ -504,14 +625,12 @@ bool BigFile::readwrite ( void *buf ,
|
||||
if ( doWrite ) {
|
||||
// actually have to do the open here for writing so it
|
||||
// can prevent the fds from being closed on us
|
||||
fstate->m_fd1 = getfd ( fstate->m_filenum1 , !doWrite,
|
||||
&fstate->m_vfd1);
|
||||
fstate->m_fd2 = getfd ( fstate->m_filenum2 , !doWrite,
|
||||
&fstate->m_vfd2);
|
||||
fstate->m_fd1 = getfd ( fstate->m_filenum1 , !doWrite);
|
||||
fstate->m_fd2 = getfd ( fstate->m_filenum2 , !doWrite);
|
||||
//File *f1 = m_files [ fstate->m_filenum1 ];
|
||||
//File *f2 = m_files [ fstate->m_filenum2 ];
|
||||
enterWriteMode( fstate->m_vfd1 );
|
||||
enterWriteMode( fstate->m_vfd2 );
|
||||
enterWriteMode( fstate->m_fd1 );
|
||||
enterWriteMode( fstate->m_fd2 );
|
||||
fstate->m_closeCount1 = getCloseCount_r ( fstate->m_fd1 );
|
||||
fstate->m_closeCount2 = getCloseCount_r ( fstate->m_fd2 );
|
||||
}
|
||||
@ -603,10 +722,8 @@ bool BigFile::readwrite ( void *buf ,
|
||||
// come here if we haven't spawned a thread
|
||||
skipThread:
|
||||
// if there was no room in the thread queue, then we must do this here
|
||||
fstate->m_fd1 = getfd ( fstate->m_filenum1 , !doWrite ,
|
||||
&fstate->m_vfd1);
|
||||
fstate->m_fd2 = getfd ( fstate->m_filenum2 , !doWrite ,
|
||||
&fstate->m_vfd2);
|
||||
fstate->m_fd1 = getfd ( fstate->m_filenum1 , !doWrite );
|
||||
fstate->m_fd2 = getfd ( fstate->m_filenum2 , !doWrite );
|
||||
fstate->m_closeCount1 = getCloseCount_r ( fstate->m_fd1 );
|
||||
fstate->m_closeCount2 = getCloseCount_r ( fstate->m_fd2 );
|
||||
// clear g_errno from the failed thread spawn
|
||||
@ -720,8 +837,8 @@ bool BigFile::readwrite ( void *buf ,
|
||||
//File *f2 = m_files [ fstate->m_filenum2 ];
|
||||
//f1->exitWriteMode();
|
||||
//f2->exitWriteMode();
|
||||
exitWriteMode( fstate->m_vfd1 );
|
||||
exitWriteMode( fstate->m_vfd2 );
|
||||
exitWriteMode( fstate->m_fd1 );
|
||||
exitWriteMode( fstate->m_fd2 );
|
||||
}
|
||||
|
||||
// set this up here
|
||||
@ -765,9 +882,9 @@ bool BigFile::readwrite ( void *buf ,
|
||||
// store read/written pages into page cache
|
||||
if ( ! g_errno && fstate->m_pc )
|
||||
fstate->m_pc->addPages ( fstate->m_vfd ,
|
||||
fstate->m_buf ,
|
||||
fstate->m_bytesDone ,
|
||||
fstate->m_offset ,
|
||||
fstate->m_bytesDone ,
|
||||
fstate->m_buf ,
|
||||
fstate->m_niceness );
|
||||
// now log our stuff here
|
||||
if ( g_errno && g_errno != EBADENGINEER )
|
||||
@ -823,8 +940,8 @@ void doneWrapper ( void *state , ThreadEntry *t ) {
|
||||
//File *f2 = THIS->m_files [ fstate->m_filenum2 ];
|
||||
//f1->exitWriteMode();
|
||||
//f2->exitWriteMode();
|
||||
exitWriteMode( fstate->m_vfd1 );
|
||||
exitWriteMode( fstate->m_vfd2 );
|
||||
exitWriteMode( fstate->m_fd1 );
|
||||
exitWriteMode( fstate->m_fd2 );
|
||||
}
|
||||
// if it read less than 8MB/s bitch
|
||||
int64_t took = fstate->m_doneTime - fstate->m_startTime;
|
||||
@ -849,9 +966,9 @@ void doneWrapper ( void *state , ThreadEntry *t ) {
|
||||
// reference it...
|
||||
if ( ! g_errno && fstate->m_pc )
|
||||
fstate->m_pc->addPages ( fstate->m_vfd ,
|
||||
fstate->m_buf ,
|
||||
fstate->m_bytesDone ,
|
||||
fstate->m_offset ,
|
||||
fstate->m_bytesDone ,
|
||||
fstate->m_buf ,
|
||||
fstate->m_niceness );
|
||||
|
||||
// add the stat
|
||||
@ -908,12 +1025,13 @@ void doneWrapper ( void *state , ThreadEntry *t ) {
|
||||
if ( g_errno && g_errno != EDISKSTUCK ) {
|
||||
//int fd1 = fstate->m_fd1;
|
||||
//int fd2 = fstate->m_fd2;
|
||||
int vfd1 = fstate->m_vfd1;
|
||||
int vfd2 = fstate->m_vfd2;
|
||||
int ofd1 = getfdFromVfd(vfd1);
|
||||
int ofd2 = getfdFromVfd(vfd2);
|
||||
log(tt,"disk: vfd1=%i s_fds[%i]=%i.",vfd1,vfd1,ofd1);
|
||||
log(tt,"disk: vfd2=%i s_fds[%i]=%i.",vfd2,vfd2,ofd2);
|
||||
//int vfd1 = fstate->m_vfd1;
|
||||
//int vfd2 = fstate->m_vfd2;
|
||||
//int ofd1 = getfdFromVfd(vfd1);
|
||||
//int ofd2 = getfdFromVfd(vfd2);
|
||||
//log(tt,"disk: vfd1=%i s_fds[%i].",vfd1,vfd1);//,ofd1);
|
||||
//log(tt,"disk: vfd2=%i s_fds[%i].",vfd2,vfd2);//,ofd2);
|
||||
log("disk: nondstuckerr=%s",mstrerror(g_errno));
|
||||
}
|
||||
// . this EBADENGINEER can happen right after a merge if
|
||||
// the file is renamed because the fd may have changed from
|
||||
@ -1005,13 +1123,14 @@ void *readwriteWrapper_r ( void *state , ThreadEntry *t ) {
|
||||
//pthread_testcancel();
|
||||
|
||||
// get the two files
|
||||
File *f1 = NULL;
|
||||
File *f2 = NULL;
|
||||
// when we exit, m_this is invalid!!!
|
||||
if ( fstate->m_filenum1 < fstate->m_this->m_maxParts )
|
||||
f1 = fstate->m_this->m_files[fstate->m_filenum1];
|
||||
if ( fstate->m_filenum2 < fstate->m_this->m_maxParts )
|
||||
f2 = fstate->m_this->m_files[fstate->m_filenum2];
|
||||
// mdw: no we can't access bigfile it might be deleted!
|
||||
// File *f1 = NULL;
|
||||
// File *f2 = NULL;
|
||||
// // when we exit, m_this is invalid!!!
|
||||
// if ( fstate->m_filenum1 < fstate->m_this->m_maxParts )
|
||||
// f1 = fstate->m_this->getFile2(fstate->m_filenum1);
|
||||
// if ( fstate->m_filenum2 < fstate->m_this->m_maxParts )
|
||||
// f2 = fstate->m_this->getFile2(fstate->m_filenum2);
|
||||
|
||||
// . if open count changed on us our file got unlinked from under us
|
||||
// and another file was opened with that same fd!!!
|
||||
@ -1025,16 +1144,20 @@ void *readwriteWrapper_r ( void *state , ThreadEntry *t ) {
|
||||
// i saw this happen on gk153... i preserved the core/gb on there
|
||||
//if ( (getCloseCount_r (fstate->m_fd1) != fstate->m_closeCount1 ||
|
||||
// getCloseCount_r (fstate->m_fd2) != fstate->m_closeCount2 )) {
|
||||
if ( ! f1 ||
|
||||
! f2 ||
|
||||
f1->m_closeCount != fstate->m_closeCount1 ||
|
||||
f2->m_closeCount != fstate->m_closeCount2 ) {
|
||||
|
||||
int32_t cc1 = -1;
|
||||
int32_t cc2 = -1;
|
||||
if ( f1 ) cc1 = f1->m_closeCount;
|
||||
if ( f2 ) cc2 = f2->m_closeCount;
|
||||
log("file: c1a=%"INT32" c1b=%"INT32" c2a=%"INT32" c2b=%"INT32"",
|
||||
// get current close counts. we can't access BigFile because it
|
||||
// might have been deleted or closed on us, i saw this before.
|
||||
int32_t cc1 = getCloseCount_r ( fstate->m_fd1 );
|
||||
int32_t cc2 = getCloseCount_r ( fstate->m_fd2 );
|
||||
if ( //! f1 ||
|
||||
//! f2 ||
|
||||
cc1 != fstate->m_closeCount1 ||
|
||||
cc2 != fstate->m_closeCount2 ) {
|
||||
// int32_t cc1 = -1;
|
||||
// int32_t cc2 = -1;
|
||||
// if ( f1 ) cc1 = f1->m_closeCount;
|
||||
// if ( f2 ) cc2 = f2->m_closeCount;
|
||||
log("file: c1a=%"INT32" c1b=%"INT32" "
|
||||
"c2a=%"INT32" c2b=%"INT32"",
|
||||
cc1,fstate->m_closeCount1,
|
||||
cc2,fstate->m_closeCount2);
|
||||
|
||||
@ -1171,6 +1294,36 @@ bool readwrite_r ( FileState *fstate , ThreadEntry *t ) {
|
||||
if ( doWrite ) n = pwrite ( fd , p , len , localOffset );
|
||||
else n = pread ( fd , p , len , localOffset );
|
||||
|
||||
// debug msg
|
||||
if ( g_conf.m_logDebugDisk ) {
|
||||
char *s = "read";
|
||||
if ( fstate->m_doWrite ) s = "wrote";
|
||||
char *t = "no"; // are we blocking?
|
||||
if ( fstate->m_flags & O_NONBLOCK ) t = "yes";
|
||||
// this is bad for real-time threads cuz our unlink() routine
|
||||
// may have been called by RdbMerge and our m_files may be
|
||||
// altered
|
||||
// MDW: don't access m_this in case bigfile was deleted
|
||||
// since we are in a thread
|
||||
log("disk::readwrite: %s %i bytes of %i @ offset %i "
|
||||
//"from BASEfile=%s "
|
||||
"(nonBlock=%s) "
|
||||
"fd %i "
|
||||
"cc1=%i=?%i cc2=%i=?%i errno=%s",
|
||||
s,n,len,localOffset,
|
||||
//fstate->m_this->getFilename(),
|
||||
t,
|
||||
fd,
|
||||
(int)fstate->m_closeCount1 ,
|
||||
(int)getCloseCount_r ( fstate->m_fd1 ) ,
|
||||
(int)fstate->m_closeCount2 ,
|
||||
(int)getCloseCount_r ( fstate->m_fd2 ) ,
|
||||
mstrerror(errno) );
|
||||
//log("disk::readwrite_r: %s %"INT32" bytes (nonBlock=%s)",
|
||||
//s,n,t);
|
||||
//log("disk::readwrite_r: did %"INT32" bytes", n);
|
||||
}
|
||||
|
||||
// interrupted system call?
|
||||
if ( n < 0 && errno == EINTR )
|
||||
goto retry25;
|
||||
@ -1178,28 +1331,20 @@ bool readwrite_r ( FileState *fstate , ThreadEntry *t ) {
|
||||
// this is thread safe...
|
||||
g_lastDiskReadCompleted = g_now; // gettimeofdayInMilliseconds_r();
|
||||
|
||||
// debug msg
|
||||
//char *s = "read";
|
||||
//if ( fstate->m_doWrite ) s = "wrote";
|
||||
//char *t = "no"; // are we blocking?
|
||||
//if ( fstate->m_this->getFlags() & O_NONBLOCK ) t = "yes";
|
||||
// this is bad for real-time threads cuz our unlink() routine may
|
||||
// have been called by RdbMerge and our m_files may be altered
|
||||
//log("disk::readwrite: %s %"INT32" bytes from %s(nonBlock=%s)",s,n,
|
||||
// m_files[filenum]->getFilename(),t);
|
||||
//log("disk::readwrite_r: %s %"INT32" bytes (nonBlock=%s)", s,n,t);
|
||||
//log("disk::readwrite_r: did %"INT32" bytes", n);
|
||||
|
||||
// . if n is 0 that's strange!!
|
||||
// . i think the fd will have been closed and re-opened on us if this
|
||||
// happens... usually
|
||||
if (n==0 && len > 0 ) {
|
||||
log("disk: Read of %"INT32" bytes at offset %"INT64" for %s "
|
||||
"failed because file is too int16_t for that "
|
||||
// MDW: don't access m_this in case bigfile was deleted
|
||||
// since we are in a thread
|
||||
log("disk: Read of %"INT32" bytes at offset %"INT64" "
|
||||
" failed because file is too short for that "
|
||||
"offset? Our fd was probably stolen from us by another "
|
||||
"thread. Will retry. error=%s.",
|
||||
(int32_t)len,fstate->m_offset,
|
||||
fstate->m_this->getFilename(),mstrerror(errno));
|
||||
//fstate->m_this->getDir(),
|
||||
//fstate->m_this->getFilename(),
|
||||
mstrerror(errno));
|
||||
errno = EBADENGINEER;
|
||||
return false; // log("disk::read/write: offset too big");
|
||||
}
|
||||
@ -1259,7 +1404,7 @@ bool BigFile::unlink ( ) {
|
||||
}
|
||||
|
||||
bool BigFile::move ( char *newDir ) {
|
||||
return rename ( m_baseFilename , newDir );
|
||||
return rename ( m_baseFilename.getBufStart() , newDir );
|
||||
}
|
||||
|
||||
bool BigFile::rename ( char *newBaseFilename , char *newBaseFilenameDir ) {
|
||||
@ -1330,6 +1475,21 @@ bool BigFile::unlinkRename ( // non-NULL for renames, NULL for unlinks
|
||||
// into the trash subdir, so we must preserve the full path
|
||||
char *s ;
|
||||
while( (s=strchr(newBaseFilename,'/'))) newBaseFilename = s+1;
|
||||
|
||||
// now this is dynamic to save mem when we have 100,000+ files
|
||||
m_newBaseFilename .reset();
|
||||
m_newBaseFilenameDir.reset();
|
||||
|
||||
m_newBaseFilename .setLabel("nbfn");
|
||||
m_newBaseFilenameDir.setLabel("nbfnd");
|
||||
|
||||
if ( ! m_newBaseFilename.safeStrcpy ( newBaseFilename ) )
|
||||
return false;
|
||||
if ( ! m_newBaseFilenameDir.safeStrcpy ( newBaseFilenameDir ) )
|
||||
return false;
|
||||
// in case newBaseFilenameDir was NULL
|
||||
m_newBaseFilenameDir.nullTerm();
|
||||
|
||||
// close all files -- they close themselves when we call rename
|
||||
// close ();
|
||||
// . set a new base filename for us
|
||||
@ -1338,12 +1498,12 @@ bool BigFile::unlinkRename ( // non-NULL for renames, NULL for unlinks
|
||||
// done (doneWrapper) it will call File::set.
|
||||
// . when all renames have completed then
|
||||
// m_bigFile::m_baseFilename will be set to m_newBaseFilename
|
||||
strcpy ( m_newBaseFilename , newBaseFilename );
|
||||
//strcpy ( m_newBaseFilename , newBaseFilename );
|
||||
// save this guy
|
||||
if ( newBaseFilenameDir )
|
||||
strcpy ( m_newBaseFilenameDir , newBaseFilenameDir );
|
||||
else
|
||||
m_newBaseFilenameDir[0] = '\0';
|
||||
//if ( newBaseFilenameDir )
|
||||
// strcpy ( m_newBaseFilenameDir , newBaseFilenameDir );
|
||||
//else
|
||||
// m_newBaseFilenameDir[0] = '\0';
|
||||
// set the op flag
|
||||
m_isUnlink = false;
|
||||
}
|
||||
@ -1371,7 +1531,7 @@ bool BigFile::unlinkRename ( // non-NULL for renames, NULL for unlinks
|
||||
// break out if we should only unlink one part
|
||||
if ( m_part >= 0 && i != m_part ) break;
|
||||
// get the ith file to rename/unlink
|
||||
File *f = m_files[i];
|
||||
File *f = getFile2(i);
|
||||
if ( ! f ) {
|
||||
// one less part to do
|
||||
m_partsRemaining--;
|
||||
@ -1442,7 +1602,8 @@ bool BigFile::unlinkRename ( // non-NULL for renames, NULL for unlinks
|
||||
if ( m_isUnlink && part == -1 ) {
|
||||
// release it first, cuz the removeThreads() below
|
||||
// may call QUICKPOLL() and we end up reading from same file!
|
||||
if ( m_pc ) m_pc->rmVfd ( m_vfd );
|
||||
// this is no longer needed since we use rdbcache basically now
|
||||
//if ( m_pc ) m_pc->rmVfd ( m_vfd );
|
||||
// remove all queued threads that point to us that have not
|
||||
// yet been launched
|
||||
g_threads.m_threadQueues[DISK_THREAD].removeThreads(this);
|
||||
@ -1452,7 +1613,9 @@ bool BigFile::unlinkRename ( // non-NULL for renames, NULL for unlinks
|
||||
// if one blocked, we block, but never return false if !useThread
|
||||
if ( m_numThreads > 0 && useThread ) return false;
|
||||
// . if we launched no threads update OUR base filename right now
|
||||
if ( ! m_isUnlink ) strcpy ( m_baseFilename , m_newBaseFilename );
|
||||
//if ( ! m_isUnlink ) strcpy ( m_baseFilename , m_newBaseFilename );
|
||||
if ( ! m_isUnlink )
|
||||
m_baseFilename.set ( m_newBaseFilename.getBufStart() );
|
||||
// we did not block
|
||||
return true;
|
||||
}
|
||||
@ -1473,15 +1636,17 @@ void *renameWrapper_r ( void *state , ThreadEntry *t ) {
|
||||
// . get the new full name for this file
|
||||
// . based on m_dir/m_stripeDir and m_baseFilename
|
||||
char newFilename [ 1024 ];
|
||||
THIS->makeFilename_r ( THIS->m_newBaseFilename ,
|
||||
THIS->m_newBaseFilenameDir ,
|
||||
THIS->makeFilename_r ( THIS->m_newBaseFilename.getBufStart() ,
|
||||
THIS->m_newBaseFilenameDir.getBufStart() ,
|
||||
i ,
|
||||
newFilename );
|
||||
newFilename ,
|
||||
1024 );
|
||||
char oldFilename [ 1024 ];
|
||||
THIS->makeFilename_r ( THIS->m_baseFilename ,
|
||||
THIS->makeFilename_r ( THIS->m_baseFilename.getBufStart() ,
|
||||
NULL ,
|
||||
i ,
|
||||
oldFilename );
|
||||
oldFilename ,
|
||||
1024 );
|
||||
//if ( m_files[i]->rename ( newFilename ) ) continue;
|
||||
// this returns 0 on success
|
||||
if ( ::rename ( oldFilename , newFilename ) ) {
|
||||
@ -1552,15 +1717,17 @@ void doneRenameWrapper ( void *state , ThreadEntry *t ) {
|
||||
THIS->getFilename(),mstrerror(g_errno));
|
||||
// get the ith file we just unlinked
|
||||
int32_t i = f->m_i;
|
||||
File *fi = THIS->getFile2 ( i );
|
||||
// rename the part if it checks out
|
||||
if ( f == THIS->m_files[i] ) {
|
||||
if ( f == fi ) {
|
||||
// set his new name
|
||||
char newFilename [ 1024 ];
|
||||
THIS->makeFilename_r ( THIS->m_newBaseFilename,
|
||||
THIS->m_newBaseFilenameDir,
|
||||
i,
|
||||
newFilename);
|
||||
THIS->m_files[i]->set ( newFilename );
|
||||
THIS->makeFilename_r (THIS->m_newBaseFilename.getBufStart(),
|
||||
THIS->m_newBaseFilenameDir.getBufStart(),
|
||||
i,
|
||||
newFilename ,
|
||||
1024 );
|
||||
fi->set ( newFilename );
|
||||
}
|
||||
// otherwise bitch about it
|
||||
else log(LOG_LOGIC,"disk: Rename had bad file ptr.");
|
||||
@ -1571,7 +1738,10 @@ void doneRenameWrapper ( void *state , ThreadEntry *t ) {
|
||||
// return if more to do
|
||||
if ( THIS->m_partsRemaining > 0 ) return;
|
||||
// update OUR base filename now after all Files are renamed
|
||||
strcpy ( THIS->m_baseFilename , THIS->m_newBaseFilename );
|
||||
//strcpy ( THIS->m_baseFilename , THIS->m_newBaseFilename );
|
||||
THIS->m_baseFilename.reset();
|
||||
THIS->m_baseFilename.setLabel("nbfnn");
|
||||
THIS->m_baseFilename.safeStrcpy(THIS->m_newBaseFilename.getBufStart());
|
||||
// . all done, call the main callback
|
||||
// . this is NULL if we were not called in a thread
|
||||
if ( THIS->m_callback ) THIS->m_callback ( THIS->m_state );
|
||||
@ -1595,7 +1765,8 @@ void doneUnlinkWrapper ( void *state , ThreadEntry *t ) {
|
||||
int32_t i = f->m_i;
|
||||
// . remove the part if it checks out
|
||||
// . this will also close the file when it deletes it
|
||||
if ( f == THIS->m_files[i] ) THIS->removePart ( i );
|
||||
File *fi = THIS->getFile2(i);
|
||||
if ( f == fi ) THIS->removePart ( i );
|
||||
// otherwise bitch about it
|
||||
else log(LOG_LOGIC,"disk: Unlink had bad file ptr.");
|
||||
// bail if more to do
|
||||
@ -1608,22 +1779,26 @@ void doneUnlinkWrapper ( void *state , ThreadEntry *t ) {
|
||||
}
|
||||
|
||||
void BigFile::removePart ( int32_t i ) {
|
||||
|
||||
File *f = m_files[i];
|
||||
//File *f = getFile2(i);
|
||||
File **filePtrs = (File **)m_filePtrsBuf.getBufStart();
|
||||
File *f = filePtrs[i];
|
||||
// . thread should have stored the filename for unlinking
|
||||
// . now delete it from memory
|
||||
//f->destructor();
|
||||
mdelete ( f , sizeof(File) , "BigFile" );
|
||||
delete (f);
|
||||
// and clear from our table
|
||||
m_files[i] = NULL;
|
||||
filePtrs[i] = NULL;
|
||||
// we have one less part
|
||||
m_numParts--;
|
||||
// max part num may be different
|
||||
if ( m_maxParts != i+1 ) return;
|
||||
// set m_maxParts
|
||||
int32_t j;
|
||||
for ( j = i ; j >= 0 ; j-- )
|
||||
if ( m_files[j] ) { m_maxParts = j+1; break; }
|
||||
for ( j = i ; j >= 0 ; j-- ) {
|
||||
File *fj = filePtrs[j];
|
||||
if ( fj ) { m_maxParts = j+1; break; }
|
||||
}
|
||||
// may have no more part files left which means no max part num
|
||||
if ( j < 0 ) m_maxParts = 0;
|
||||
}
|
||||
@ -1633,8 +1808,9 @@ void BigFile::removePart ( int32_t i ) {
|
||||
// doesn't work.
|
||||
bool BigFile::closeFds ( ) {
|
||||
for ( int32_t i = 0 ; i < m_maxParts ; i++ ) {
|
||||
if ( ! m_files[i] ) continue;
|
||||
m_files[i]->close();
|
||||
File *f = getFile2(i);
|
||||
if ( ! f ) continue;
|
||||
f->close();
|
||||
}
|
||||
return true;
|
||||
}
|
||||
@ -1645,28 +1821,41 @@ bool BigFile::close ( ) {
|
||||
// this end up being called again through a sequence of like 20
|
||||
// subroutines, so put a stop to that circle
|
||||
m_isClosing = true;
|
||||
File **filePtrs = (File **)m_filePtrsBuf.getBufStart();
|
||||
for ( int32_t i = 0 ; i < m_maxParts ; i++ ) {
|
||||
if ( ! m_files[i] ) continue;
|
||||
m_files[i]->close();
|
||||
mdelete ( m_files[i] , sizeof(File) , "BigFile" );
|
||||
delete (m_files[i]);
|
||||
m_files[i] = NULL;
|
||||
File *f = filePtrs[i];
|
||||
if ( ! f ) continue;
|
||||
// remove from our array of File ptrs
|
||||
filePtrs[i] = NULL;
|
||||
// the destructor calls close, no need to call here
|
||||
//f->close();
|
||||
//f->destructor();
|
||||
// if we were using the stack buf in BigFile then just
|
||||
// call File::destructor()
|
||||
if ( f == (File *)m_littleBuf ) {
|
||||
f->destructor();
|
||||
continue;
|
||||
}
|
||||
// otherwise, delete as we normally would
|
||||
mdelete ( f , sizeof(File) , "BigFile" );
|
||||
delete ( f );
|
||||
}
|
||||
m_numParts = 0;
|
||||
m_maxParts = 0;
|
||||
|
||||
// save vfd and pc because removeThreads() actually ends up calling
|
||||
// the done wrapper, sending back an error reply, shutting down the
|
||||
// udp server, calling main.cpp::resetAll(), which resets the Rdb and
|
||||
// free this big file
|
||||
DiskPageCache *pc = m_pc;
|
||||
int32_t vfd = m_vfd;
|
||||
//DiskPageCache *pc = m_pc;
|
||||
//int32_t vfd = m_vfd;
|
||||
|
||||
// remove all queued threads that point to us that have not
|
||||
// yet been launched
|
||||
g_threads.m_threadQueues[DISK_THREAD].removeThreads(this);
|
||||
// release our pages from the DiskPageCache
|
||||
//if ( m_pc ) m_pc->rmVfd ( m_vfd );
|
||||
if ( pc ) pc->rmVfd ( vfd );
|
||||
//if ( pc ) pc->rmVfd ( vfd );
|
||||
return true;
|
||||
}
|
||||
|
||||
|
73
BigFile.h
73
BigFile.h
@ -23,10 +23,16 @@ ssize_t gbpwrite(int fd, const void *buf, size_t count, off_t offset);
|
||||
//#define MAX_PART_SIZE (32LL*1024LL*1024LL)
|
||||
|
||||
// have enough part files to do a 2048gig file
|
||||
#define MAX_PART_FILES (((2048LL*1000LL*1000LL*1000LL)/MAX_PART_SIZE)+1LL)
|
||||
//#define MAX_PART_FILES (((2048LL*1000LL*1000LL*1000LL)/MAX_PART_SIZE)+1LL)
|
||||
|
||||
// HACK to save mem. support a 128GB file
|
||||
//#define MAX_PART_FILES (((128LL*1000LL*1000LL*1000LL)/MAX_PART_SIZE)+1LL)
|
||||
|
||||
// debug define
|
||||
//#define MAX_PART_FILES 100
|
||||
|
||||
#define LITTLEBUFSIZE 210
|
||||
|
||||
// use this state class for doing non-blocking reads/writes
|
||||
#ifdef ASYNCIO
|
||||
#include <aio.h> // TODO: use kaio, uses only 4 threads
|
||||
@ -53,6 +59,8 @@ public:
|
||||
void (*m_callback) ( void *state ) ;
|
||||
// goes from 0 to 1, the lower the niceness, the higher the priority
|
||||
int32_t m_niceness;
|
||||
// was it found in the disk page cache?
|
||||
char m_inPageCache;
|
||||
// . if signal is still pending we need to know if BigFile got deleted
|
||||
// . m_files must be NULL terminated
|
||||
//class BigFile **m_files;
|
||||
@ -76,15 +84,15 @@ public:
|
||||
class DiskPageCache *m_pc;
|
||||
// this is just used for accessing the DiskPageCache, m_pc, it is
|
||||
// a "virtual fd" for this whole file
|
||||
int32_t m_vfd;
|
||||
int64_t m_vfd;
|
||||
// test parms
|
||||
//int32_t m_osize;
|
||||
//char *m_obuf;
|
||||
// for avoiding unlink/reopens while doing a threaded read
|
||||
int32_t m_closeCount1 ;
|
||||
int32_t m_closeCount2 ;
|
||||
int32_t m_vfd1;
|
||||
int32_t m_vfd2;
|
||||
//int32_t m_vfd1;
|
||||
//int32_t m_vfd2;
|
||||
|
||||
//char m_baseFilename[32];
|
||||
int32_t m_flags;
|
||||
@ -147,7 +155,9 @@ class BigFile {
|
||||
int64_t getSize ( ) { return getFileSize(); };
|
||||
|
||||
// use the base filename as our filename
|
||||
char *getFilename() { return m_baseFilename; };
|
||||
char *getFilename() { return m_baseFilename.getBufStart(); };
|
||||
|
||||
char *getDir() { return m_dir.getBufStart(); };
|
||||
|
||||
// . returns false if blocked, true otherwise
|
||||
// . sets g_errno on error
|
||||
@ -217,12 +227,12 @@ class BigFile {
|
||||
|
||||
// . opens the nth file if necessary to get it's fd
|
||||
// . returns -1 if none, >=0 on success
|
||||
int getfd ( int32_t n , bool forReading , int32_t *vfd = NULL );
|
||||
int getfd ( int32_t n , bool forReading );//, int32_t *vfd = NULL );
|
||||
|
||||
// public for wrapper to call
|
||||
//bool readwrite_r ( FileState *fstate );
|
||||
|
||||
int64_t m_currentOffset;
|
||||
//int64_t m_currentOffset;
|
||||
|
||||
DiskPageCache *getDiskPageCache ( ) { return m_pc; };
|
||||
int32_t getVfd ( ) { return m_vfd; };
|
||||
@ -230,13 +240,12 @@ class BigFile {
|
||||
// WARNING: some may have been unlinked from call to chopHead()
|
||||
int32_t getNumParts ( ) { return m_numParts; };
|
||||
|
||||
File *getFile ( int32_t n ) { return m_files[n]; };
|
||||
|
||||
// makes the filename of part file #n
|
||||
void makeFilename_r ( char *baseFilename ,
|
||||
char *baseFilenameDir ,
|
||||
int32_t n ,
|
||||
char *buf );
|
||||
char *buf ,
|
||||
int32_t maxBufSize );
|
||||
|
||||
void removePart ( int32_t i ) ;
|
||||
|
||||
@ -253,17 +262,16 @@ class BigFile {
|
||||
// number of parts remaining to be unlinked/renamed
|
||||
int32_t m_partsRemaining;
|
||||
|
||||
// rename stores the new name here so we can rename the m_files[i]
|
||||
// after the rename has completed and the rename thread returns
|
||||
char m_newBaseFilename [256];
|
||||
// if first char in this dir is 0 then use m_dir
|
||||
char m_newBaseFilenameDir [256];
|
||||
char m_tinyBuf[8];
|
||||
|
||||
// store our base filename here
|
||||
char m_baseFilename [256];
|
||||
// to hold the array of Files
|
||||
SafeBuf m_filePtrsBuf;
|
||||
|
||||
// enough mem for our first File so we can avoid a malloc
|
||||
char m_littleBuf[LITTLEBUFSIZE];
|
||||
|
||||
// ptrs to the part files
|
||||
File *m_files [ MAX_PART_FILES ];
|
||||
//File *m_files ;//[ MAX_PART_FILES ];
|
||||
|
||||
// private:
|
||||
|
||||
@ -299,8 +307,17 @@ class BigFile {
|
||||
|
||||
//bool unlinkPart ( int32_t n , bool block );
|
||||
|
||||
File *getFile2 ( int32_t n ) {
|
||||
if ( n >= m_maxParts ) return NULL;
|
||||
File **filePtrs = (File **)m_filePtrsBuf.getBufStart();
|
||||
File *f = filePtrs[n];
|
||||
//if ( ! f ->calledSet() ) return NULL;
|
||||
// this will be NULL if addPart(n) never called
|
||||
return f;
|
||||
};
|
||||
|
||||
// if part file not created, will create it
|
||||
File *getPartFile ( int32_t n ) { return m_files[n]; };
|
||||
//File *getPartFile2 ( int32_t n ) { return getFile2(n); }
|
||||
|
||||
// . put a signal on the queue to do reading/writing
|
||||
// . we call readwrite ( FileState *) when we handle the signal
|
||||
@ -308,9 +325,19 @@ class BigFile {
|
||||
|
||||
bool reset ( );
|
||||
|
||||
// store our base filename here
|
||||
char m_dir [256];
|
||||
char m_stripeDir [256];
|
||||
// for basefilename to avoid an alloc
|
||||
char m_tmpBaseBuf[32];
|
||||
|
||||
// our most important the directory and filename
|
||||
SafeBuf m_dir ;// [256];
|
||||
SafeBuf m_baseFilename ;//[256];
|
||||
|
||||
// rename stores the new name here so we can rename the m_files[i]
|
||||
// after the rename has completed and the rename thread returns
|
||||
SafeBuf m_newBaseFilename ;// [256];
|
||||
// if first char in this dir is 0 then use m_dir
|
||||
SafeBuf m_newBaseFilenameDir ;//[256];
|
||||
|
||||
|
||||
int32_t m_permissions;
|
||||
int32_t m_flags;
|
||||
@ -322,7 +349,7 @@ class BigFile {
|
||||
|
||||
class DiskPageCache *m_pc;
|
||||
int32_t m_vfd;
|
||||
bool m_vfdAllowed;
|
||||
//bool m_vfdAllowed;
|
||||
|
||||
// prevent circular calls to BigFile::close() with this
|
||||
char m_isClosing;
|
||||
|
@ -40,7 +40,7 @@ bool Blaster::init(){
|
||||
log("blaster::hashinit failed" ); return 0; }
|
||||
|
||||
// init the memory class after conf since it gets maxMem from Conf
|
||||
if ( ! g_mem.init ( 200000000 ) ) {
|
||||
if ( ! g_mem.init ( ) ) {//200000000 ) ) {
|
||||
log("blaster::Mem init failed" ); return 0; }
|
||||
// start up log file
|
||||
if ( ! g_log.init( "/tmp/blasterLog" ) ) {
|
||||
|
@ -41,9 +41,7 @@ bool Cachedb::init ( ) {
|
||||
if ( ! m_pc.init ( m_name ,
|
||||
m_rdbId, // RDB_CACHEDB,
|
||||
pcmem ,
|
||||
pageSize ,
|
||||
true , // use shared mem?
|
||||
false )) // minimizeDiskSeeks?
|
||||
pageSize ))
|
||||
return log("db: %s init failed.",m_name);
|
||||
// init the rdb
|
||||
if ( ! m_rdb.init ( g_hostdb.m_dir ,
|
||||
|
@ -36,6 +36,7 @@ Collectiondb::Collectiondb ( ) {
|
||||
m_numRecs = 0;
|
||||
m_numRecsUsed = 0;
|
||||
m_numCollsSwappedOut = 0;
|
||||
m_initializing = false;
|
||||
//m_lastUpdateTime = 0LL;
|
||||
m_needsSave = false;
|
||||
// sanity
|
||||
@ -88,16 +89,30 @@ bool Collectiondb::init ( bool isDump ) {
|
||||
}
|
||||
*/
|
||||
|
||||
extern bool g_inAutoSave;
|
||||
|
||||
// . save to disk
|
||||
// . returns false if blocked, true otherwise
|
||||
bool Collectiondb::save ( ) {
|
||||
if ( g_conf.m_readOnlyMode ) return true;
|
||||
|
||||
if ( g_inAutoSave && m_numRecsUsed > 20 && g_hostdb.m_hostId != 0 )
|
||||
return true;
|
||||
|
||||
// which collection rec needs a save
|
||||
for ( int32_t i = 0 ; i < m_numRecs ; i++ ) {
|
||||
if ( ! m_recs[i] ) continue;
|
||||
// temp debug message
|
||||
//logf(LOG_DEBUG,"admin: SAVING collection #%"INT32" ANYWAY",i);
|
||||
if ( ! m_recs[i]->m_needsSave ) continue;
|
||||
|
||||
// if we core in malloc we won't be able to save the
|
||||
// coll.conf files
|
||||
if ( m_recs[i]->m_isCustomCrawl &&
|
||||
g_inMemFunction &&
|
||||
g_hostdb.m_hostId != 0 )
|
||||
continue;
|
||||
|
||||
//log(LOG_INFO,"admin: Saving collection #%"INT32".",i);
|
||||
m_recs[i]->save ( );
|
||||
}
|
||||
@ -111,6 +126,9 @@ bool Collectiondb::save ( ) {
|
||||
//
|
||||
///////////
|
||||
bool Collectiondb::loadAllCollRecs ( ) {
|
||||
|
||||
m_initializing = true;
|
||||
|
||||
char dname[1024];
|
||||
// MDW: sprintf ( dname , "%s/collections/" , g_hostdb.m_dir );
|
||||
sprintf ( dname , "%s" , g_hostdb.m_dir );
|
||||
@ -172,6 +190,8 @@ bool Collectiondb::loadAllCollRecs ( ) {
|
||||
0 );
|
||||
}
|
||||
|
||||
m_initializing = false;
|
||||
|
||||
// note it
|
||||
//log(LOG_INFO,"db: Loaded data for %"INT32" collections. Ranging from "
|
||||
// "collection #0 to #%"INT32".",m_numRecsUsed,m_numRecs-1);
|
||||
@ -246,6 +266,26 @@ bool Collectiondb::addExistingColl ( char *coll, collnum_t collnum ) {
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
|
||||
// also try by #, i've seen this happen too
|
||||
CollectionRec *ocr = getRec ( i );
|
||||
if ( ocr ) {
|
||||
g_errno = EEXIST;
|
||||
log("admin: Collection id %i is in use already by "
|
||||
"%s, so we can not add %s. moving %s to trash."
|
||||
,(int)i,ocr->m_coll,coll,coll);
|
||||
SafeBuf cmd;
|
||||
int64_t now = gettimeofdayInMilliseconds();
|
||||
cmd.safePrintf ( "mv coll.%s.%i trash/coll.%s.%i.%"UINT64
|
||||
, coll
|
||||
,(int)i
|
||||
, coll
|
||||
,(int)i
|
||||
, now );
|
||||
//log("admin: %s",cmd.getBufStart());
|
||||
gbsystem ( cmd.getBufStart() );
|
||||
return true;
|
||||
}
|
||||
|
||||
// create the record in memory
|
||||
CollectionRec *cr = new (CollectionRec);
|
||||
if ( ! cr )
|
||||
@ -312,6 +352,10 @@ bool Collectiondb::addNewColl ( char *coll ,
|
||||
// to add the same collnum to every shard
|
||||
collnum_t newCollnum ) {
|
||||
|
||||
|
||||
//do not send add/del coll request until we are in sync with shard!!
|
||||
// just return ETRYAGAIN for the parmlist...
|
||||
|
||||
// ensure coll name is legit
|
||||
char *p = coll;
|
||||
for ( ; *p ; p++ ) {
|
||||
@ -996,7 +1040,9 @@ bool Collectiondb::deleteRec2 ( collnum_t collnum ) { //, WaitEntry *we ) {
|
||||
bf.set ( bu.getBufStart() );
|
||||
if ( bf.doesExist() ) bf.unlink();
|
||||
}
|
||||
|
||||
|
||||
// now remove from list of collections that might need a disk merge
|
||||
removeFromMergeLinkedList ( cr );
|
||||
|
||||
//////
|
||||
//
|
||||
@ -1061,6 +1107,8 @@ bool Collectiondb::growRecPtrBuf ( collnum_t collnum ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
m_recPtrBuf.setLabel ("crecptrb");
|
||||
|
||||
// . true here means to clear the new space to zeroes
|
||||
// . this shit works based on m_length not m_capacity
|
||||
if ( ! m_recPtrBuf.reserve ( need2 ,NULL, true ) ) {
|
||||
@ -1684,6 +1732,8 @@ static CollectionRec g_default;
|
||||
|
||||
|
||||
CollectionRec::CollectionRec() {
|
||||
m_nextLink = NULL;
|
||||
m_prevLink = NULL;
|
||||
m_spiderCorruptCount = 0;
|
||||
m_collnum = -1;
|
||||
m_coll[0] = '\0';
|
||||
@ -1900,7 +1950,7 @@ bool CollectionRec::load ( char *coll , int32_t i ) {
|
||||
gbmemcpy ( &m_localCrawlInfo , sb.getBufStart(),sb.length() );
|
||||
|
||||
|
||||
if ( ! g_conf.m_doingCommandLine )
|
||||
if ( ! g_conf.m_doingCommandLine && ! g_collectiondb.m_initializing )
|
||||
log("coll: Loaded %s (%"INT32") local hasurlsready=%"INT32"",
|
||||
m_coll,
|
||||
(int32_t)m_collnum,
|
||||
@ -1947,7 +1997,7 @@ bool CollectionRec::load ( char *coll , int32_t i ) {
|
||||
// it is binary now
|
||||
gbmemcpy ( &m_globalCrawlInfo , sb.getBufStart(),sb.length() );
|
||||
|
||||
if ( ! g_conf.m_doingCommandLine )
|
||||
if ( ! g_conf.m_doingCommandLine && ! g_collectiondb.m_initializing )
|
||||
log("coll: Loaded %s (%"INT32") global hasurlsready=%"INT32"",
|
||||
m_coll,
|
||||
(int32_t)m_collnum,
|
||||
@ -1987,6 +2037,15 @@ bool CollectionRec::load ( char *coll , int32_t i ) {
|
||||
// always turn off gigabits so &s=1000 can do summary skipping
|
||||
if ( m_isCustomCrawl ) m_docsToScanForTopics = 0;
|
||||
|
||||
// make min to merge smaller than normal since most collections are
|
||||
// small and we want to reduce the # of vfds (files) we have
|
||||
if ( m_isCustomCrawl ) {
|
||||
m_posdbMinFilesToMerge = 6;
|
||||
m_titledbMinFilesToMerge = 4;
|
||||
m_linkdbMinFilesToMerge = 3;
|
||||
m_tagdbMinFilesToMerge = 2;
|
||||
}
|
||||
|
||||
// always turn on distributed spider locking because otherwise
|
||||
// we end up calling Msg50 which calls Msg25 for the same root url
|
||||
// at the same time, thereby wasting massive resources. it is also
|
||||
@ -3890,7 +3949,7 @@ bool CollectionRec::rebuildUrlFiltersDiffbot() {
|
||||
// . it is also called on load of the collection at startup
|
||||
bool CollectionRec::rebuildUrlFilters ( ) {
|
||||
|
||||
if ( ! g_conf.m_doingCommandLine )
|
||||
if ( ! g_conf.m_doingCommandLine && ! g_collectiondb.m_initializing )
|
||||
log("coll: Rebuilding url filters for %s ufp=%s",m_coll,
|
||||
m_urlFiltersProfile.getBufStart());
|
||||
|
||||
|
@ -174,6 +174,7 @@ class Collectiondb {
|
||||
|
||||
int32_t m_numCollsSwappedOut;
|
||||
|
||||
bool m_initializing;
|
||||
//int64_t m_lastUpdateTime;
|
||||
};
|
||||
|
||||
@ -420,6 +421,9 @@ class CollectionRec {
|
||||
int32_t m_dailyMergeStarted; // time_t
|
||||
int32_t m_dailyMergeTrigger;
|
||||
|
||||
class CollectionRec *m_nextLink;
|
||||
class CollectionRec *m_prevLink;
|
||||
|
||||
char m_dailyMergeDOWList[48];
|
||||
|
||||
int32_t m_treeCount;
|
||||
@ -531,6 +535,7 @@ class CollectionRec {
|
||||
char m_recycleContent ;
|
||||
char m_recycleCatdb ;
|
||||
char m_getLinkInfo ; // turn off to save seeks
|
||||
char m_computeSiteNumInlinks ;
|
||||
//char m_recycleLinkInfo2 ; // ALWAYS recycle linkInfo2?
|
||||
//char m_useLinkInfo2ForQuality ;
|
||||
char m_indexInlinkNeighborhoods;
|
||||
|
13
Conf.cpp
13
Conf.cpp
@ -12,6 +12,8 @@ Conf g_conf;
|
||||
Conf::Conf ( ) {
|
||||
m_save = true;
|
||||
m_doingCommandLine = false;
|
||||
// set max mem to 16GB at least until we load on disk
|
||||
m_maxMem = 16000000000;
|
||||
}
|
||||
|
||||
// . does this requester have ROOT admin privledges???
|
||||
@ -285,9 +287,12 @@ bool Conf::init ( char *dir ) { // , int32_t hostId ) {
|
||||
//}
|
||||
|
||||
// make sure g_mem.maxMem is big enough temporarily
|
||||
if ( g_mem.m_maxMem < 10000000 ) g_mem.m_maxMem = 10000000;
|
||||
g_conf.m_maxMem = 8000000000; // 8gb temp
|
||||
|
||||
bool status = g_parms.setFromFile ( this , fname , NULL , OBJ_CONF );
|
||||
|
||||
if ( g_conf.m_maxMem < 10000000 ) g_conf.m_maxMem = 10000000;
|
||||
|
||||
// if not there, create it!
|
||||
if ( ! status ) {
|
||||
log("gb: Creating %s from defaults.",fname);
|
||||
@ -323,7 +328,7 @@ bool Conf::init ( char *dir ) { // , int32_t hostId ) {
|
||||
|
||||
// update g_mem
|
||||
//g_mem.m_maxMem = g_conf.m_maxMem;
|
||||
if ( ! g_mem.init ( g_conf.m_maxMem ) ) return false;
|
||||
if ( ! g_mem.init ( ) ) return false;
|
||||
// always turn this off
|
||||
g_conf.m_testMem = false;
|
||||
// and this, in case you forgot to turn it off
|
||||
@ -527,7 +532,9 @@ bool Conf::save ( ) {
|
||||
g_conf.m_testMem = false;
|
||||
//char fname[1024];
|
||||
//sprintf ( fname , "%sgb.conf.saving", g_hostdb.m_dir );
|
||||
SafeBuf fn;
|
||||
// fix so if we core in malloc/free we can still save conf
|
||||
char fnbuf[1024];
|
||||
SafeBuf fn(fnbuf,1024);
|
||||
fn.safePrintf("%sgb.conf",g_hostdb.m_dir);
|
||||
bool status = g_parms.saveToXml ( (char *)this ,
|
||||
fn.getBufStart(),
|
||||
|
1
Conf.h
1
Conf.h
@ -653,6 +653,7 @@ class Conf {
|
||||
bool m_logDebugDb ;
|
||||
bool m_logDebugDirty ;
|
||||
bool m_logDebugDisk ;
|
||||
bool m_logDebugDiskPageCache;
|
||||
bool m_logDebugDns ;
|
||||
bool m_logDebugDownloads;
|
||||
bool m_logDebugFacebook;
|
||||
|
3
Dir.cpp
3
Dir.cpp
@ -50,7 +50,10 @@ bool Dir::open ( ) {
|
||||
close ( );
|
||||
if ( ! m_dirname ) return false;
|
||||
retry8:
|
||||
// opendir() calls malloc
|
||||
g_inMemFunction = true;
|
||||
m_dir = opendir ( m_dirname );
|
||||
g_inMemFunction = false;
|
||||
// interrupted system call
|
||||
if ( ! m_dir && errno == EINTR ) goto retry8;
|
||||
|
||||
|
1637
DiskPageCache.cpp
1637
DiskPageCache.cpp
File diff suppressed because it is too large
Load Diff
226
DiskPageCache.h
226
DiskPageCache.h
@ -1,42 +1,13 @@
|
||||
// Matt Wells, Copyright Jan 2004
|
||||
// Matt Wells, Copyright Jan 2004-2015
|
||||
|
||||
// . each Rdb has its own m_pageCache member
|
||||
// . a ptr to this class is passed to all File::open() calls
|
||||
// . that ptr is stored in the File class as File::m_pageCachePtr
|
||||
// . the File class uses the virtual file descriptor, vfd, for use with
|
||||
// the pageCache since we tend to open and close files a lot when we run
|
||||
// out of actual fds
|
||||
// . every subsequent read/write to that file will then use the pageCache
|
||||
// . before doing a read in File::read() we try to increase the offset
|
||||
// by filling the beginning of the buffer with data from the page cache.
|
||||
// We also try to decrease the bytes to read by filling the end of the
|
||||
// buffer. What is left to actually read, if anything, is the middle.
|
||||
// . after File::read() completes it call DiskPageCache::storePages (buf,size,off)
|
||||
// to fill the page cache.
|
||||
// . when maxMem is reached, the DiskPageCache will unfrequently used pages by
|
||||
// using a linked list
|
||||
// . when File class releases its vfd it must call m_pageCachePtr->close(vfd)
|
||||
// . now we just use RdbCache
|
||||
// . when a BigFile is first opened we assign it a unique 'vfd' (virtual fd)
|
||||
// . to make the rdbcache key we hash this vfd with the read offset and size
|
||||
|
||||
// . we use PAGESIZE defined in RdbMap.h as our page size
|
||||
// . TODO: convert PAGESIZE to 8000 not 8192
|
||||
#ifndef PAGECACHE_H
|
||||
#define PAGECACHE_H
|
||||
|
||||
#ifndef _PAGECACHE_H_
|
||||
#define _PAGECACHE_H_
|
||||
|
||||
// . use 128 disk megabytes per set of pages
|
||||
// . this MUST be a multiple of (PAGE_SIZE+HEADERSIZE) now
|
||||
//#define PAGE_SET_SIZE (128*1024*1024)
|
||||
//#define PHSIZE (GB_PAGE_SIZE+HEADERSIZE)
|
||||
//#define PAGE_SET_SIZE (((128*1024*1024)/PHSIZE)*PHSIZE)
|
||||
|
||||
// how many page sets can we have?
|
||||
#define MAX_PAGE_SETS 128
|
||||
|
||||
// how many BigFiles can be using the same DiskPageCache?
|
||||
#include "File.h"
|
||||
#define MAX_NUM_VFDS2 MAX_NUM_VFDS
|
||||
|
||||
extern void freeAllSharedMem ( int32_t max );
|
||||
#include "RdbCache.h"
|
||||
|
||||
class DiskPageCache {
|
||||
|
||||
@ -48,180 +19,39 @@ class DiskPageCache {
|
||||
|
||||
// returns false and sets g_errno if unable to alloc the memory,
|
||||
// true otherwise
|
||||
bool init ( const char *dbname ,
|
||||
char rdbId, // use 0 for none
|
||||
int32_t maxMem ,
|
||||
int32_t pageSize,
|
||||
bool useRAMDisk = false,
|
||||
bool minimizeDiskSeeks = false );
|
||||
// int32_t maxMem ,
|
||||
// void (*getPages2)(DiskPageCache*, int32_t, char*, int32_t,
|
||||
// int64_t, int32_t*, int64_t*) = NULL,
|
||||
// void (*addPages2)(DiskPageCache*, int32_t, char*, int32_t,
|
||||
// int64_t) = NULL,
|
||||
// int32_t (*getVfd2)(DiskPageCache*, int64_t) = NULL,
|
||||
// void (*rmVfd2)(DiskPageCache*, int32_t) = NULL );
|
||||
|
||||
bool initRAMDisk( const char *dbname, int32_t maxMem );
|
||||
|
||||
int32_t getMemUsed () ;
|
||||
int32_t getMemAlloced () { return m_memAlloced; };
|
||||
int32_t getMemMax () { return m_maxMem; };
|
||||
|
||||
int64_t getNumHits () { return m_hits; };
|
||||
int64_t getNumMisses () { return m_misses; };
|
||||
void resetStats () { m_hits = 0 ; m_misses = 0; };
|
||||
|
||||
// verify each page in cache for this file is what is on disk
|
||||
bool verifyData ( class BigFile *f );
|
||||
bool verifyData2 ( int32_t vfd );
|
||||
|
||||
void disableCache ( ) { m_enabled = false; };
|
||||
void enableCache ( ) { m_enabled = true; };
|
||||
|
||||
// . grow/shrink m_memOff[] which maps vfd/page to a mem offset
|
||||
// . returns false and sets g_errno on error
|
||||
// . called by DiskPageCache::open()/close() respectively
|
||||
// . maxFileSize is so we can alloc m_memOff[vfd] big enough for all
|
||||
// pages that are in or will be in the file (if it is being created)
|
||||
int32_t getVfd ( int64_t maxFileSize, bool vfdAllowed );
|
||||
void rmVfd ( int32_t vfd );
|
||||
bool init ( const char *dbname ,
|
||||
char rdbId ,
|
||||
int64_t maxMem ,
|
||||
int32_t pageSize );
|
||||
|
||||
// . this returns true iff the entire read was copied into
|
||||
// "buf" from the page cache
|
||||
// . it will move the used pages to the head of the linked list
|
||||
void getPages ( int32_t vfd ,
|
||||
char **buf ,
|
||||
int32_t numBytes ,
|
||||
int64_t offset ,
|
||||
int32_t *newNumBytes ,
|
||||
int64_t *newOffset ,
|
||||
char **allocBuf , //we alloc this if buf==NULL
|
||||
int32_t *allocSize , //size of the alloc
|
||||
int32_t allocOff );
|
||||
char *getPages ( int64_t vfd ,
|
||||
int64_t offset ,
|
||||
int64_t readSize );
|
||||
|
||||
// after you read/write from/to disk, copy into the page cache
|
||||
void addPages ( int32_t vfd, char *buf , int32_t numBytes, int64_t offset,
|
||||
int32_t niceness );
|
||||
|
||||
|
||||
// used for minimize disk seeks
|
||||
bool m_minimizeDiskSeeks;
|
||||
|
||||
int32_t m_diskPageSize;
|
||||
|
||||
private:
|
||||
|
||||
void addPage (int32_t vfd,int32_t pageNum,char *page,int32_t size,int32_t skip);
|
||||
void enhancePage ( int32_t poff,char *page,int32_t size,int32_t skip) ;
|
||||
void promotePage ( int32_t poff , bool isNew ) ;
|
||||
void excisePage ( int32_t poff ) ;
|
||||
|
||||
bool growCache ( int32_t mem );
|
||||
|
||||
//bool needsMerge();
|
||||
|
||||
void writeToCache ( int32_t memOff, int32_t memPageOff, void *inBuf,
|
||||
int32_t size );
|
||||
void readFromCache( void *outBuf, int32_t memOff, int32_t memPageOff,
|
||||
int32_t size );
|
||||
|
||||
char *getMemPtrFromMemOff ( int32_t off );
|
||||
|
||||
// . the pages are here
|
||||
// . there are 1024 page sets
|
||||
// . each page set can have up to 128 megabytes of pages
|
||||
// . much more than that and pthread_create() fails
|
||||
char *m_pageSet [ MAX_PAGE_SETS ];
|
||||
int32_t m_pageSetSize [ MAX_PAGE_SETS ];
|
||||
int32_t m_numPageSets;
|
||||
|
||||
// . next available page offset
|
||||
// . when storing a page we read from disk into a pageSet we first
|
||||
// try to get a memory offset from m_availMemOff, if none are there
|
||||
// then we use m_nextMemOff and increment it by PAGE_SIZE+HEADERSIZE
|
||||
// . if m_nextMemOff would breech m_upperMemOff then we call
|
||||
// growCache to increase m_upperMemOff
|
||||
// . we try to grow 100k with each call to growCache
|
||||
// . if m_upperMemOff would breech m_maxMem, then we kick out the
|
||||
// least used page using
|
||||
// . we store a linked list in bytes 4-12 of each page in memory
|
||||
int32_t m_nextMemOff; // next available mem offset to hold a page
|
||||
int32_t m_upperMemOff; // how many bytes are allocated in page sets?
|
||||
int32_t m_maxMem; // max we can allocate
|
||||
|
||||
// . available offsets of released pages
|
||||
// . offsets are into the page sets, m_pageSet[]
|
||||
int32_t *m_availMemOff;
|
||||
int32_t m_numAvailMemOffs;
|
||||
int32_t m_maxAvailMemOffs;
|
||||
|
||||
// . m_memOffFromDiskPage[vfd][diskPageNum] --> memOff
|
||||
// . maps a vfd and disk page number to a memory offset
|
||||
// . maps to -1 if not in page cache
|
||||
// . try to keep the number of pages down, under 100,000
|
||||
// . 100,000 pages would be about 800 megabytes
|
||||
// . I am only planning on using this for tfndb and Checksumdb so
|
||||
// we should be under or around this limit
|
||||
int32_t *m_memOffFromDiskPage [ MAX_NUM_VFDS2 ];
|
||||
|
||||
// . how many offsets are in m_memOffFromDiskPage?
|
||||
// . we have one offset per page in the file
|
||||
int32_t m_maxPagesInFile [ MAX_NUM_VFDS2 ];
|
||||
|
||||
// max number of pages that this file shall have
|
||||
int32_t m_maxPagesPerFile [ MAX_NUM_VFDS2 ];
|
||||
// max number of pages of file currently in the cache
|
||||
int32_t m_numPagesPresentOfFile[ MAX_NUM_VFDS2 ];
|
||||
// mem that has not been used
|
||||
int32_t m_memFree;
|
||||
|
||||
// how much memory is currently allocated?
|
||||
int32_t m_memAlloced;
|
||||
|
||||
// stats (partial hits/misses supported)
|
||||
int64_t m_hits;
|
||||
int64_t m_misses;
|
||||
|
||||
// . linked list boundary info
|
||||
// . linked list is actually stored in bytes 2-8 (next/prev) of pages
|
||||
// in memory
|
||||
int32_t m_headOff;
|
||||
int32_t m_tailOff;
|
||||
|
||||
// for selecting the next vfd in line and preventing sudden closing
|
||||
// and opening of a vfd, resulting in a thread returning and calling
|
||||
// addPages() for the wrong file!!
|
||||
int32_t m_nexti;
|
||||
bool addPages ( int64_t vfd ,
|
||||
int64_t offset ,
|
||||
int64_t readSize ,
|
||||
char *buf ,
|
||||
char niceness );
|
||||
|
||||
void enableCache () { m_enabled = true ; };
|
||||
void disableCache() { m_enabled = false; };
|
||||
bool m_enabled;
|
||||
|
||||
int32_t m_maxPageSetSize;
|
||||
|
||||
const char *m_dbname;
|
||||
int32_t m_pageSize;
|
||||
char m_rdbId;
|
||||
bool *m_switch;
|
||||
char m_dbname[64];
|
||||
|
||||
char m_memTag[16];
|
||||
RdbCache m_rc;
|
||||
|
||||
//bool m_useRAMDisk;
|
||||
//bool m_useSHM;
|
||||
|
||||
//int m_ramfd;
|
||||
|
||||
//int m_shmids [ 4096 ];
|
||||
//int32_t m_shmidSize [ 4096 ];
|
||||
//int32_t m_numShmids;
|
||||
//int32_t m_maxAllocSize;
|
||||
//int32_t m_spageSize;
|
||||
|
||||
// for overriding the disk page cache with custom functions
|
||||
//bool m_isOverriden;
|
||||
//void (*m_getPages2)(DiskPageCache*, int32_t, char*, int32_t, int64_t,
|
||||
// int32_t*, int64_t*);
|
||||
//void (*m_addPages2)(DiskPageCache*, int32_t, char*, int32_t, int64_t);
|
||||
//int32_t (*m_getVfd2)(DiskPageCache*, int64_t);
|
||||
//void (*m_rmVfd2)(DiskPageCache*, int32_t);
|
||||
int64_t getNumHits () { return m_rc.getNumHits(); }
|
||||
int64_t getNumMisses () { return m_rc.getNumMisses(); }
|
||||
int64_t getMemUsed () { return m_rc.getMemOccupied(); }
|
||||
int64_t getMemAlloced() { return m_rc.getMemAlloced(); }
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -194,6 +194,7 @@ case EJSONMISSINGLASTCURLY: return "JSON was missing last curly bracket";
|
||||
case EADMININTERFERENCE: return "Adminstrative interference";
|
||||
case EDNSERROR : return "DNS lookup error";
|
||||
case ETHREADSDISABLED:return "Threads Disabled";
|
||||
case EMALFORMEDQUERY: return "Malformed query";
|
||||
}
|
||||
// if the remote error bit is clear it must be a regular errno
|
||||
//if ( ! ( errnum & REMOTE_ERROR_BIT ) ) return strerror ( errnum );
|
||||
|
3
Errno.h
3
Errno.h
@ -198,6 +198,7 @@ enum {
|
||||
EJSONMISSINGLASTCURLY,
|
||||
EADMININTERFERENCE,
|
||||
EDNSERROR ,
|
||||
ETHREADSDISABLED
|
||||
ETHREADSDISABLED,
|
||||
EMALFORMEDQUERY
|
||||
};
|
||||
#endif
|
||||
|
38
File.h
38
File.h
@ -23,7 +23,8 @@
|
||||
// . man, chris has 958 files, lets crank it up from 2k to 5k
|
||||
// . boost up to 50,000 since we are hitting this limit with crawlbot
|
||||
// . we are hitting again with crawlbot, boost to 200k from 50k
|
||||
#define MAX_NUM_VFDS (200*1024)
|
||||
// . TODO: make this dynamically allocate based on need
|
||||
//#define MAX_NUM_VFDS (1024*1024)
|
||||
|
||||
#include <sys/types.h> // for open/lseek
|
||||
#include <sys/stat.h> // for open
|
||||
@ -31,18 +32,21 @@
|
||||
#include <sys/stat.h> // for stat
|
||||
#include "Mem.h" // for g_mem
|
||||
#include "Loop.h" // for g_loop.setNonBlocking(int fd)
|
||||
#include "SafeBuf.h"
|
||||
|
||||
int64_t getFileSize ( char *filename ) ;
|
||||
|
||||
int64_t getFileSize_cygwin ( char *filename ) ;
|
||||
|
||||
// for avoiding unlink/opens that mess up our threaded read
|
||||
int32_t getCloseCount_r ( int fd );
|
||||
|
||||
// prevent fd from being closed on us when we are writing
|
||||
void enterWriteMode ( int32_t vfd ) ;
|
||||
void exitWriteMode ( int32_t vfd ) ;
|
||||
void enterWriteMode ( int fd ) ;
|
||||
void exitWriteMode ( int fd ) ;
|
||||
// error correction routine used by BigFile.cpp
|
||||
void releaseVfd ( int32_t vfd ) ;
|
||||
int getfdFromVfd ( int32_t vfd ) ;
|
||||
//void releaseVfd ( int32_t vfd ) ;
|
||||
//int getfdFromVfd ( int32_t vfd ) ;
|
||||
|
||||
class File {
|
||||
|
||||
@ -56,6 +60,9 @@ class File {
|
||||
File ( );
|
||||
~File ( );
|
||||
|
||||
void constructor();
|
||||
void destructor ();
|
||||
|
||||
// . if you don't need to do a full open then just set the filename
|
||||
// . useful for unlink/rename/reserve/...
|
||||
// . IMPORTANT: if bytes were already reserved can only increase the
|
||||
@ -66,8 +73,8 @@ class File {
|
||||
// returns false and sets errno on error, returns true on success
|
||||
bool rename ( char *newFilename );
|
||||
|
||||
// if m_vfd is negative it's never been opened
|
||||
bool isOpen () { return ( m_vfd >= 0 ); };
|
||||
// True when the m_calledOpen flag is non-zero.
bool calledOpen () {
	return m_calledOpen != 0;
}
|
||||
// True when the m_calledSet flag is non-zero.
bool calledSet () {
	return m_calledSet != 0;
}
|
||||
|
||||
bool isNonBlocking () ;
|
||||
|
||||
@ -149,11 +156,15 @@ class File {
|
||||
// return -1 if not opened, otherwise, return the opened fd
|
||||
int getfdNoOpen ( ) ;
|
||||
|
||||
//char *getFilename ( ) { return m_filename.getBufStart(); };
|
||||
// Returns a pointer to the start of the m_filename buffer.
char *getFilename ( ) {
	return &m_filename[0];
}
|
||||
|
||||
// our filename, stored in a fixed-size char buffer
|
||||
// we publicize for ease of use
|
||||
char m_filename [ MAX_FILENAME_LEN ];
|
||||
//SafeBuf m_filename;
|
||||
|
||||
//char m_filenameBuf [ MAX_FILENAME_LEN ];
|
||||
|
||||
// File::rename() uses this
|
||||
//char m_oldFilename [ MAX_FILENAME_LEN ];
|
||||
@ -174,18 +185,25 @@ class File {
|
||||
bool closeLeastUsed ( );
|
||||
|
||||
// THIS file's VIRTUAL descriptor
|
||||
int m_vfd;
|
||||
//int m_vfd;
|
||||
|
||||
// now just the real fd. is -1 if not opened
|
||||
int m_fd;
|
||||
|
||||
|
||||
// save the permission and flag sets in case of re-opening
|
||||
int m_flags;
|
||||
int m_permissions;
|
||||
|
||||
char m_calledOpen;
|
||||
char m_calledSet;
|
||||
|
||||
time_t m_st_mtime; // file last mod date
|
||||
int32_t m_st_size; // file size
|
||||
time_t getLastModifiedDate ( ) ;
|
||||
|
||||
class File *m_nextActive;
|
||||
class File *m_prevActive;
|
||||
//class File *m_nextActive;
|
||||
//class File *m_prevActive;
|
||||
};
|
||||
|
||||
|
||||
|
@ -457,6 +457,9 @@ bool HashTableX::load ( char *dir, char *filename, char **tbuf, int32_t *tsize )
|
||||
|
||||
// bogus key size?
|
||||
if ( ks <= 0 ) {
|
||||
// this is very common for this file, so skip the log message
|
||||
if ( strstr(filename,"ipstouseproxiesfor.dat") )
|
||||
return false;
|
||||
log("htable: reading hashtable from %s%s: "
|
||||
"bogus keysize of %"INT32"",
|
||||
dir,filename,ks );
|
||||
|
@ -2633,6 +2633,9 @@ bool Hostdb::createHostsConf( char *cwd ) {
|
||||
sb.safePrintf("# List of hosts. Limited to 512 from MAX_HOSTS in Hostdb.h. Increase that\n");
|
||||
sb.safePrintf("# if you want more.\n");
|
||||
sb.safePrintf("#\n");
|
||||
|
||||
/*
|
||||
|
||||
sb.safePrintf("# Format:\n");
|
||||
sb.safePrintf("#\n");
|
||||
sb.safePrintf("# first column: hostID (starts at 0 and increments from there)\n");
|
||||
@ -2672,6 +2675,7 @@ bool Hostdb::createHostsConf( char *cwd ) {
|
||||
sb.safePrintf("# The working directory is the last string on each line. That is where the\n");
|
||||
sb.safePrintf("# 'gb' binary resides.\n");
|
||||
sb.safePrintf("#\n");
|
||||
*/
|
||||
|
||||
sb.safePrintf("#\n");
|
||||
sb.safePrintf("# Example of a four-node distributed search index running on a single\n");
|
||||
@ -2680,7 +2684,7 @@ bool Hostdb::createHostsConf( char *cwd ) {
|
||||
sb.safePrintf("# different ports for each gb instance since they are all on the same\n");
|
||||
sb.safePrintf("# server.\n");
|
||||
sb.safePrintf("#\n");
|
||||
sb.safePrintf("# Use './gb 2' to run as the host on IP 1.2.3.8 for example.\n");
|
||||
//sb.safePrintf("# Use './gb 2' to run as the host on IP 1.2.3.8 for example.\n");
|
||||
sb.safePrintf("#\n");
|
||||
sb.safePrintf("#0 5998 7000 8000 9000 1.2.3.4 1.2.3.5 /home/mwells/host0/\n");
|
||||
sb.safePrintf("#1 5997 7001 8001 9001 1.2.3.4 1.2.3.5 /home/mwells/host1/\n");
|
||||
@ -2707,6 +2711,7 @@ bool Hostdb::createHostsConf( char *cwd ) {
|
||||
sb.safePrintf("#5 5998 7000 8000 9000 se5 se5b /home/mwells/gigablast/\n");
|
||||
sb.safePrintf("#6 5998 7000 8000 9000 se6 se6b /home/mwells/gigablast/\n");
|
||||
sb.safePrintf("#7 5998 7000 8000 9000 se7 se7b /home/mwells/gigablast/\n");
|
||||
/*
|
||||
sb.safePrintf("\n");
|
||||
sb.safePrintf("\n");
|
||||
sb.safePrintf("# Proxies\n");
|
||||
@ -2732,6 +2737,7 @@ bool Hostdb::createHostsConf( char *cwd ) {
|
||||
sb.safePrintf("# Example:\n");
|
||||
sb.safePrintf("# A proxy will be running on 10.5.66.18:\n");
|
||||
sb.safePrintf("#proxy 6001 7001 8001 9001 10.5.66.18\n");
|
||||
*/
|
||||
|
||||
log("%shosts.conf does not exist, creating.",cwd);
|
||||
sb.save ( cwd , "hosts.conf" );
|
||||
|
@ -1543,7 +1543,7 @@ bool HttpServer::sendReply ( TcpSocket *s , HttpRequest *r , bool isAdmin) {
|
||||
// case, as it is only set to true in TcpServer::readSocketWrapper()
|
||||
// which should never be called by TcpServer::sendMsg() above.
|
||||
// so let cleanUp know it is no longer valid
|
||||
if ( ! f->isOpen() ) f->open( O_RDONLY );
|
||||
if ( ! f->calledOpen() ) f->open( O_RDONLY );
|
||||
int fd = f->getfd();
|
||||
cleanUp ( f , NULL/*TcpSocket */ );
|
||||
// . AND we need to do this ourselves here
|
||||
@ -2249,7 +2249,7 @@ int32_t getMsgPiece ( TcpSocket *s ) {
|
||||
char *p = s->m_sendBuf;
|
||||
char *pend = p + s->m_sendBufUsed;
|
||||
// skip if not a doc.234567 filename format
|
||||
if ( ! gb_strcasestr(f->m_filename,"/doc." ) ) p = pend;
|
||||
if ( ! gb_strcasestr(f->getFilename(),"/doc." ) ) p = pend;
|
||||
// do the replace
|
||||
for ( ; p < pend ; p++ ) {
|
||||
if ( strncasecmp(p,"google",6)) continue;
|
||||
|
@ -112,9 +112,7 @@ bool Indexdb::init ( ) {
|
||||
if ( ! m_pc.init ( "indexdb",
|
||||
RDB_INDEXDB,
|
||||
pcmem ,
|
||||
pageSize ,
|
||||
true , // use RAM disk?
|
||||
false )) // minimize disk seeks?
|
||||
pageSize ))
|
||||
return log("db: Indexdb init failed.");
|
||||
|
||||
// . set our own internal rdb
|
||||
|
3
Json.cpp
3
Json.cpp
@ -378,6 +378,9 @@ JsonItem *Json::parseJsonStringIntoJsonItems ( char *json , int32_t niceness ) {
|
||||
if ( mem != memEnd )
|
||||
log("json: json parser reallocated buffer. inefficient.");
|
||||
|
||||
// return NULL if no json items were found
|
||||
if ( m_sb.length() <= 0 ) return NULL;
|
||||
|
||||
return (JsonItem *)m_sb.getBufStart();
|
||||
}
|
||||
|
||||
|
1
Json.h
1
Json.h
@ -90,6 +90,7 @@ class Json {
|
||||
JsonItem *m_stack[MAXJSONPARENTS];
|
||||
int32_t m_stackPtr;
|
||||
class JsonItem *m_prev;
|
||||
// Reset the parser state by purging the m_sb buffer.
void reset() {
	m_sb.purge();
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
26
Linkdb.cpp
26
Linkdb.cpp
@ -120,9 +120,7 @@ bool Linkdb::init ( ) {
|
||||
if ( ! m_pc.init ( "linkdb" ,
|
||||
RDB_LINKDB,
|
||||
pcmem ,
|
||||
pageSize ,
|
||||
true , // use shared mem?
|
||||
false )) // minimizeDiskSeeks?
|
||||
pageSize ))
|
||||
return log("db: Linkdb init failed.");
|
||||
// init the rdb
|
||||
return m_rdb.init ( g_hostdb.m_dir ,
|
||||
@ -716,6 +714,14 @@ void handleRequest25 ( UdpSlot *slot , int32_t netnice ) {
|
||||
// used by sendReply()
|
||||
req->m_udpSlot = slot;
|
||||
|
||||
if ( g_conf.m_logDebugLinkInfo && req->m_mode == MODE_SITELINKINFO ) {
|
||||
log("linkdb: got msg25 request sitehash64=%"INT64" "
|
||||
"site=%s "
|
||||
,req->m_siteHash64
|
||||
,req->ptr_site
|
||||
);
|
||||
}
|
||||
|
||||
// set up the hashtable if our first time
|
||||
if ( ! g_lineTable.isInitialized() )
|
||||
g_lineTable.set ( 8,sizeof(Msg25Request *),256,
|
||||
@ -740,7 +746,8 @@ void handleRequest25 ( UdpSlot *slot , int32_t netnice ) {
|
||||
req->m_next = head->m_next;
|
||||
head->m_next = req;
|
||||
// note it for debugging
|
||||
log("build: msg25 request waiting in line for %s slot=0x%"PTRFMT"",
|
||||
log("build: msg25 request waiting in line for %s "
|
||||
"udpslot=0x%"PTRFMT"",
|
||||
req->ptr_url,(PTRTYPE)slot);
|
||||
// we will send a reply back for this guy when done
|
||||
// getting the reply for the head msg25request
|
||||
@ -1118,9 +1125,9 @@ bool Msg25::doReadLoop ( ) {
|
||||
if ( g_conf.m_logDebugLinkInfo ) {
|
||||
char *ms = "page";
|
||||
if ( m_mode == MODE_SITELINKINFO ) ms = "site";
|
||||
log("msg25: getting full linkinfo mode=%s site=%s url=%s "
|
||||
"docid=%"INT64"",
|
||||
ms,m_site,m_url,m_docId);
|
||||
log("msg25: reading linkdb list mode=%s site=%s url=%s "
|
||||
"docid=%"INT64" linkdbstartkey=%s",
|
||||
ms,m_site,m_url,m_docId,KEYSTR(&startKey,LDBKS));
|
||||
}
|
||||
|
||||
m_gettingList = true;
|
||||
@ -2310,8 +2317,9 @@ bool Msg25::gotLinkText ( Msg20Request *req ) { // LinkTextReply *linkText ) {
|
||||
}
|
||||
// debug
|
||||
if ( g_conf.m_logDebugLinkInfo ) {
|
||||
log("linkdb: recalling round=%"INT32" for %s=%s",
|
||||
m_round,ms,m_site);
|
||||
log("linkdb: recalling round=%"INT32" for %s=%s "
|
||||
"req=0x%"PTRFMT" numlinkerreplies=%"INT32,
|
||||
m_round,ms,m_site,(PTRTYPE)m_req25,m_numReplyPtrs);
|
||||
}
|
||||
// and re-call. returns true if did not block.
|
||||
// returns true with g_errno set on error.
|
||||
|
16
Log.cpp
16
Log.cpp
@ -222,6 +222,8 @@ bool Log::shouldLog ( int32_t type , char *msg ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
bool g_loggingEnabled = true;
|
||||
|
||||
// 1GB max log file size
|
||||
#define MAXLOGFILESIZE 1000000000
|
||||
// for testing:
|
||||
@ -233,6 +235,8 @@ bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
|
||||
// filter if we should
|
||||
//if ( forced ) goto skipfilter;
|
||||
|
||||
if ( ! g_loggingEnabled )
|
||||
return true;
|
||||
// return true if we should not log this
|
||||
if ( ! forced && ! shouldLog ( type , msg ) ) return true;
|
||||
// skipfilter:
|
||||
@ -367,7 +371,7 @@ bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
|
||||
|
||||
// . if filesize would be too big then make a new log file
|
||||
// . should make a new m_fd
|
||||
if ( m_logFileSize + tlen+1 > MAXLOGFILESIZE )
|
||||
if ( m_logFileSize + tlen+1 > MAXLOGFILESIZE && g_conf.m_runAsDaemon )
|
||||
makeNewLogFile();
|
||||
|
||||
if ( m_fd >= 0 ) {
|
||||
@ -398,9 +402,17 @@ bool Log::logR ( int64_t now , int32_t type , char *msg , bool asterisk ,
|
||||
}
|
||||
|
||||
bool Log::makeNewLogFile ( ) {
|
||||
|
||||
// prevent deadlock. don't log since we are in the middle of logging.
|
||||
// otherwise, safebuf, which is used when renaming files, might
|
||||
// call logR().
|
||||
g_loggingEnabled = false;
|
||||
// . rename old log file like log000 to log000-2013_11_04-18:19:32
|
||||
// . returns false on error
|
||||
if ( ! renameCurrentLogFile() ) return false;
|
||||
bool status = renameCurrentLogFile();
|
||||
// re-enable logging since nothing below should call logR() indirectly
|
||||
g_loggingEnabled = true;
|
||||
if ( ! status ) return false;
|
||||
// close old fd
|
||||
if ( m_fd >= 0 ) ::close ( m_fd );
|
||||
// invalidate
|
||||
|
5
Loop.cpp
5
Loop.cpp
@ -1017,6 +1017,11 @@ void printStackTrace ( int signum , siginfo_t *info , void *ptr ) {
|
||||
logf(LOG_DEBUG,"gb: seg fault. printing stack trace. use "
|
||||
"'addr2line -e gb' to decode the hex below.");
|
||||
|
||||
if ( g_inMemFunction ) {
|
||||
logf(LOG_DEBUG,"gb: in mem function not doing backtrace");
|
||||
return;
|
||||
}
|
||||
|
||||
static void *s_bt[200];
|
||||
int sz = backtrace(s_bt, 200);
|
||||
//char **strings = backtrace_symbols(s_bt, sz);
|
||||
|
16
Makefile
16
Makefile
@ -32,7 +32,7 @@ OBJS = UdpSlot.o Rebalance.o \
|
||||
Msg39.o Msg3.o \
|
||||
Msg22.o \
|
||||
Msg20.o Msg2.o \
|
||||
Msg1.o Msg35.o \
|
||||
Msg1.o \
|
||||
Msg0.o Mem.o Matches.o Loop.o \
|
||||
Log.o Lang.o \
|
||||
Indexdb.o Posdb.o Clusterdb.o IndexList.o Revdb.o \
|
||||
@ -86,7 +86,10 @@ STATIC :=
|
||||
XMLDOCOPT := -O2
|
||||
else
|
||||
OS_DEB := true
|
||||
STATIC := -static
|
||||
# let's remove static now by default to be safe because we don't always
|
||||
# detect red hat installs like on aws. do 'make static' to make as static.
|
||||
#STATIC := -static
|
||||
STATIC :=
|
||||
# MDW: i get some parsing inconsistencies when running the first qa injection
|
||||
# test if this is -O3. strange.
|
||||
# now debian jessie doesn't like -O3, it will core right away when spidering
|
||||
@ -110,11 +113,13 @@ LIBS = ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a
|
||||
# are we a 32-bit architecture? use different libraries then
|
||||
else ifeq ($(ARCH), i686)
|
||||
CPPFLAGS= -m32 -g -Wall -pipe -fno-stack-protector -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -DPTHREADS -Wno-unused-but-set-variable $(STATIC)
|
||||
LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
|
||||
#LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
|
||||
LIBS= -lm -lpthread -lssl -lcrypto ./libiconv.a ./libz.a
|
||||
|
||||
else ifeq ($(ARCH), i386)
|
||||
CPPFLAGS= -m32 -g -Wall -pipe -fno-stack-protector -Wno-write-strings -Wstrict-aliasing=0 -Wno-uninitialized -DPTHREADS -Wno-unused-but-set-variable $(STATIC)
|
||||
LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
|
||||
#LIBS= -L. ./libz.a ./libssl.a ./libcrypto.a ./libiconv.a ./libm.a ./libstdc++.a -lpthread
|
||||
LIBS= -lm -lpthread -lssl -lcrypto ./libiconv.a ./libz.a
|
||||
|
||||
else
|
||||
#
|
||||
@ -190,6 +195,9 @@ vclean:
|
||||
gb: vclean $(OBJS) main.o $(LIBFILES)
|
||||
$(CC) $(DEFS) $(CPPFLAGS) -o $@ main.o $(OBJS) $(LIBS)
|
||||
|
||||
static: vclean $(OBJS) main.o $(LIBFILES)
|
||||
$(CC) $(DEFS) $(CPPFLAGS) -static -o gb main.o $(OBJS) $(LIBS)
|
||||
|
||||
|
||||
# use this for compiling on CYGWIN:
|
||||
# only for 32bit cygwin right now and
|
||||
|
193
Mem.cpp
193
Mem.cpp
@ -12,6 +12,9 @@
|
||||
//#include "Stats.h"
|
||||
#include "Pages.h"
|
||||
|
||||
// uncomment this #define to electric fence just on umsg00 buffers:
|
||||
//#define SPECIAL
|
||||
|
||||
// put me back
|
||||
//#define EFENCE
|
||||
//#define EFENCE_SIZE 50000
|
||||
@ -27,6 +30,8 @@
|
||||
#undef calloc
|
||||
#undef realloc
|
||||
|
||||
bool g_inMemFunction = false;
|
||||
|
||||
// from malloc.c (dlmalloc)
|
||||
//void *dlmalloc(size_t);
|
||||
//void dlfree(void*);
|
||||
@ -68,7 +73,7 @@ extern bool g_isYippy;
|
||||
|
||||
bool freeCacheMem();
|
||||
|
||||
#if defined(EFENCE) || defined(EFENCE_SIZE)
|
||||
#if defined(EFENCE) || defined(EFENCE_SIZE) || defined(SPECIAL)
|
||||
static void *getElecMem ( int32_t size ) ;
|
||||
static void freeElecMem ( void *p ) ;
|
||||
#endif
|
||||
@ -246,14 +251,21 @@ void * operator new (size_t size) throw (std::bad_alloc) {
|
||||
//if ( ! g_stats.m_gotLock || g_threads.amThread() ) mutexLock();
|
||||
//else unlock = false;
|
||||
|
||||
// hack so hostid #0 can use more mem
|
||||
int64_t max = g_conf.m_maxMem;
|
||||
//if ( g_hostdb.m_hostId == 0 ) max += 2000000000;
|
||||
|
||||
// don't go over max
|
||||
if ( g_mem.m_used + (int32_t)size >= g_mem.m_maxMem &&
|
||||
g_mem.m_maxMem > 1000000 ) {
|
||||
if ( g_mem.m_used + (int32_t)size >= max &&
|
||||
g_conf.m_maxMem > 1000000 ) {
|
||||
log("mem: new(%"UINT32"): Out of memory.", (uint32_t)size );
|
||||
//if ( unlock ) mutexUnlock();
|
||||
throw std::bad_alloc();
|
||||
//throw 1;
|
||||
}
|
||||
|
||||
g_inMemFunction = true;
|
||||
|
||||
#ifdef EFENCE
|
||||
void *mem = getElecMem(size);
|
||||
#elif EFENCE_SIZE
|
||||
@ -266,6 +278,9 @@ void * operator new (size_t size) throw (std::bad_alloc) {
|
||||
//void *mem = dlmalloc ( size );
|
||||
void *mem = sysmalloc ( size );
|
||||
#endif
|
||||
|
||||
g_inMemFunction = false;
|
||||
|
||||
int32_t memLoop = 0;
|
||||
newmemloop:
|
||||
//void *mem = s_pool.malloc ( size );
|
||||
@ -334,13 +349,20 @@ void * operator new [] (size_t size) throw (std::bad_alloc) {
|
||||
// // return NULL; }
|
||||
//}
|
||||
|
||||
// hack so hostid #0 can use more mem
|
||||
int64_t max = g_conf.m_maxMem;
|
||||
//if ( g_hostdb.m_hostId == 0 ) max += 2000000000;
|
||||
|
||||
// don't go over max
|
||||
if ( g_mem.m_used + (int32_t)size >= g_mem.m_maxMem &&
|
||||
g_mem.m_maxMem > 1000000 ) {
|
||||
if ( g_mem.m_used + (int32_t)size >= max &&
|
||||
g_conf.m_maxMem > 1000000 ) {
|
||||
log("mem: new(%"UINT32"): Out of memory.", (uint32_t)size );
|
||||
throw std::bad_alloc();
|
||||
//throw 1;
|
||||
}
|
||||
|
||||
g_inMemFunction = true;
|
||||
|
||||
#ifdef EFENCE
|
||||
void *mem = getElecMem(size);
|
||||
#elif EFENCE_SIZE
|
||||
@ -354,6 +376,9 @@ void * operator new [] (size_t size) throw (std::bad_alloc) {
|
||||
void *mem = sysmalloc ( size );
|
||||
#endif
|
||||
|
||||
g_inMemFunction = false;
|
||||
|
||||
|
||||
int32_t memLoop = 0;
|
||||
newmemloop:
|
||||
//void *mem = s_pool.malloc ( size );
|
||||
@ -406,7 +431,7 @@ newmemloop:
|
||||
Mem::Mem() {
|
||||
m_used = 0;
|
||||
// assume large max until this gets set for real
|
||||
m_maxMem = 50000000;
|
||||
//m_maxMem = 50000000;
|
||||
m_numAllocated = 0;
|
||||
m_numTotalAllocated = 0;
|
||||
m_maxAlloc = 0;
|
||||
@ -447,17 +472,16 @@ pid_t Mem::getPid() {
|
||||
return s_pid;
|
||||
}
|
||||
|
||||
bool Mem::init ( int64_t maxMem ) {
|
||||
bool Mem::init ( ) { // int64_t maxMem ) {
|
||||
// set main process pid
|
||||
s_pid = getpid();
|
||||
|
||||
// . don't swap our memory out, man...
|
||||
// . damn, linux 2.4.17 seems to crash the kernel sometimes w/ this
|
||||
//if ( mlockall( MCL_CURRENT | MCL_FUTURE ) == -1 ) {
|
||||
// log("Mem::init: mlockall: %s" , strerror(errno) );
|
||||
// errno = 0;
|
||||
//}
|
||||
m_maxMem = maxMem;
|
||||
//m_maxMem = maxMem;
|
||||
// set it
|
||||
//struct rlimit lim;
|
||||
//lim.rlim_max = maxMem;
|
||||
@ -530,9 +554,9 @@ void Mem::addMem ( void *mem , int32_t size , const char *note , char isnew ) {
|
||||
|
||||
//validate();
|
||||
|
||||
// if ( note && note[0] == 'S' && note[1] == 'a' &&
|
||||
// note[2] == 'f' && size == 13371521 )
|
||||
// log("mem: got mystery safebuf");
|
||||
// if ( note && note[0] == 'S' && note[1] == 'a' &&
|
||||
// note[2] == 'f' && size == 1179 )
|
||||
// log("mem: got mystery safebuf");
|
||||
|
||||
|
||||
//m_memtablesize = 0;//DMEMTABLESIZE;
|
||||
@ -542,7 +566,8 @@ void Mem::addMem ( void *mem , int32_t size , const char *note , char isnew ) {
|
||||
if ( ! s_initialized ) {
|
||||
//m_memtablesize = m_maxMem / 6510;
|
||||
// support 1.2M ptrs for now. good for about 8GB
|
||||
m_memtablesize = 3000*1024;//m_maxMem / 6510;
|
||||
// raise from 3000 to 8194 to fix host #1
|
||||
m_memtablesize = 8194*1024;//m_maxMem / 6510;
|
||||
//if ( m_maxMem < 8000000000 ) { char *xx=NULL;*xx=0; }
|
||||
}
|
||||
|
||||
@ -600,7 +625,18 @@ void Mem::addMem ( void *mem , int32_t size , const char *note , char isnew ) {
|
||||
*xx = 0;
|
||||
}
|
||||
|
||||
if ( ! isnew ) {
|
||||
// umsg00
|
||||
bool useElectricFence = false;
|
||||
#ifdef SPECIAL
|
||||
if ( note[0] == 'u' &&
|
||||
note[1] == 'm' &&
|
||||
note[2] == 's' &&
|
||||
note[3] == 'g' &&
|
||||
note[4] == '0' &&
|
||||
note[5] == '0' )
|
||||
useElectricFence = true;
|
||||
#endif
|
||||
if ( ! isnew && ! useElectricFence ) {
|
||||
for ( int32_t i = 0 ; i < UNDERPAD ; i++ )
|
||||
((char *)mem)[0-i-1] = MAGICCHAR;
|
||||
for ( int32_t i = 0 ; i < OVERPAD ; i++ )
|
||||
@ -608,7 +644,8 @@ void Mem::addMem ( void *mem , int32_t size , const char *note , char isnew ) {
|
||||
}
|
||||
// hey!
|
||||
if ( s_pid == -1 && m_numTotalAllocated >1000 ) {
|
||||
log(LOG_WARN, "pid is %i and numAllocs is %i", s_pid, m_numTotalAllocated);
|
||||
log(LOG_WARN, "pid is %i and numAllocs is %i", (int)s_pid,
|
||||
(int)m_numTotalAllocated);
|
||||
//char *xx=NULL;*xx=0;}
|
||||
// if ( s_pid == -1 && m_numTotalAllocated >1000 ) { char *xx=NULL;*xx=0;}
|
||||
}
|
||||
@ -961,9 +998,10 @@ bool Mem::rmMem ( void *mem , int32_t size , const char *note ) {
|
||||
if ( size == 0 ) return true;
|
||||
// hey!
|
||||
if ( s_pid == -1 && m_numTotalAllocated >1000 ) {
|
||||
log(LOG_WARN, "pid is %i and numAllocs is %i", s_pid, m_numTotalAllocated);
|
||||
log(LOG_WARN, "pid is %i and numAllocs is %i",
|
||||
(int)s_pid, (int)m_numTotalAllocated);
|
||||
//char *xx=NULL;*xx=0;}
|
||||
}
|
||||
}
|
||||
// threads can't be here!
|
||||
if ( s_pid != -1 && getpid() != s_pid ) {
|
||||
log("mem: rmMem: Called from thread.");
|
||||
@ -1145,6 +1183,18 @@ int Mem::printBreech ( int32_t i , char core ) {
|
||||
if ( s_labels[i*16+0] == 'T' &&
|
||||
s_labels[i*16+1] == 'h' &&
|
||||
!strcmp(&s_labels[i*16 ],"ThreadStack" ) ) return 0;
|
||||
#ifdef SPECIAL
|
||||
// for now this is efence. umsg00
|
||||
bool useElectricFence = false;
|
||||
if ( s_labels[i*16+0] == 'u' &&
|
||||
s_labels[i*16+1] == 'm' &&
|
||||
s_labels[i*16+2] == 's' &&
|
||||
s_labels[i*16+3] == 'g' &&
|
||||
s_labels[i*16+4] == '0' &&
|
||||
s_labels[i*16+5] == '0' )
|
||||
useElectricFence = true;
|
||||
if ( useElectricFence ) return 0;
|
||||
#endif
|
||||
char flag = 0;
|
||||
// check for underruns
|
||||
char *mem = (char *)s_mptrs[i];
|
||||
@ -1270,6 +1320,9 @@ int Mem::printBreeches ( char core ) {
|
||||
if ( ! s_mptrs ) return 0;
|
||||
// do not bother if no padding at all
|
||||
if ( (int32_t)UNDERPAD == 0 && (int32_t)OVERPAD == 0 ) return 0;
|
||||
|
||||
log("mem: checking mem for breeches");
|
||||
|
||||
// loop through the whole mem table
|
||||
for ( int32_t i = 0 ; i < (int32_t)m_memtablesize ; i++ )
|
||||
// only check if non-empty
|
||||
@ -1346,8 +1399,13 @@ void *Mem::gbmalloc ( int size , const char *note ) {
|
||||
}
|
||||
|
||||
retry:
|
||||
|
||||
// hack so hostid #0 can use more mem
|
||||
int64_t max = g_conf.m_maxMem;
|
||||
//if ( g_hostdb.m_hostId == 0 ) max += 2000000000;
|
||||
|
||||
// don't go over max
|
||||
if ( m_used + size + UNDERPAD + OVERPAD >= m_maxMem ) {
|
||||
if ( m_used + size + UNDERPAD + OVERPAD >= max ) {
|
||||
// try to free temp mem. returns true if it freed some.
|
||||
if ( freeCacheMem() ) goto retry;
|
||||
g_errno = ENOMEM;
|
||||
@ -1363,6 +1421,8 @@ void *Mem::gbmalloc ( int size , const char *note ) {
|
||||
|
||||
void *mem;
|
||||
|
||||
g_inMemFunction = true;
|
||||
|
||||
// to find bug that cores on malloc do this
|
||||
//printBreeches(true);
|
||||
//g_errno=ENOMEM;return (void *)log("Mem::malloc: reached mem limit");}
|
||||
@ -1375,11 +1435,32 @@ void *Mem::gbmalloc ( int size , const char *note ) {
|
||||
mem = getElecMem(size+0+0);
|
||||
else
|
||||
mem = (void *)sysmalloc ( size + UNDERPAD + OVERPAD );
|
||||
#else
|
||||
#else
|
||||
|
||||
#ifdef SPECIAL
|
||||
// debug where tagrec in xmldoc.cpp's msge0 tag list is overrunning
|
||||
// for umsg00
|
||||
bool useElectricFence = false;
|
||||
if ( note[0] == 'u' &&
|
||||
note[1] == 'm' &&
|
||||
note[2] == 's' &&
|
||||
note[3] == 'g' &&
|
||||
note[4] == '0' &&
|
||||
note[5] == '0' )
|
||||
useElectricFence = true;
|
||||
if ( useElectricFence ) {
|
||||
mem = getElecMem(size+0+0);
|
||||
addMem ( (char *)mem + 0 , size , note , 0 );
|
||||
return (char *)mem + 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
//void *mem = dlmalloc ( size );
|
||||
mem = (void *)sysmalloc ( size + UNDERPAD + OVERPAD );
|
||||
#endif
|
||||
|
||||
g_inMemFunction = false;
|
||||
|
||||
// initialization debug
|
||||
//char *pend = (char *)mem + UNDERPAD + size;
|
||||
//for ( char *p = (char *)mem + UNDERPAD ; p < pend ; p++ )
|
||||
@ -1406,7 +1487,7 @@ mallocmemloop:
|
||||
static int64_t s_lastTime;
|
||||
static int32_t s_missed = 0;
|
||||
int64_t now = gettimeofdayInMillisecondsLocal();
|
||||
int64_t avail = (int64_t)m_maxMem -
|
||||
int64_t avail = (int64_t)g_conf.m_maxMem -
|
||||
(int64_t)m_used;
|
||||
if ( now - s_lastTime >= 1000LL ) {
|
||||
log("mem: system malloc(%i,%s) availShouldBe=%"INT64": "
|
||||
@ -1510,8 +1591,13 @@ void *Mem::gbrealloc ( void *ptr , int oldSize , int newSize ,
|
||||
// return NULL;
|
||||
//}
|
||||
retry:
|
||||
|
||||
// hack so hostid #0 can use more mem
|
||||
int64_t max = g_conf.m_maxMem;
|
||||
//if ( g_hostdb.m_hostId == 0 ) max += 2000000000;
|
||||
|
||||
// don't go over max
|
||||
if ( m_used + newSize - oldSize >= m_maxMem ) {
|
||||
if ( m_used + newSize - oldSize >= max ) {
|
||||
// try to free temp mem. returns true if it freed some.
|
||||
if ( freeCacheMem() ) goto retry;
|
||||
g_errno = ENOMEM;
|
||||
@ -1536,6 +1622,34 @@ void *Mem::gbrealloc ( void *ptr , int oldSize , int newSize ,
|
||||
return mem;
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef SPECIAL
|
||||
int32_t slot = g_mem.getMemSlot ( ptr );
|
||||
// debug where tagrec in xmldoc.cpp's msge0 tag list is overrunning
|
||||
// for umsg00
|
||||
if ( slot >= 0 ) {
|
||||
char *label = &s_labels[slot*16];
|
||||
bool useElectricFence = false;
|
||||
if ( label[0] == 'u' &&
|
||||
label[1] == 'm' &&
|
||||
label[2] == 's' &&
|
||||
label[3] == 'g' &&
|
||||
label[4] == '0' &&
|
||||
label[5] == '0' )
|
||||
useElectricFence = true;
|
||||
if ( useElectricFence ) {
|
||||
// just make a new buf
|
||||
mem = (char *)mmalloc ( newSize , note );
|
||||
if ( ! mem ) return NULL;
|
||||
// copy over to it
|
||||
gbmemcpy ( mem , ptr , oldSize );
|
||||
// free the old
|
||||
mfree ( ptr , oldSize , note );
|
||||
return mem;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
|
||||
// assume it will be successful. we can't call rmMem() after
|
||||
// calling sysrealloc() because it will mess up our MAGICCHAR buf
|
||||
rmMem ( ptr , oldSize , note );
|
||||
@ -1626,11 +1740,38 @@ void Mem::gbfree ( void *ptr , int size , const char *note ) {
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef SPECIAL
|
||||
g_inMemFunction = true;
|
||||
// debug where tagrec in xmldoc.cpp's msge0 tag list is overrunning
|
||||
// for umsg00
|
||||
bool useElectricFence = false;
|
||||
char *label = &s_labels[slot*16];
|
||||
if ( label[0] == 'u' &&
|
||||
label[1] == 'm' &&
|
||||
label[2] == 's' &&
|
||||
label[3] == 'g' &&
|
||||
label[4] == '0' &&
|
||||
label[5] == '0' )
|
||||
useElectricFence = true;
|
||||
if ( useElectricFence ) {
|
||||
// this calls rmMem() itself
|
||||
freeElecMem ((char *)ptr - 0 );
|
||||
g_inMemFunction = false;
|
||||
// if this returns false it was an unbalanced free
|
||||
//if ( ! rmMem ( ptr , size , note ) ) return;
|
||||
return;
|
||||
}
|
||||
g_inMemFunction = false;
|
||||
#endif
|
||||
|
||||
// if this returns false it was an unbalanced free
|
||||
if ( ! rmMem ( ptr , size , note ) ) return;
|
||||
|
||||
g_inMemFunction = true;
|
||||
if ( isnew ) sysfree ( (char *)ptr );
|
||||
else sysfree ( (char *)ptr - UNDERPAD );
|
||||
g_inMemFunction = false;
|
||||
}
|
||||
|
||||
int32_t getLowestLitBitLL ( uint64_t bits ) {
|
||||
@ -2062,6 +2203,7 @@ void *getElecMem ( int32_t size ) {
|
||||
// store the ptrs
|
||||
*(char **)(returnMem- sizeof(char *)) = realMem;
|
||||
*(char **)(returnMem- sizeof(char *)*2) = realMemEnd;
|
||||
//log("protect2 0x%"PTRFMT"\n",(PTRTYPE)protMem);
|
||||
// protect that after we wrote our ptr
|
||||
if ( mprotect ( protMem , MEMPAGESIZE , PROT_NONE) < 0 )
|
||||
log("mem: mprotect failed: %s",mstrerror(errno));
|
||||
@ -2113,6 +2255,7 @@ void *getElecMem ( int32_t size ) {
|
||||
*(char **)(returnMem- sizeof(char *)*2) = realMemEnd;
|
||||
// sanity
|
||||
if ( returnMem - sizeof(char *)*2 < realMem ) { char *xx=NULL;*xx=0; }
|
||||
//log("protect3 0x%"PTRFMT"\n",(PTRTYPE)protMem);
|
||||
// protect that after we wrote our ptr
|
||||
if ( mprotect ( protMem , MEMPAGESIZE , PROT_NONE) < 0 )
|
||||
log("mem: mprotect failed: %s",mstrerror(errno));
|
||||
@ -2165,6 +2308,9 @@ void freeElecMem ( void *fakeMem ) {
|
||||
char *oldProtMem = cp + fakeSize;
|
||||
#endif
|
||||
|
||||
// hack
|
||||
//oldProtMem -= 4;
|
||||
//log("unprotect1 0x%"PTRFMT"\n",(PTRTYPE)oldProtMem);
|
||||
// unprotect it
|
||||
if ( mprotect ( oldProtMem , MEMPAGESIZE, PROT_READ|PROT_WRITE) < 0 )
|
||||
log("mem: munprotect failed: %s",mstrerror(errno));
|
||||
@ -2186,6 +2332,7 @@ void freeElecMem ( void *fakeMem ) {
|
||||
// sanity
|
||||
if ( protMem < realMem ) { char *xx=NULL;*xx=0; }
|
||||
if ( protMem - realMem > (int32_t)MEMPAGESIZE) { char *xx=NULL;*xx=0; }
|
||||
//log("protect1 0x%"PTRFMT"\n",(PTRTYPE)protMem);
|
||||
// before adding it into the ring, protect it
|
||||
if ( mprotect ( protMem , protEnd-protMem, PROT_NONE) < 0 )
|
||||
log("mem: mprotect2 failed: %s",mstrerror(errno));
|
||||
@ -2199,6 +2346,8 @@ void freeElecMem ( void *fakeMem ) {
|
||||
g_mem.rmMem ( s_freeCursor->m_fakeMem,
|
||||
s_freeCursor->m_fakeSize,
|
||||
s_freeCursor->m_note );
|
||||
// log("unprotect2 0x%"PTRFMT"\n",
|
||||
// (PTRTYPE)s_freeCursor->m_protMem);
|
||||
// unprotect it
|
||||
if ( mprotect (s_freeCursor->m_protMem,
|
||||
s_freeCursor->m_protSize,
|
||||
@ -2237,6 +2386,8 @@ void freeElecMem ( void *fakeMem ) {
|
||||
g_mem.rmMem ( s_freeCursor->m_fakeMem,
|
||||
s_freeCursor->m_fakeSize,
|
||||
s_freeCursor->m_note );
|
||||
// log("unprotect3 0x%"PTRFMT"\n",
|
||||
// (PTRTYPE)s_freeCursor->m_protMem);
|
||||
// unprotect it
|
||||
if ( mprotect (s_freeCursor->m_protMem,
|
||||
s_freeCursor->m_protSize,
|
||||
|
8
Mem.h
8
Mem.h
@ -17,6 +17,8 @@
|
||||
//#include <dmalloc.h>
|
||||
//#endif
|
||||
|
||||
extern bool g_inMemFunction;
|
||||
|
||||
// we share malloc between threads, so you need to get the lock
|
||||
//void mutexLock ( );
|
||||
//void mutexUnlock ( );
|
||||
@ -81,7 +83,7 @@ class Mem {
|
||||
Mem();
|
||||
~Mem();
|
||||
|
||||
bool init ( int64_t maxMem );
|
||||
bool init ( );//int64_t maxMem );
|
||||
|
||||
void setPid();
|
||||
pid_t getPid();
|
||||
@ -130,7 +132,7 @@ class Mem {
|
||||
int printBreeches ( char core ) ;
|
||||
// print mem usage stats
|
||||
int printMem ( ) ;
|
||||
void addMem ( void *mem , int32_t size , const char *note , char isnew ) ;
|
||||
void addMem ( void *mem , int32_t size , const char *note, char isnew);
|
||||
bool rmMem ( void *mem , int32_t size , const char *note ) ;
|
||||
bool lblMem ( void *mem , int32_t size , const char *note );
|
||||
|
||||
@ -161,7 +163,7 @@ class Mem {
|
||||
int64_t m_maxAlloced; // at any one time
|
||||
int64_t m_maxAlloc; // the biggest single alloc ever done
|
||||
const char *m_maxAllocBy; // the biggest single alloc ever done
|
||||
int64_t m_maxMem;
|
||||
//int64_t m_maxMem;
|
||||
|
||||
// shared mem used
|
||||
int64_t m_sharedUsed;
|
||||
|
@ -26,9 +26,7 @@ bool Monitordb::init ( ) {
|
||||
if ( ! m_pc.init ( "monitordb" ,
|
||||
RDB_MONITORDB,
|
||||
pcmem ,
|
||||
pageSize ,
|
||||
true , // use shared mem?
|
||||
false )) // minimizeDiskSeeks?
|
||||
pageSize ))
|
||||
return log("db: Monitordb init failed.");
|
||||
// init the rdb
|
||||
return m_rdb.init ( g_hostdb.m_dir ,
|
||||
|
101
Msg13.cpp
101
Msg13.cpp
@ -2300,7 +2300,8 @@ bool getTestDoc ( char *u , TcpSocket *ts , Msg13Request *r ) {
|
||||
|
||||
// log it for now
|
||||
//if ( g_conf.m_logDebugSpider )
|
||||
log("test: GOT doc in test cache: %s (%"UINT64")",u,h);
|
||||
log("test: GOT doc in test cache: %s (qa/doc.%"UINT64".html)",
|
||||
u,h);
|
||||
|
||||
//fprintf(stderr,"scp gk252:/e/test-spider/doc.%"UINT64".* /home/mwells/gigablast/test-parser/\n",h);
|
||||
|
||||
@ -3132,6 +3133,8 @@ bool addToHammerQueue ( Msg13Request *r ) {
|
||||
// we gotta update the crawldelay here in case we modified
|
||||
// it in the above logic.
|
||||
r->m_crawlDelayMS = crawlDelayMS;
|
||||
// when we stored it in the hammer queue
|
||||
r->m_stored = nowms;
|
||||
// add it to queue
|
||||
if ( ! s_hammerQueueHead ) {
|
||||
s_hammerQueueHead = r;
|
||||
@ -12053,3 +12056,99 @@ char *getRandUserAgent ( int32_t urlIp , int32_t proxyIp , int32_t proxyPort ) {
|
||||
|
||||
return s_agentList[n];
|
||||
}
|
||||
|
||||
// . print the queued url download requests (the list headed by
//   s_hammerQueueHead and chained through m_nextLink) as an html table
// . one row per request: row #, age in the queue, first ip, actual ip,
//   crawl delay (linked to the site's robots.txt), # proxies banning,
//   collection and url
// . appends to "sb" and always returns true
bool printHammerQueueTable ( SafeBuf *sb ) {

	const char *title = "Queued Download Requests";
	sb->safePrintf (
			 "<table %s>"
			 "<tr class=hdrow><td colspan=19>"
			 "<center>"
			 "<b>%s</b>"
			 "</td></tr>"

			 "<tr bgcolor=#%s>"
			 "<td><b>#</td>"
			 "<td><b>age</td>"
			 "<td><b>first ip found</td>"
			 "<td><b>actual ip</td>"
			 "<td><b>crawlDelayMS</td>"
			 "<td><b># proxies banning</td>"

			 "<td><b>coll</td>"
			 "<td><b>url</td>"

			 "</tr>\n"
			 , TABLE_STYLE
			 , title
			 , DARK_BLUE
			 );

	int64_t nowms = gettimeofdayInMilliseconds();

	// row number shown in the first column
	int32_t count = 0;

	for ( Msg13Request *r = s_hammerQueueHead ; r ; r = r->m_nextLink ) {

		// print row
		sb->safePrintf( "<tr bgcolor=#%s>"
				"<td>%i</td>" // #
				"<td>%ims</td>" // age in hammer queue
				"<td>%s</td>"
				,LIGHT_BLUE
				,(int)count
				,(int)(nowms - r->m_stored)
				,iptoa(r->m_firstIp)
				);

		// advance the row number; it used to stay at 0 for every row
		count++;

		sb->safePrintf("<td>%s</td>" // actual ip
			       , iptoa(r->m_urlIp));

		// print crawl delay as link to robots.txt
		sb->safePrintf( "<td><a href=\"");
		Url cu;
		cu.set ( r->ptr_url );
		// "https" has an 's' as its 5th char, "http:" does not
		bool isHttps = false;
		if ( cu.m_url && cu.m_url[4] == 's' ) isHttps = true;
		if ( isHttps ) sb->safeStrcpy ( "https://");
		else           sb->safeStrcpy ( "http://" );
		sb->safeMemcpy ( cu.getHost() , cu.getHostLen() );
		// only print the port if it is not the scheme's default
		int32_t port = cu.getPort();
		int32_t defPort = 80;
		if ( isHttps ) defPort = 443;
		if ( port != defPort ) sb->safePrintf ( ":%"INT32"",port );
		sb->safePrintf ( "/robots.txt\">"
				 "%i"
				 "</a>"
				 "</td>" // crawl delay MS
				 "<td>%i</td>" // proxies banning
				 , r->m_crawlDelayMS
				 , r->m_numBannedProxies
				 );

		// show collection name as a link, also truncate to 32 chars
		CollectionRec *cr = g_collectiondb.getRec ( r->m_collnum );
		char *coll = "none";
		if ( cr ) coll = cr->m_coll;
		sb->safePrintf("<td>");
		if ( cr ) {
			sb->safePrintf("<a href=/admin/sockets?c=");
			sb->urlEncode(coll);
			sb->safePrintf(">");
		}
		sb->safeTruncateEllipsis ( coll , 32 );
		if ( cr ) sb->safePrintf("</a>");
		sb->safePrintf("</td>");

		// . then the url itself
		// . the url comes from crawled content, so quote the href
		//   and html-encode both the attribute value and the
		//   visible text so it can not inject markup into the page
		sb->safePrintf("<td><a href=\"");
		sb->htmlEncode ( r->ptr_url );
		sb->safePrintf("\">");
		SafeBuf shown;
		shown.safeTruncateEllipsis ( r->ptr_url , 128 );
		// buffer may be unallocated if nothing was copied
		if ( shown.getBufStart() )
			sb->htmlEncode ( shown.getBufStart() );
		sb->safePrintf("</a></td>");
		sb->safePrintf("</tr>\n");
	}

	// end the table, like printTcpTable() does
	sb->safePrintf ( "</table><br>\n" );

	return true;
}
|
||||
|
4
Msg13.h
4
Msg13.h
@ -16,6 +16,7 @@
|
||||
#define MAX_PROXYCRAWLDELAYMS 60000
|
||||
|
||||
void resetMsg13Caches ( ) ;
|
||||
bool printHammerQueueTable ( SafeBuf *sb ) ;
|
||||
|
||||
extern char *g_fakeReply;
|
||||
|
||||
@ -55,6 +56,9 @@ public:
|
||||
int64_t m_urlHash48;
|
||||
int32_t m_firstIp;
|
||||
|
||||
// when it was stored in the hammer queue
|
||||
int64_t m_stored;
|
||||
|
||||
// a tmp hack var referencing into m_url[] below
|
||||
char *m_proxiedUrl;
|
||||
int32_t m_proxiedUrlLen;
|
||||
|
10
Msg3.cpp
10
Msg3.cpp
@ -829,7 +829,7 @@ bool Msg3::doneScanning ( ) {
|
||||
if ( now - s_time > 5 || g_errno != ENOTHREADSLOTS ) {
|
||||
log("net: Had error reading %s: %s. Retrying. "
|
||||
"(retry #%"INT32")",
|
||||
base->m_dbname,mstrerror(g_errno) , m_retryNum );
|
||||
base->m_dbname,mstrerror(m_errno) , m_retryNum );
|
||||
s_time = now;
|
||||
}
|
||||
// send email alert if in an infinite loop, but don't send
|
||||
@ -928,19 +928,23 @@ bool Msg3::doneScanning ( ) {
|
||||
// . this returns false and sets g_errno on error
|
||||
// . like if data is corrupt
|
||||
BigFile *ff = base->getFile(m_fileNums[i]);
|
||||
// if we did a merge really quick and delete one of the
|
||||
// files we were reading, i've seen 'ff' be NULL
|
||||
char *filename = "lostfilename";
|
||||
if ( ff ) filename = ff->getFilename();
|
||||
if ( ! m_lists[i].constrain ( m_startKey ,
|
||||
m_constrainKey , // m_endKey
|
||||
mrs , // m_minRecSizes
|
||||
m_hintOffsets[i] ,
|
||||
//m_hintKeys [i] ,
|
||||
&m_hintKeys [i*m_ks] ,
|
||||
ff->getFilename() ,
|
||||
filename,//ff->getFilename() ,
|
||||
m_niceness ) ) {
|
||||
log("net: Had error while constraining list read from "
|
||||
"%s: %s/%s. vfd=%"INT32" parts=%"INT32". "
|
||||
"This is likely caused by corrupted "
|
||||
"data on disk.",
|
||||
mstrerror(g_errno), ff->m_dir ,
|
||||
mstrerror(g_errno), ff->getDir(),
|
||||
ff->getFilename(), ff->m_vfd ,
|
||||
(int32_t)ff->m_numParts );
|
||||
}
|
||||
|
@ -1247,6 +1247,13 @@ bool Msg3a::mergeLists ( ) {
|
||||
|
||||
int32_t need = nd * (8+sizeof(double)+
|
||||
sizeof(key_t)+sizeof(DocIdScore *)+1);
|
||||
if ( need < 0 ) {
|
||||
log("msg3a: need is %i, nd = %i is too many docids",
|
||||
(int)need,(int)nd);
|
||||
g_errno = EBUFTOOSMALL;
|
||||
return true;
|
||||
}
|
||||
|
||||
// allocate it
|
||||
m_finalBuf = (char *)mmalloc ( need , "finalBuf" );
|
||||
m_finalBufSize = need;
|
||||
|
@ -5971,7 +5971,7 @@ bool Msg40::printCSVHeaderRow ( SafeBuf *sb ) {
|
||||
SafeBuf nameBuf (tmp2, 1024);
|
||||
|
||||
int32_t ct = 0;
|
||||
if ( msg20s[0] ) ct = msg20s[0]->m_r->m_contentType;
|
||||
if ( msg20s[0] && msg20s[0]->m_r ) ct = msg20s[0]->m_r->m_contentType;
|
||||
|
||||
CollectionRec *cr =g_collectiondb.getRec(m_firstCollnum);
|
||||
|
||||
|
1585
OldDiskPageCache.cpp
Normal file
1585
OldDiskPageCache.cpp
Normal file
File diff suppressed because it is too large
Load Diff
227
OldDiskPageCache.h
Normal file
227
OldDiskPageCache.h
Normal file
@ -0,0 +1,227 @@
|
||||
// Matt Wells, Copyright Jan 2004
|
||||
|
||||
// . each Rdb has its own m_pageCache member
|
||||
// . a ptr to this class is passed to all File::open() calls
|
||||
// . that ptr is stored in the File class as File::m_pageCachePtr
|
||||
// . the File class uses the virtual file descriptor, vfd, for use with
|
||||
// the pageCache since we tend to open and close files a lot when we run
|
||||
// out of actual fds
|
||||
// . every subsequent read/write to that file will then use the pageCache
|
||||
// . before doing a read in File::read() we try to increase the offset
|
||||
// by filling the beginning of the buffer with data from the page cache.
|
||||
// We also try to decrease the bytes to read by filling the end of the
|
||||
// buffer. What is left to actually read, if anything, is the middle.
|
||||
// . after File::read() completes it calls DiskPageCache::storePages (buf,size,off)
|
||||
// to fill the page cache.
|
||||
// . when maxMem is reached, the DiskPageCache will evict infrequently used pages by
|
||||
// using a linked list
|
||||
// . when File class releases its vfd it must call m_pageCachePtr->close(vfd)
|
||||
|
||||
// . we use PAGESIZE defined in RdbMap.h as our page size
|
||||
// . TODO: convert PAGESIZE to 8000 not 8192
|
||||
|
||||
#ifndef _PAGECACHE_H_
|
||||
#define _PAGECACHE_H_
|
||||
|
||||
// . use 128 disk megabytes per set of pages
|
||||
// . this MUST be a multiple of (PAGE_SIZE+HEADERSIZE) now
|
||||
//#define PAGE_SET_SIZE (128*1024*1024)
|
||||
//#define PHSIZE (GB_PAGE_SIZE+HEADERSIZE)
|
||||
//#define PAGE_SET_SIZE (((128*1024*1024)/PHSIZE)*PHSIZE)
|
||||
|
||||
// how many page sets can we have?
|
||||
#define MAX_PAGE_SETS 128
|
||||
|
||||
// how many BigFiles can be using the same DiskPageCache?
|
||||
#include "File.h"
|
||||
#define MAX_NUM_VFDS2 MAX_NUM_VFDS
|
||||
|
||||
extern void freeAllSharedMem ( int32_t max );
|
||||
|
||||
class DiskPageCache {
|
||||
|
||||
public:
|
||||
|
||||
DiskPageCache();
|
||||
~DiskPageCache();
|
||||
void reset();
|
||||
|
||||
// returns false and sets g_errno if unable to alloc the memory,
|
||||
// true otherwise
|
||||
bool init ( const char *dbname ,
|
||||
char rdbId, // use 0 for none
|
||||
int32_t maxMem ,
|
||||
int32_t pageSize,
|
||||
bool useRAMDisk = false,
|
||||
bool minimizeDiskSeeks = false );
|
||||
// int32_t maxMem ,
|
||||
// void (*getPages2)(DiskPageCache*, int32_t, char*, int32_t,
|
||||
// int64_t, int32_t*, int64_t*) = NULL,
|
||||
// void (*addPages2)(DiskPageCache*, int32_t, char*, int32_t,
|
||||
// int64_t) = NULL,
|
||||
// int32_t (*getVfd2)(DiskPageCache*, int64_t) = NULL,
|
||||
// void (*rmVfd2)(DiskPageCache*, int32_t) = NULL );
|
||||
|
||||
bool initRAMDisk( const char *dbname, int32_t maxMem );
|
||||
|
||||
int32_t getMemUsed () ;
|
||||
int32_t getMemAlloced () { return m_memAlloced; };
|
||||
int32_t getMemMax () { return m_maxMem; };
|
||||
|
||||
int64_t getNumHits () { return m_hits; };
|
||||
int64_t getNumMisses () { return m_misses; };
|
||||
void resetStats () { m_hits = 0 ; m_misses = 0; };
|
||||
|
||||
// verify each page in cache for this file is what is on disk
|
||||
bool verifyData ( class BigFile *f );
|
||||
bool verifyData2 ( int32_t vfd );
|
||||
|
||||
void disableCache ( ) { m_enabled = false; };
|
||||
void enableCache ( ) { m_enabled = true; };
|
||||
|
||||
// . grow/shrink m_memOff[] which maps vfd/page to a mem offset
|
||||
// . returns false and sets g_errno on error
|
||||
// . called by DiskPageCache::open()/close() respectively
|
||||
// . maxFileSize is so we can alloc m_memOff[vfd] big enough for all
|
||||
// pages that are in or will be in the file (if it is being created)
|
||||
int32_t getVfd ( int64_t maxFileSize, bool vfdAllowed );
|
||||
void rmVfd ( int32_t vfd );
|
||||
|
||||
// . this returns true iff the entire read was copied into
|
||||
// "buf" from the page cache
|
||||
// . it will move the used pages to the head of the linked list
|
||||
void getPages ( int32_t vfd ,
|
||||
char **buf ,
|
||||
int32_t numBytes ,
|
||||
int64_t offset ,
|
||||
int32_t *newNumBytes ,
|
||||
int64_t *newOffset ,
|
||||
char **allocBuf , //we alloc this if buf==NULL
|
||||
int32_t *allocSize , //size of the alloc
|
||||
int32_t allocOff );
|
||||
|
||||
// after you read/write from/to disk, copy into the page cache
|
||||
void addPages ( int32_t vfd, char *buf , int32_t numBytes, int64_t offset,
|
||||
int32_t niceness );
|
||||
|
||||
|
||||
// used for minimize disk seeks
|
||||
bool m_minimizeDiskSeeks;
|
||||
|
||||
int32_t m_diskPageSize;
|
||||
|
||||
private:
|
||||
|
||||
void addPage (int32_t vfd,int32_t pageNum,char *page,int32_t size,int32_t skip);
|
||||
void enhancePage ( int32_t poff,char *page,int32_t size,int32_t skip) ;
|
||||
void promotePage ( int32_t poff , bool isNew ) ;
|
||||
void excisePage ( int32_t poff ) ;
|
||||
|
||||
bool growCache ( int32_t mem );
|
||||
|
||||
//bool needsMerge();
|
||||
|
||||
void writeToCache ( int32_t memOff, int32_t memPageOff, void *inBuf,
|
||||
int32_t size );
|
||||
void readFromCache( void *outBuf, int32_t memOff, int32_t memPageOff,
|
||||
int32_t size );
|
||||
|
||||
char *getMemPtrFromMemOff ( int32_t off );
|
||||
|
||||
// . the pages are here
|
||||
// . there are 1024 page sets
|
||||
// . each page set can have up to 128 megabytes of pages
|
||||
// . much more than that and pthread_create() fails
|
||||
char *m_pageSet [ MAX_PAGE_SETS ];
|
||||
int32_t m_pageSetSize [ MAX_PAGE_SETS ];
|
||||
int32_t m_numPageSets;
|
||||
|
||||
// . next available page offset
|
||||
// . when storing a page we read from disk into a pageSet we first
|
||||
// try to get a memory offset from m_availMemOff, if none are there
|
||||
// then we use m_nextMemOff and increment it by PAGE_SIZE+HEADERSIZE
|
||||
// . if m_nextMemOff would breech m_upperMemOff then we call
|
||||
// growCache to increase m_upperMemOff
|
||||
// . we try to grow 100k with each call to growCache
|
||||
// . if m_upperMemOff would breech m_maxMem, then we kick out the
|
||||
// least used page using
|
||||
// . we store a linked list in bytes 4-12 of each page in memory
|
||||
int32_t m_nextMemOff; // next available mem offset to hold a page
|
||||
int32_t m_upperMemOff; // how many bytes are allocated in page sets?
|
||||
int32_t m_maxMem; // max we can allocate
|
||||
|
||||
// . available offsets of released pages
|
||||
// . offsets are into the page sets, m_pageSet[]
|
||||
int32_t *m_availMemOff;
|
||||
int32_t m_numAvailMemOffs;
|
||||
int32_t m_maxAvailMemOffs;
|
||||
|
||||
// . m_memOffFromDiskPage[vfd][diskPageNum] --> memOff
|
||||
// . maps a vfd and disk page number to a memory offset
|
||||
// . maps to -1 if not in page cache
|
||||
// . try to keep the number of pages down, under 100,000
|
||||
// . 100,000 pages would be about 800 megabytes
|
||||
// . I am only planning on using this for tfndb and Checksumdb so
|
||||
// we should be under or around this limit
|
||||
int32_t *m_memOffFromDiskPage [ MAX_NUM_VFDS2 ];
|
||||
|
||||
// . how many offsets are in m_memOffFromDiskPage?
|
||||
// . we have one offset per page in the file
|
||||
int32_t m_maxPagesInFile [ MAX_NUM_VFDS2 ];
|
||||
|
||||
// max number of pages that this file shall have
|
||||
int32_t m_maxPagesPerFile [ MAX_NUM_VFDS2 ];
|
||||
// max number of pages of file currently in the cache
|
||||
int32_t m_numPagesPresentOfFile[ MAX_NUM_VFDS2 ];
|
||||
// mem that has not been used
|
||||
int32_t m_memFree;
|
||||
|
||||
// how much memory is currently allocated?
|
||||
int32_t m_memAlloced;
|
||||
|
||||
// stats (partial hits/misses supported)
|
||||
int64_t m_hits;
|
||||
int64_t m_misses;
|
||||
|
||||
// . linked list boundary info
|
||||
// . linked list is actually stored in bytes 2-8 (next/prev) of pages
|
||||
// in memory
|
||||
int32_t m_headOff;
|
||||
int32_t m_tailOff;
|
||||
|
||||
// for selecting the next vfd in line and preventing sudden closing
|
||||
// and opening of a vfd, resulting in a thread returning and calling
|
||||
// addPages() for the wrong file!!
|
||||
int32_t m_nexti;
|
||||
|
||||
bool m_enabled;
|
||||
|
||||
int32_t m_maxPageSetSize;
|
||||
|
||||
const char *m_dbname;
|
||||
char m_rdbId;
|
||||
bool *m_switch;
|
||||
|
||||
char m_memTag[16];
|
||||
|
||||
//bool m_useRAMDisk;
|
||||
//bool m_useSHM;
|
||||
|
||||
//int m_ramfd;
|
||||
|
||||
//int m_shmids [ 4096 ];
|
||||
//int32_t m_shmidSize [ 4096 ];
|
||||
//int32_t m_numShmids;
|
||||
//int32_t m_maxAllocSize;
|
||||
//int32_t m_spageSize;
|
||||
|
||||
// for overriding the disk page cache with custom functions
|
||||
//bool m_isOverriden;
|
||||
//void (*m_getPages2)(DiskPageCache*, int32_t, char*, int32_t, int64_t,
|
||||
// int32_t*, int64_t*);
|
||||
//void (*m_addPages2)(DiskPageCache*, int32_t, char*, int32_t, int64_t);
|
||||
//int32_t (*m_getVfd2)(DiskPageCache*, int64_t);
|
||||
//void (*m_rmVfd2)(DiskPageCache*, int32_t);
|
||||
};
|
||||
|
||||
#endif
|
@ -3545,6 +3545,10 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
"</td><td>"
|
||||
"<a href=/crawlbot/download/%s_urls.csv>"
|
||||
"csv</a>"
|
||||
|
||||
" <a href=/v3/crawl/download/%s_urls.csv>"
|
||||
"new csv format</a>"
|
||||
|
||||
"</td>"
|
||||
"</tr>"
|
||||
|
||||
@ -3613,6 +3617,10 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
//, cr->m_coll
|
||||
//, cr->m_coll
|
||||
|
||||
// urls.csv old
|
||||
, cr->m_coll
|
||||
|
||||
// urls.csv new format v3
|
||||
, cr->m_coll
|
||||
|
||||
// latest objects in html
|
||||
@ -3623,7 +3631,6 @@ bool printCrawlBotPage2 ( TcpSocket *socket ,
|
||||
, cr->m_coll
|
||||
, rand64
|
||||
|
||||
|
||||
// latest products in html
|
||||
, cr->m_coll
|
||||
, rand64
|
||||
|
@ -3753,14 +3753,15 @@ bool printInlinkText ( SafeBuf *sb , Msg20Reply *mr , SearchInput *si ,
|
||||
if ( firstTime ) {
|
||||
sb->safePrintf("<font size=-1>");
|
||||
sb->safePrintf("<table border=1>"
|
||||
"<tr><td colspan=3>"
|
||||
"<tr><td colspan=10>"
|
||||
"<center>"
|
||||
"<b>Inlinks with Query Terms</b>"
|
||||
"</center>"
|
||||
"</td></tr>"
|
||||
"<tr>"
|
||||
"<td>Inlink Text</td>"
|
||||
"<td>From</td>"
|
||||
"<td>From Site</td>"
|
||||
"<td>Site IP</td>"
|
||||
"<td>Site Rank</td>"
|
||||
"</tr>"
|
||||
);
|
||||
@ -3780,7 +3781,13 @@ bool printInlinkText ( SafeBuf *sb , Msg20Reply *mr , SearchInput *si ,
|
||||
char *host = getHostFast(k->getUrl(),&hostLen,NULL);
|
||||
sb->safePrintf("</td><td>");
|
||||
if ( host ) sb->safeMemcpy(host,hostLen);
|
||||
sb->safePrintf("</td><td>%"INT32"</td></tr>",(int32_t)k->m_siteRank);
|
||||
sb->safePrintf("</td><td>");
|
||||
sb->safePrintf("<a href=/search?c=%s&q=ip%%3A%s"
|
||||
"+gbsortbyint%%3Agbsitenuminlinks&n=100>"
|
||||
,si->m_cr->m_coll,iptoa(k->m_ip));
|
||||
sb->safePrintf("%s</a>",iptoa(k->m_ip));
|
||||
sb->safePrintf("</td><td>%"INT32"</td></tr>"
|
||||
,(int32_t)k->m_siteRank);
|
||||
//sb->safePrintf("<br>");
|
||||
printedInlinkText = true;
|
||||
*numPrinted = *numPrinted + 1;
|
||||
@ -6212,8 +6219,8 @@ bool printPairScore ( SafeBuf *sb , SearchInput *si , PairScore *ps ,
|
||||
//int64_t tf1 = ps->m_termFreq1;//sz1 / 10;
|
||||
//int64_t tf2 = ps->m_termFreq2;//sz2 / 10;
|
||||
|
||||
QueryTerm *qt1 = &msg40->m_msg3a.m_q->m_qterms[qtn1];
|
||||
QueryTerm *qt2 = &msg40->m_msg3a.m_q->m_qterms[qtn2];
|
||||
QueryTerm *qt1 = &q->m_qterms[qtn1];
|
||||
QueryTerm *qt2 = &q->m_qterms[qtn2];
|
||||
|
||||
//int64_t tf1 = msg40->m_msg3a.m_termFreqs[qtn1];
|
||||
//int64_t tf2 = msg40->m_msg3a.m_termFreqs[qtn2];
|
||||
@ -6935,7 +6942,7 @@ bool printSingleScore ( SafeBuf *sb ,
|
||||
//int64_t tf = ss->m_termFreq;//ss->m_listSize;
|
||||
int32_t qtn = ss->m_qtermNum;
|
||||
//int64_t tf = msg40->m_msg3a.m_termFreqs[qtn];
|
||||
QueryTerm *qt = &msg40->m_msg3a.m_q->m_qterms[qtn];
|
||||
QueryTerm *qt = &q->m_qterms[qtn];
|
||||
int64_t tf = qt->m_termFreq;
|
||||
float tfw = ss->m_tfWeight;
|
||||
|
||||
|
@ -51,6 +51,9 @@ bool sendPageSockets ( TcpSocket *s , HttpRequest *r ) {
|
||||
printUdpTable(&p,"Udp Server (dns)", &g_dns.m_udpServer,
|
||||
coll,NULL,s->m_ip,true/*isDns?*/);
|
||||
|
||||
// from msg13.cpp print the queued url download requests
|
||||
printHammerQueueTable ( &p );
|
||||
|
||||
// get # of disks per machine
|
||||
int32_t count = 0;
|
||||
for ( int32_t i = 0 ; i < g_hostdb.getNumHosts(); i++ ) {
|
||||
@ -221,11 +224,7 @@ void printTcpTable ( SafeBuf* p, char *title, TcpServer *server ) {
|
||||
"<td>%s</td>" // ip
|
||||
"<td>%hu</td>" // port
|
||||
"<td>%s</td>" // state
|
||||
"<td>%"INT32"</td>" // bytes read
|
||||
"<td>%"INT32"</td>" // bytes to read
|
||||
"<td>%"INT32"</td>" // bytes sent
|
||||
"<td>%"INT32"</td>" // bytes to send
|
||||
"</tr>\n" ,
|
||||
,
|
||||
bg ,
|
||||
i,
|
||||
s->m_sd ,
|
||||
@ -234,11 +233,46 @@ void printTcpTable ( SafeBuf* p, char *title, TcpServer *server ) {
|
||||
//s->m_timeout ,
|
||||
iptoa(s->m_ip) ,
|
||||
s->m_port ,
|
||||
st ,
|
||||
s->m_readOffset ,
|
||||
st );
|
||||
|
||||
|
||||
// tool tip to show top 500 bytes of send buf
|
||||
if ( s->m_readOffset && s->m_readBuf ) {
|
||||
p->safePrintf("<td><a title=\"");
|
||||
SafeBuf tmp;
|
||||
tmp.safeTruncateEllipsis ( s->m_readBuf ,
|
||||
s->m_readOffset ,
|
||||
500 );
|
||||
p->htmlEncode ( tmp.getBufStart() );
|
||||
p->safePrintf("\">");
|
||||
p->safePrintf("<u>%"INT32"</u></td>",s->m_readOffset);
|
||||
}
|
||||
else
|
||||
p->safePrintf("<td>0</td>");
|
||||
|
||||
p->safePrintf( "<td>%"INT32"</td>" // bytes to read
|
||||
"<td>%"INT32"</td>" // bytes sent
|
||||
,
|
||||
s->m_totalToRead ,
|
||||
s->m_sendOffset ,
|
||||
s->m_totalToSend );
|
||||
s->m_sendOffset
|
||||
);
|
||||
|
||||
// tool tip to show top 500 bytes of send buf
|
||||
if ( s->m_totalToSend && s->m_sendBuf ) {
|
||||
p->safePrintf("<td><a title=\"");
|
||||
SafeBuf tmp;
|
||||
tmp.safeTruncateEllipsis ( s->m_sendBuf ,
|
||||
s->m_totalToSend ,
|
||||
500 );
|
||||
p->htmlEncode ( tmp.getBufStart() );
|
||||
p->safePrintf("\">");
|
||||
p->safePrintf("<u>%"INT32"</u></td>",s->m_totalToSend);
|
||||
}
|
||||
else
|
||||
p->safePrintf("<td>0</td>");
|
||||
|
||||
p->safePrintf("</tr>\n");
|
||||
|
||||
}
|
||||
// end the table
|
||||
p->safePrintf ("</table><br>\n" );
|
||||
@ -358,7 +392,7 @@ void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
|
||||
"<center>"
|
||||
//"<font size=+1>"
|
||||
"<b>%s</b> (%"INT32" transactions)"
|
||||
"(%"INT32" reads ready)"
|
||||
"(%"INT32" requests waiting to processed)"
|
||||
//"</font>"
|
||||
"</td></tr>"
|
||||
"<tr bgcolor=#%s>"
|
||||
|
@ -175,9 +175,17 @@ bool sendPageStats ( TcpSocket *s , HttpRequest *r ) {
|
||||
"<tr class=poo><td><b>current allocations</b>"
|
||||
"</td>"
|
||||
"<td>%"INT32"</td></tr>\n"
|
||||
|
||||
|
||||
"<tr class=poo><td><b>max allocations</b>"
|
||||
"</td>"
|
||||
"<td>%"INT32"</td></tr>\n"
|
||||
|
||||
|
||||
"<tr class=poo><td><b>total allocations</b></td>"
|
||||
"<td>%"INT64"</td></tr>\n" ,
|
||||
g_mem.getNumAllocated() ,
|
||||
g_mem.m_memtablesize ,
|
||||
(int64_t)g_mem.getNumTotalAllocated() );
|
||||
|
||||
}
|
||||
|
192
Parms.cpp
192
Parms.cpp
@ -385,6 +385,13 @@ bool CommandAddColl ( char *rec , char customCrawl ) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// if ( ! g_parms.m_inSyncWithHost0 ) {
|
||||
// log("parms: can not add coll #%i %s until in sync with host 0",
|
||||
// (int)newCollnum,collName);
|
||||
// g_errno = EBADENGINEER;
|
||||
// return true;
|
||||
// }
|
||||
|
||||
// this saves it to disk! returns false and sets g_errno on error.
|
||||
if ( ! g_collectiondb.addNewColl ( collName,
|
||||
customCrawl ,
|
||||
@ -421,6 +428,14 @@ bool CommandResetProxyTable ( char *rec ) {
|
||||
// . returns false if would block
|
||||
bool CommandDeleteColl ( char *rec , WaitEntry *we ) {
|
||||
collnum_t collnum = getCollnumFromParmRec ( rec );
|
||||
|
||||
// if ( ! g_parms.m_inSyncWithHost0 ) {
|
||||
// log("parms: can not del collnum %i until in sync with host 0",
|
||||
// (int)collnum);
|
||||
// g_errno = EBADENGINEER;
|
||||
// return true;
|
||||
// }
|
||||
|
||||
// the delete might block because the tree is saving and we can't
|
||||
// remove our collnum recs from it while it is doing that
|
||||
if ( ! g_collectiondb.deleteRec2 ( collnum ) )
|
||||
@ -436,6 +451,14 @@ bool CommandDeleteColl2 ( char *rec , WaitEntry *we ) {
|
||||
char *data = rec + sizeof(key96_t) + 4;
|
||||
char *coll = (char *)data;
|
||||
collnum_t collnum = g_collectiondb.getCollnum ( coll );
|
||||
|
||||
// if ( ! g_parms.m_inSyncWithHost0 ) {
|
||||
// log("parms: can not del collnum %i until in sync with host 0",
|
||||
// (int)collnum);
|
||||
// g_errno = EBADENGINEER;
|
||||
// return true;
|
||||
// }
|
||||
|
||||
if ( collnum < 0 ) {
|
||||
g_errno = ENOCOLLREC;
|
||||
return true;;
|
||||
@ -671,44 +694,52 @@ bool CommandSpiderTestCont ( char *rec ) {
|
||||
|
||||
// some of these can block a little. if threads are off, a lot!
|
||||
bool CommandMerge ( char *rec ) {
|
||||
forceMergeAll ( RDB_POSDB ,1);
|
||||
forceMergeAll ( RDB_TITLEDB ,1);
|
||||
forceMergeAll ( RDB_TAGDB ,1);
|
||||
forceMergeAll ( RDB_SPIDERDB ,1);
|
||||
forceMergeAll ( RDB_LINKDB ,1);
|
||||
// most of these are probably already in good shape
|
||||
//g_checksumdb.getRdb()->attemptMerge (1,true);
|
||||
g_clusterdb.getRdb()->attemptMerge (1,true); // niceness, force?
|
||||
g_tagdb.getRdb()->attemptMerge (1,true);
|
||||
g_catdb.getRdb()->attemptMerge (1,true);
|
||||
//g_tfndb.getRdb()->attemptMerge (1,true);
|
||||
g_spiderdb.getRdb()->attemptMerge (1,true);
|
||||
// these 2 will probably need the merge the most
|
||||
g_indexdb.getRdb()->attemptMerge (1,true);
|
||||
g_datedb.getRdb()->attemptMerge (1,true);
|
||||
g_titledb.getRdb()->attemptMerge (1,true);
|
||||
//g_sectiondb.getRdb()->attemptMerge (1,true);
|
||||
g_statsdb.getRdb()->attemptMerge (1,true);
|
||||
g_linkdb .getRdb()->attemptMerge (1,true);
|
||||
// g_clusterdb.getRdb()->attemptMerge (1,true); // niceness, force?
|
||||
// g_tagdb.getRdb()->attemptMerge (1,true);
|
||||
// g_catdb.getRdb()->attemptMerge (1,true);
|
||||
// //g_tfndb.getRdb()->attemptMerge (1,true);
|
||||
// g_spiderdb.getRdb()->attemptMerge (1,true);
|
||||
// // these 2 will probably need the merge the most
|
||||
// g_indexdb.getRdb()->attemptMerge (1,true);
|
||||
// g_datedb.getRdb()->attemptMerge (1,true);
|
||||
// g_titledb.getRdb()->attemptMerge (1,true);
|
||||
// //g_sectiondb.getRdb()->attemptMerge (1,true);
|
||||
// g_statsdb.getRdb()->attemptMerge (1,true);
|
||||
// g_linkdb .getRdb()->attemptMerge (1,true);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool CommandMergePosdb ( char *rec ) {
|
||||
g_posdb.getRdb()->attemptMerge (1,true);
|
||||
forceMergeAll ( RDB_POSDB ,1);
|
||||
// set this for each posdb base
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool CommandMergeSectiondb ( char *rec ) {
|
||||
g_sectiondb.getRdb()->attemptMerge (1,true); // nice , force
|
||||
//g_sectiondb.getRdb()->attemptMerge (1,true); // nice , force
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool CommandMergeTitledb ( char *rec ) {
|
||||
g_titledb.getRdb()->attemptMerge (1,true);
|
||||
forceMergeAll ( RDB_TITLEDB ,1);
|
||||
//g_titledb.getRdb()->attemptMerge (1,true);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool CommandMergeSpiderdb ( char *rec ) {
|
||||
g_spiderdb.getRdb()->attemptMerge (1,true);
|
||||
forceMergeAll ( RDB_SPIDERDB ,1);
|
||||
//g_spiderdb.getRdb()->attemptMerge (1,true);
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -942,6 +973,7 @@ private:
|
||||
// . constructor: just clears the state flags
// . m_triedToSync starts false so that the first call to
//   syncParmsWithHost0() does not return early
Parms::Parms ( ) {
|
||||
m_isDefaultLoaded = false;
|
||||
m_inSyncWithHost0 = false;
|
||||
m_triedToSync = false;
|
||||
}
|
||||
|
||||
void Parms::detachSafeBufs ( CollectionRec *cr ) {
|
||||
@ -3834,8 +3866,9 @@ bool Parms::saveToXml ( char *THIS , char *f , char objType ) {
|
||||
if ( g_conf.m_readOnlyMode ) return true;
|
||||
// print into buffer
|
||||
// "seeds" can be pretty big so go with safebuf now
|
||||
//char buf[MAX_CONF_SIZE];
|
||||
SafeBuf sb;
|
||||
// fix so if we core in malloc/free we can still save conf
|
||||
char tmpbuf[200000];
|
||||
SafeBuf sb(tmpbuf,200000);
|
||||
//char *p = buf;
|
||||
//char *pend = buf + MAX_CONF_SIZE;
|
||||
int32_t len ;
|
||||
@ -5082,18 +5115,6 @@ void Parms::init ( ) {
|
||||
m++;
|
||||
*/
|
||||
|
||||
m->m_title = "max mem";
|
||||
m->m_desc = "Mem available to this process. May be exceeded due "
|
||||
"to fragmentation.";
|
||||
m->m_off = (char *)&g_conf.m_maxMem - g;
|
||||
m->m_def = "8000000000";
|
||||
m->m_cgi = "maxmem";
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_page = PAGE_NONE;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
m->m_flags = PF_NOAPI;
|
||||
m++;
|
||||
|
||||
/*
|
||||
m->m_title = "indexdb split";
|
||||
m->m_desc = "Number of times to split indexdb across groups. "
|
||||
@ -9918,6 +9939,18 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
m->m_title = "max mem";
|
||||
m->m_desc = "Mem available to this process. May be exceeded due "
|
||||
"to fragmentation.";
|
||||
m->m_cgi = "maxmem";
|
||||
m->m_off = (char *)&g_conf.m_maxMem - g;
|
||||
m->m_def = "8000000000";
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_page = PAGE_MASTER; // PAGE_NONE;
|
||||
m->m_type = TYPE_LONG_LONG;
|
||||
//m->m_flags = PF_NOAPI;
|
||||
m++;
|
||||
|
||||
|
||||
m->m_title = "max total spiders";
|
||||
m->m_desc = "What is the maximum number of web "
|
||||
@ -12401,15 +12434,15 @@ void Parms::init ( ) {
|
||||
m->m_group = 0;
|
||||
m++;
|
||||
|
||||
m->m_title = "do synchronous writes";
|
||||
m->m_title = "flush disk writes";
|
||||
m->m_desc = "If enabled then all writes will be flushed to disk. "
|
||||
"This is generally a good thing.";
|
||||
"If not enabled, then gb uses the Linux disk write cache.";
|
||||
m->m_cgi = "fw";
|
||||
m->m_off = (char *)&g_conf.m_flushWrites - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_group = 0;
|
||||
m->m_flags = PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_flags = 0;//PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_MASTER;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m->m_group = 0;
|
||||
@ -16094,6 +16127,7 @@ void Parms::init ( ) {
|
||||
|
||||
m->m_title = "home page";
|
||||
static SafeBuf s_tmpBuf;
|
||||
s_tmpBuf.setLabel("stmpb1");
|
||||
s_tmpBuf.safePrintf (
|
||||
"Html to display for the home page. "
|
||||
"Leave empty for default home page. "
|
||||
@ -16170,6 +16204,7 @@ void Parms::init ( ) {
|
||||
|
||||
m->m_title = "html head";
|
||||
static SafeBuf s_tmpBuf2;
|
||||
s_tmpBuf2.setLabel("stmpb2");
|
||||
s_tmpBuf2.safePrintf("Html to display before the search results. ");
|
||||
char *fff = "Leave empty for default. "
|
||||
"Convenient "
|
||||
@ -16280,6 +16315,7 @@ void Parms::init ( ) {
|
||||
|
||||
m->m_title = "html tail";
|
||||
static SafeBuf s_tmpBuf3;
|
||||
s_tmpBuf3.setLabel("stmpb3");
|
||||
s_tmpBuf3.safePrintf("Html to display after the search results. ");
|
||||
s_tmpBuf3.safeStrcpy(fff);
|
||||
s_tmpBuf3.htmlEncode (
|
||||
@ -17279,6 +17315,21 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "compute site num inlinks";
|
||||
m->m_desc = "If this is true Gigablast will "
|
||||
"compute the number of site inlinks for the sites it "
|
||||
"indexes. It will cache them in tagdb for some time. "
|
||||
"The greater the number of inlinks, the longer the cached "
|
||||
"time, because the site is considered more stable.";
|
||||
m->m_cgi = "csni";
|
||||
m->m_off = (char *)&cr.m_computeSiteNumInlinks - x;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "1";
|
||||
m->m_flags = PF_CLONE|PF_API;//PF_HIDDEN | PF_NOSAVE;
|
||||
m->m_page = PAGE_SPIDER;
|
||||
m->m_obj = OBJ_COLL;
|
||||
m++;
|
||||
|
||||
m->m_title = "do link spam checking";
|
||||
m->m_desc = "If this is true, do not allow spammy inlinks to vote. "
|
||||
"This check is "
|
||||
@ -19358,6 +19409,16 @@ void Parms::init ( ) {
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
m->m_title = "log debug disk page cache";
|
||||
m->m_cgi = "ldpc";
|
||||
m->m_off = (char *)&g_conf.m_logDebugDiskPageCache - g;
|
||||
m->m_type = TYPE_BOOL;
|
||||
m->m_def = "0";
|
||||
m->m_priv = 1;
|
||||
m->m_page = PAGE_LOG;
|
||||
m->m_obj = OBJ_CONF;
|
||||
m++;
|
||||
|
||||
m->m_title = "log debug dns messages";
|
||||
m->m_cgi = "lddns";
|
||||
m->m_off = (char *)&g_conf.m_logDebugDns - g;
|
||||
@ -20547,7 +20608,17 @@ bool Parms::addCurrentParmToList2 ( SafeBuf *parmList ,
|
||||
|
||||
//int32_t occNum = -1;
|
||||
key96_t key = makeParmKey ( collnum , m , occNum );
|
||||
|
||||
/*
|
||||
// debug it
|
||||
log("parms: adding parm collnum=%i title=%s "
|
||||
"key=%s datasize=%i data=%s hash=%"UINT32
|
||||
,(int)collnum
|
||||
,m->m_title
|
||||
,KEYSTR(&key,sizeof(key))
|
||||
,(int)dataSize
|
||||
,data
|
||||
,(uint32_t)hash32(data,dataSize));
|
||||
*/
|
||||
// then key
|
||||
if ( ! parmList->safeMemcpy ( &key , sizeof(key) ) )
|
||||
return false;
|
||||
@ -21684,20 +21755,37 @@ void handleRequest3f ( UdpSlot *slot , int32_t niceness ) {
|
||||
// have with ETRYAGAIN in Msg4.cpp
|
||||
|
||||
|
||||
// Callback adapter with the ( fd , state ) signature used for sleep
// callbacks (it is unregistered via g_loop.unregisterSleepCallback()).
// . neither argument is used
// . just asks the global Parms object to sync with host #0; the bool
//   result of syncParmsWithHost0() is intentionally dropped
void tryToSyncWrapper ( int fd , void *state ) {
	g_parms.syncParmsWithHost0 ( );
}
|
||||
|
||||
// host #0 just sends back an empty reply, but it will hit us with
|
||||
// 0x3f parmlist requests. that way it uses the same mechanism and can
|
||||
// guarantee ordering of the parm update requests
|
||||
void gotReplyFromHost0Wrapper ( void *state , UdpSlot *slot ) {
|
||||
// ignore his reply unless error?
|
||||
if ( g_errno )
|
||||
log("parms: got error syncing with host 0: %s",
|
||||
if ( g_errno ) {
|
||||
log("parms: got error syncing with host 0: %s. Retrying.",
|
||||
mstrerror(g_errno));
|
||||
// re-try it!
|
||||
g_parms.m_triedToSync = false;
|
||||
}
|
||||
else {
|
||||
log("parms: synced with host #0");
|
||||
// do not re-call
|
||||
g_loop.unregisterSleepCallback(NULL,tryToSyncWrapper);
|
||||
}
|
||||
|
||||
g_errno = 0;
|
||||
}
|
||||
|
||||
|
||||
// returns false and sets g_errno on error, true otherwise
|
||||
bool Parms::syncParmsWithHost0 ( ) {
|
||||
|
||||
if ( m_triedToSync ) return true;
|
||||
|
||||
m_triedToSync = true;
|
||||
|
||||
m_inSyncWithHost0 = false;
|
||||
|
||||
// dont sync with ourselves
|
||||
@ -21730,6 +21818,8 @@ bool Parms::syncParmsWithHost0 ( ) {
|
||||
|
||||
Host *h = g_hostdb.getHost(0);
|
||||
|
||||
log("parms: trying to sync with host #0");
|
||||
|
||||
// . send it off. use 3e i guess
|
||||
// . host #0 will reply using msg4 really
|
||||
// . msg4 guarantees ordering of requests
|
||||
@ -21798,6 +21888,9 @@ void handleRequest3e ( UdpSlot *slot , int32_t niceness ) {
|
||||
// get collnum
|
||||
collnum_t c = *(collnum_t *)p;
|
||||
p += sizeof(collnum_t);
|
||||
// then coll NAME hash
|
||||
uint32_t collNameHash32 = *(int32_t *)p;
|
||||
p += 4;
|
||||
// sanity check. -1 means g_conf. i guess.
|
||||
if ( c < -1 ) { char *xx=NULL;*xx=0; }
|
||||
// and parm hash
|
||||
@ -21807,6 +21900,14 @@ void handleRequest3e ( UdpSlot *slot , int32_t niceness ) {
|
||||
// him to delete it!
|
||||
CollectionRec *cr = NULL;
|
||||
if ( c >= 0 ) cr = g_collectiondb.getRec ( c );
|
||||
|
||||
// if collection names are different delete it
|
||||
if ( cr && collNameHash32 != hash32n ( cr->m_coll ) ) {
|
||||
log("sync: host had collnum %i but wrong name, "
|
||||
"name not %s like it should be",(int)c,cr->m_coll);
|
||||
cr = NULL;
|
||||
}
|
||||
|
||||
if ( c >= 0 && ! cr ) {
|
||||
// note in log
|
||||
logf(LOG_INFO,"sync: telling host #%"INT32" to delete "
|
||||
@ -21854,7 +21955,8 @@ void handleRequest3e ( UdpSlot *slot , int32_t niceness ) {
|
||||
if ( cr->m_isCustomCrawl == 2 ) cmdStr = "addBulk";
|
||||
// note in log
|
||||
logf(LOG_INFO,"sync: telling host #%"INT32" to add "
|
||||
"collnum %"INT32"", hostId,(int32_t)cr->m_collnum);
|
||||
"collnum %"INT32" coll=%s", hostId,(int32_t)cr->m_collnum,
|
||||
cr->m_coll);
|
||||
// add the parm rec as a parm cmd
|
||||
if ( ! g_parms.addNewParmToList1 ( &replyBuf,
|
||||
(collnum_t)i,
|
||||
@ -21905,17 +22007,25 @@ bool Parms::makeSyncHashList ( SafeBuf *hashList ) {
|
||||
|
||||
// first do g_conf, collnum -1!
|
||||
for ( int32_t i = -1 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||
// shortcut
|
||||
CollectionRec *cr = NULL;
|
||||
if ( i >= 0 ) cr = g_collectiondb.m_recs[i];
|
||||
// skip if empty
|
||||
if ( i >=0 && ! g_collectiondb.m_recs[i] ) continue;
|
||||
if ( i >=0 && ! cr ) continue;
|
||||
// clear since last time
|
||||
tmp.reset();
|
||||
// g_conf?
|
||||
// g_conf? if i is -1 do g_conf
|
||||
if ( ! addAllParmsToList ( &tmp , i ) )
|
||||
return false;
|
||||
// store collnum first as 4 bytes
|
||||
if ( ! hashList->safeMemcpy ( &i , sizeof(collnum_t) ) )
|
||||
return false;
|
||||
// hash that shit
|
||||
// then store the collection name hash, 32 bit hash
|
||||
uint32_t collNameHash32 = 0;
|
||||
if ( cr ) collNameHash32 = hash32n ( cr->m_coll );
|
||||
if ( ! hashList->safeMemcpy ( &collNameHash32, 4 ) )
|
||||
return false;
|
||||
// hash the parms
|
||||
int64_t h64 = hash64 ( tmp.getBufStart(),tmp.length() );
|
||||
// and store it
|
||||
if ( ! hashList->pushLongLong ( h64 ) )
|
||||
|
1
Parms.h
1
Parms.h
@ -516,6 +516,7 @@ class Parms {
|
||||
//
|
||||
|
||||
bool m_inSyncWithHost0;
|
||||
bool m_triedToSync;
|
||||
|
||||
bool m_isDefaultLoaded;
|
||||
|
||||
|
@ -3221,7 +3221,8 @@ void doneSendingNotifyEmailWrapper ( void *state ) {
|
||||
EmailInfo *ei = (EmailInfo *)state;
|
||||
ei->m_notifyBlocked--;
|
||||
// error?
|
||||
log("build: email notification status: %s",mstrerror(g_errno));
|
||||
log("build: email notification status (count=%i) (ei=0x%"PTRFMT"): %s",
|
||||
(int)ei->m_notifyBlocked,(PTRTYPE)ei,mstrerror(g_errno));
|
||||
// ignore it for rest
|
||||
g_errno = 0;
|
||||
// wait for post url to get done
|
||||
@ -3236,7 +3237,8 @@ void doneGettingNotifyUrlWrapper ( void *state , TcpSocket *sock ) {
|
||||
EmailInfo *ei = (EmailInfo *)state;
|
||||
ei->m_notifyBlocked--;
|
||||
// error?
|
||||
log("build: url notification status: %s",mstrerror(g_errno));
|
||||
log("build: url notification status (count=%i) (ei=0x%"PTRFMT"): %s",
|
||||
(int)ei->m_notifyBlocked,(PTRTYPE)ei,mstrerror(g_errno));
|
||||
// wait for email to get done
|
||||
if ( ei->m_notifyBlocked > 0 ) return;
|
||||
// unmark it
|
||||
@ -3253,6 +3255,10 @@ void doneGettingNotifyUrlWrapper ( void *state , TcpSocket *sock ) {
|
||||
// or maxToProcess limitation.
|
||||
bool sendNotification ( EmailInfo *ei ) {
|
||||
|
||||
// disable for now
|
||||
//log("ping: NOT SENDING NOTIFICATION -- DEBUG!!");
|
||||
//return true;
|
||||
|
||||
if ( ei->m_inUse ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// caller must set this, as well as m_finalCallback/m_finalState
|
||||
|
@ -148,9 +148,7 @@ bool Posdb::init ( ) {
|
||||
if ( ! m_pc.init ( "posdb",
|
||||
RDB_POSDB,
|
||||
pcmem ,
|
||||
pageSize ,
|
||||
true , // use RAM disk?
|
||||
false )) // minimize disk seeks?
|
||||
pageSize ))
|
||||
return log("db: Posdb init failed.");
|
||||
|
||||
// . set our own internal rdb
|
||||
|
@ -53,6 +53,7 @@
|
||||
// normally in seo.cpp, but here so it compiles
|
||||
SafeBuf g_qbuf;
|
||||
int32_t g_qbufNeedSave = 0;
|
||||
bool g_inAutoSave;
|
||||
|
||||
// for resetAll()
|
||||
//#include "Msg6.h"
|
||||
@ -467,6 +468,7 @@ Process::Process ( ) {
|
||||
}
|
||||
|
||||
bool Process::init ( ) {
|
||||
g_inAutoSave = false;
|
||||
// -1 means unknown
|
||||
m_diskUsage = -1.0;
|
||||
m_diskAvail = -1LL;
|
||||
@ -1331,7 +1333,9 @@ void processSleepWrapper ( int fd , void *state ) {
|
||||
g_process.m_lastSaveTime = nextLastSaveTime;//now;
|
||||
// save everything
|
||||
logf(LOG_INFO,"db: Autosaving.");
|
||||
g_inAutoSave = 1;
|
||||
g_process.save();
|
||||
g_inAutoSave = 0;
|
||||
}
|
||||
|
||||
bool Process::save ( ) {
|
||||
@ -1874,9 +1878,10 @@ bool Process::saveBlockingFiles1 ( ) {
|
||||
if ( g_hostdb.m_myHost && g_hostdb.m_myHost->m_isProxy )
|
||||
g_proxy.saveUserBufs();
|
||||
|
||||
// save the Conf file now
|
||||
// save the gb.conf file now
|
||||
g_conf.save();
|
||||
// save the conf files
|
||||
// if autosave and we have over 20 colls, just make host #0 do it
|
||||
g_collectiondb.save();
|
||||
// . save repair state
|
||||
// . this is repeated above too
|
||||
|
41
Query.cpp
41
Query.cpp
@ -618,7 +618,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
|
||||
qw->m_userTypePhrase == 'a' ) continue;
|
||||
nqt++;
|
||||
}
|
||||
// count phrase terms too!!!
|
||||
// count single terms
|
||||
for ( int32_t i = 0 ; i < m_numWords; i++ ) {
|
||||
QueryWord *qw = &m_qwords[i];
|
||||
if ( qw->m_ignoreWord &&
|
||||
@ -705,7 +705,7 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
|
||||
}
|
||||
|
||||
|
||||
//char u8Buf[256];
|
||||
// count phrase terms
|
||||
for ( int32_t i = 0 ; i < m_numWords ; i++ ) {
|
||||
// break out if no more explicit bits!
|
||||
/*
|
||||
@ -1019,6 +1019,13 @@ bool Query::setQTerms ( Words &words , Phrases &phrases ) {
|
||||
if (fieldLen > 0) {
|
||||
qt->m_term = m_qwords[fieldStart].m_word;
|
||||
qt->m_termLen = fieldLen;
|
||||
// fix for query
|
||||
// text:"" foo bar ""
|
||||
if ( pw-1 < i ) {
|
||||
log("query: bad query %s",m_orig);
|
||||
g_errno = EMALFORMEDQUERY;
|
||||
return false;
|
||||
}
|
||||
// skip past the end of the field value
|
||||
i = pw-1;
|
||||
}
|
||||
@ -2702,6 +2709,19 @@ bool Query::setQWords ( char boolFlag ,
|
||||
for ( ; s < send && *s != '-' ; s++ );
|
||||
// stop if not hyphen
|
||||
if ( *s != '-' ) break;
|
||||
|
||||
// If the first character is a hyphen, check
|
||||
// if it's part of a negative number. If it is,
|
||||
// don't consider it a hyphen
|
||||
if ( sav == s && is_digit(s[1]) ) {
|
||||
// Read the entire negative number
|
||||
char *s2 = s + 1;
|
||||
for ( ; s2 < send && is_digit(s2[0]); s2++);
|
||||
// If there's a hyphen after the negative
|
||||
// number, use that as the hyphen separator
|
||||
if ( *s2 == '-' ) s = s2;
|
||||
}
|
||||
|
||||
// skip hyphen
|
||||
s++;
|
||||
// must be a digit or . or - or *
|
||||
@ -2746,6 +2766,23 @@ bool Query::setQWords ( char boolFlag ,
|
||||
for ( ; s < send && *s != '-' ; s++ );
|
||||
// stop if not hyphen
|
||||
if ( *s != '-' ) break;
|
||||
|
||||
// If the first character is a hyphen, check
|
||||
// if it's part of a negative number. If it is,
|
||||
// don't consider it a hyphen
|
||||
if ( sav == s && (is_digit(s[1]) ||
|
||||
(s[1] == '.' &&
|
||||
s + 2 < send &&
|
||||
is_digit(s[2]))) ) {
|
||||
// Read the entire negative number
|
||||
char *s2 = s + 1;
|
||||
for ( ; s2 < send &&
|
||||
(is_digit(s2[0]) || s2[0] == '.'); s2++);
|
||||
// If there's a hyphen after the negative
|
||||
// number, use that as the hyphen separator
|
||||
if ( *s2 == '-' ) s = s2;
|
||||
}
|
||||
|
||||
// save that
|
||||
char *cma = s;
|
||||
// skip hyphen
|
||||
|
265
Rdb.cpp
265
Rdb.cpp
@ -1267,6 +1267,7 @@ bool Rdb::dumpTree ( int32_t niceness ) {
|
||||
if ( m_isTitledb && max > 240 ) max = 240;
|
||||
// . keep the number of files down
|
||||
// . dont dump all the way up to the max, leave one open for merging
|
||||
/*
|
||||
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
|
||||
CollectionRec *cr = g_collectiondb.m_recs[i];
|
||||
if ( ! cr ) continue;
|
||||
@ -1279,7 +1280,7 @@ bool Rdb::dumpTree ( int32_t niceness ) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
*/
|
||||
// . wait for all unlinking and renaming activity to flush out
|
||||
// . we do not want to dump to a filename in the middle of being
|
||||
// unlinked
|
||||
@ -1450,7 +1451,6 @@ bool Rdb::dumpCollLoop ( ) {
|
||||
loop:
|
||||
// if no more, we're done...
|
||||
if ( m_dumpCollnum >= getNumBases() ) return true;
|
||||
|
||||
// the only way g_errno can be set here is from a previous dump
|
||||
// error?
|
||||
if ( g_errno ) {
|
||||
@ -1548,7 +1548,7 @@ bool Rdb::dumpCollLoop ( ) {
|
||||
m_dbname,mstrerror(g_errno) );
|
||||
|
||||
log(LOG_INFO,"build: Dumping to %s/%s for coll \"%s\".",
|
||||
base->m_files[m_fn]->m_dir,
|
||||
base->m_files[m_fn]->getDir(),
|
||||
base->m_files[m_fn]->getFilename() ,
|
||||
g_collectiondb.getCollName ( m_dumpCollnum ) );
|
||||
// . append it to "sync" state we have in memory
|
||||
@ -1667,8 +1667,51 @@ bool Rdb::dumpCollLoop ( ) {
|
||||
goto loop;
|
||||
}
|
||||
|
||||
static CollectionRec *s_mergeHead = NULL;
|
||||
static CollectionRec *s_mergeTail = NULL;
|
||||
static bool s_needsBuild = true;
|
||||
|
||||
void addCollnumToLinkedListOfMergeCandidates ( collnum_t dumpCollnum ) {
|
||||
// add this collection to the linked list of merge candidates
|
||||
CollectionRec *cr = g_collectiondb.getRec ( dumpCollnum );
|
||||
if ( ! cr ) return;
|
||||
// do not double add it, if already there just return
|
||||
if ( cr->m_nextLink ) return;
|
||||
if ( cr->m_prevLink ) return;
|
||||
if ( s_mergeTail && cr ) {
|
||||
s_mergeTail->m_nextLink = cr;
|
||||
cr ->m_nextLink = NULL;
|
||||
cr ->m_prevLink = s_mergeTail;
|
||||
s_mergeTail = cr;
|
||||
}
|
||||
else if ( cr ) {
|
||||
cr->m_prevLink = NULL;
|
||||
cr->m_nextLink = NULL;
|
||||
s_mergeHead = cr;
|
||||
s_mergeTail = cr;
|
||||
}
|
||||
}
|
||||
|
||||
// this is also called in Collectiondb::deleteRec2()
|
||||
// . detach "cr" from the merge-candidate list
// . its own links are cleared, its neighbors are joined to each other,
//   and s_mergeHead / s_mergeTail are moved off of it if they pointed to it
// . "cr" must not be NULL
void removeFromMergeLinkedList ( CollectionRec *cr ) {
	// remember the neighbors before wiping the links
	CollectionRec *before = cr->m_prevLink;
	CollectionRec *after  = cr->m_nextLink;
	cr->m_nextLink = NULL;
	cr->m_prevLink = NULL;
	// move the list ends off of this record
	if ( s_mergeHead == cr ) s_mergeHead = after;
	if ( s_mergeTail == cr ) s_mergeTail = before;
	// bridge the gap it leaves behind
	if ( after  ) after ->m_prevLink = before;
	if ( before ) before->m_nextLink = after;
}
|
||||
|
||||
void doneDumpingCollWrapper ( void *state ) {
|
||||
Rdb *THIS = (Rdb *)state;
|
||||
|
||||
// we just finished dumping to a file,
|
||||
// so allow it to try to merge again.
|
||||
//RdbBase *base = THIS->getBase(THIS->m_dumpCollnum);
|
||||
//if ( base ) base->m_checkedForMerge = false;
|
||||
|
||||
// return if the loop blocked
|
||||
if ( ! THIS->dumpCollLoop() ) return;
|
||||
// otherwise, call big wrapper
|
||||
@ -1717,66 +1760,173 @@ void Rdb::doneDumping ( ) {
|
||||
attemptMergeAll(0,NULL);
|
||||
}
|
||||
|
||||
void forceMergeAll ( char rdbId , char niceness ) {
|
||||
// set flag on all RdbBases
|
||||
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||
// we need this quickpoll for when we got 20,000+ collections
|
||||
QUICKPOLL ( niceness );
|
||||
CollectionRec *cr = g_collectiondb.m_recs[i];
|
||||
if ( ! cr ) continue;
|
||||
RdbBase *base = cr->getBase ( rdbId );
|
||||
if ( ! base ) continue;
|
||||
base->m_nextMergeForced = true;
|
||||
}
|
||||
// rebuild the linked list
|
||||
s_needsBuild = true;
|
||||
// and try to merge now
|
||||
attemptMergeAll2 ();
|
||||
}
|
||||
|
||||
// this should be called every few seconds by the sleep callback, too
|
||||
void attemptMergeAll ( int fd , void *state ) {
|
||||
|
||||
if ( state && g_conf.m_logDebugDb ) state = NULL;
|
||||
//g_checksumdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_linkdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_sectiondb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_indexdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_posdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_datedb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_titledb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_tfndb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_tagdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_catdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_clusterdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_statsdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_syncdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_placedb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_doledb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_revdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_spiderdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_cachedb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_serpdb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_monitordb.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
// if we got a rebuild going on
|
||||
g_spiderdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_checksumdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_indexdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_posdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_datedb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_sectiondb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_titledb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_tfndb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_tagdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_catdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_clusterdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_statsdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
g_linkdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_placedb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
//g_revdb2.getRdb()->attemptMerge ( 1 , false , !state);
|
||||
attemptMergeAll2 ( );
|
||||
}
|
||||
|
||||
// called by main.cpp
|
||||
void Rdb::attemptMerge ( int32_t niceness , bool forced , bool doLog ) {
|
||||
// . TODO: if rdbbase::attemptMerge() needs to launch a merge but can't
|
||||
// then do NOT remove from linked list. maybe set a flag like 'needsMerge'
|
||||
void attemptMergeAll2 ( ) {
|
||||
|
||||
for ( int32_t i = 0 ; i < getNumBases() ; i++ ) {
|
||||
// wait for any current merge to stop!
|
||||
if ( g_merge.isMerging() ) return;
|
||||
|
||||
CollectionRec *cr = g_collectiondb.m_recs[i];
|
||||
if ( ! cr ) continue;
|
||||
// if swapped out, this will be NULL, so skip it
|
||||
RdbBase *base = cr->getBasePtr(m_rdbId);
|
||||
//RdbBase *base = getBase(i);
|
||||
if ( ! base ) continue;
|
||||
base->attemptMerge(niceness,forced,doLog);
|
||||
// stop if we got unlink/rename threads out from a merge
|
||||
// in RdbBase.cpp beause the merge can't go until this is 0
|
||||
// lest we have 2000 collections all trying to merge tagdb
|
||||
// at the same time!!!! this happened once...
|
||||
if ( g_numThreads > 0 ) break;
|
||||
int32_t niceness = MAX_NICENESS;
|
||||
collnum_t s_lastCollnum = 0;
|
||||
int32_t count = 0;
|
||||
|
||||
tryLoop:
|
||||
|
||||
// if a collection got deleted, reset this to 0
|
||||
if ( s_lastCollnum >= g_collectiondb.m_numRecs )
|
||||
s_lastCollnum = 0;
|
||||
|
||||
// limit to 1000 checks to save the cpu since we call this once
|
||||
// every 2 seconds.
|
||||
if ( ++count >= 1000 ) return;
|
||||
|
||||
CollectionRec *cr = g_collectiondb.m_recs[s_lastCollnum];
|
||||
if ( ! cr ) goto tryLoop;
|
||||
|
||||
bool force = false;
|
||||
RdbBase *base ;
|
||||
// args = niceness, forceMergeAll, doLog, minToMergeOverride
|
||||
// if RdbBase::attemptMerge() returns true that means it
|
||||
// launched a merge and it will call attemptMergeAll2() when
|
||||
// the merge completes.
|
||||
base = cr->getBasePtr(RDB_POSDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_TITLEDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_TAGDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_LINKDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_SPIDERDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
|
||||
// try next collection
|
||||
s_lastCollnum++;
|
||||
|
||||
goto tryLoop;
|
||||
|
||||
/*
|
||||
|
||||
MDW: linked list approach is too prone to error. just try to
|
||||
merge 1000 collection recs in a call and keep a cursor.
|
||||
|
||||
CollectionRec *last = NULL;
|
||||
CollectionRec *cr;
|
||||
|
||||
rebuild:
|
||||
|
||||
//
|
||||
// . if the first time then build the linked list
|
||||
// . or if we set s_needsBuild to false, like above, re-build it
|
||||
//
|
||||
if ( s_needsBuild ) {
|
||||
s_mergeHead = NULL;
|
||||
s_mergeTail = NULL;
|
||||
}
|
||||
for ( int32_t i=0 ; s_needsBuild && i<g_collectiondb.m_numRecs ; i++) {
|
||||
// we need this quickpoll for when we got 20,000+ collections
|
||||
QUICKPOLL ( niceness );
|
||||
cr = g_collectiondb.getRec(i);//m_recs[i];
|
||||
if ( ! cr ) continue;
|
||||
// add it
|
||||
if ( ! s_mergeHead ) s_mergeHead = cr;
|
||||
if ( last ) last->m_nextLink = cr;
|
||||
cr->m_prevLink = last;
|
||||
cr->m_nextLink = NULL;
|
||||
s_mergeTail = cr;
|
||||
last = cr;
|
||||
}
|
||||
s_needsBuild = false;
|
||||
|
||||
bool force = false;
|
||||
|
||||
// . just scan the linked list that we now maintain
|
||||
// . if a collection is deleted then we remove it from this list too!
|
||||
cr = s_mergeHead;
|
||||
while ( cr ) {
|
||||
QUICKPOLL(niceness);
|
||||
// this is a requirement in RdbBase::attemptMerge() so check
|
||||
// for it here so we can bail out early
|
||||
if ( g_numThreads > 0 ) break;
|
||||
// sanity
|
||||
CollectionRec *vr = g_collectiondb.getRec(cr->m_collnum);
|
||||
if ( vr != cr ) {
|
||||
log("rdb: attemptmergeall: bad collnum %i. how "
|
||||
"did this happen?",
|
||||
(int)cr->m_collnum);
|
||||
s_needsBuild = true;
|
||||
goto rebuild;
|
||||
}
|
||||
// pre advance
|
||||
CollectionRec *next = cr->m_nextLink;
|
||||
// try to merge the next guy in line, in the linked list
|
||||
RdbBase *base ;
|
||||
base = cr->getBasePtr(RDB_POSDB);
|
||||
// args = niceness, forceMergeAll, doLog, minToMergeOverride
|
||||
// if RdbBase::attemptMerge() returns true that means it
|
||||
// launched a merge and it will call attemptMergeAll2() when
|
||||
// the merge completes.
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_TITLEDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_TAGDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_LINKDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
base = cr->getBasePtr(RDB_SPIDERDB);
|
||||
if ( base && base->attemptMerge(niceness,force,true) )
|
||||
return;
|
||||
// hey, why was it in the list? remove it. we also remove
|
||||
// guys if the collection gets deleted in Collectiondb.cpp,
|
||||
// so this is a function.
|
||||
removeFromMergeLinkedList ( cr );
|
||||
cr = next;
|
||||
}
|
||||
|
||||
// every 60 seconds try to merge collectionless rdbs
|
||||
static int32_t s_count = 0;
|
||||
if ( ++s_count == 30 ) {
|
||||
s_count = 0;
|
||||
// try to merge collectionless rdbs like statsdb/catdb
|
||||
// RdbBase *base1 = g_catdb.getRdb()->getBase(0);
|
||||
// if ( base1 ) base1->attemptMerge(niceness,force,true);
|
||||
// RdbBase *base2 = g_statsdb.getRdb()->getBase(0);
|
||||
// if ( base2 ) base2->attemptMerge(niceness,force,true);
|
||||
}
|
||||
*/
|
||||
}
|
||||
|
||||
// . return false and set g_errno on error
|
||||
@ -3152,7 +3302,10 @@ RdbBase *getRdbBase ( uint8_t rdbId , char *coll ) {
|
||||
collnum = (collnum_t) 0;
|
||||
else
|
||||
collnum = g_collectiondb.getCollnum ( coll );
|
||||
if(collnum == -1) return NULL;
|
||||
if(collnum == -1) {
|
||||
g_errno = ENOCOLLREC;
|
||||
return NULL;
|
||||
}
|
||||
//return rdb->m_bases [ collnum ];
|
||||
return rdb->getBase(collnum);
|
||||
}
|
||||
|
9
Rdb.h
9
Rdb.h
@ -15,6 +15,9 @@
|
||||
|
||||
bool makeTrashDir() ;
|
||||
|
||||
void removeFromMergeLinkedList ( class CollectionRec *cr ) ;
|
||||
void addCollnumToLinkedListOfMergeCandidates ( collnum_t dumpCollnum ) ;
|
||||
|
||||
// . each Rdb instance has an ID
|
||||
// . these ids are also return values for getIdFromRdb()
|
||||
#define RDB_START 1
|
||||
@ -78,8 +81,10 @@ char *getDbnameFromId ( uint8_t rdbId ) ;
|
||||
char getKeySizeFromRdbId ( uint8_t rdbId );
|
||||
// and this is -1 if dataSize is variable
|
||||
int32_t getDataSizeFromRdbId ( uint8_t rdbId );
|
||||
void forceMergeAll ( char rdbId , char niceness ) ;
|
||||
// main.cpp calls this
|
||||
void attemptMergeAll ( int fd , void *state ) ;
|
||||
void attemptMergeAll2 ( );
|
||||
|
||||
class Rdb {
|
||||
|
||||
@ -277,8 +282,8 @@ class Rdb {
|
||||
|
||||
// private:
|
||||
|
||||
void attemptMerge ( int32_t niceness , bool forceMergeAll ,
|
||||
bool doLog = true );
|
||||
//void attemptMerge ( int32_t niceness , bool forceMergeAll ,
|
||||
// bool doLog = true );
|
||||
|
||||
bool gotTokenForDump ( ) ;
|
||||
//void gotTokenForMerge ( ) ;
|
||||
|
228
RdbBase.cpp
228
RdbBase.cpp
@ -1,7 +1,7 @@
|
||||
#include "gb-include.h"
|
||||
|
||||
#include "Rdb.h"
|
||||
#include "Msg35.h"
|
||||
//#include "Msg35.h"
|
||||
//#include "Tfndb.h"
|
||||
//#include "Checksumdb.h"
|
||||
#include "Clusterdb.h"
|
||||
@ -99,6 +99,7 @@ void RdbBase::reset ( ) {
|
||||
m_hasMergeFile = false;
|
||||
m_isUnlinking = false;
|
||||
m_numThreads = 0;
|
||||
m_checkedForMerge = false;
|
||||
}
|
||||
|
||||
RdbBase::~RdbBase ( ) {
|
||||
@ -340,6 +341,11 @@ bool RdbBase::init ( char *dir ,
|
||||
// load any saved tree
|
||||
//if ( ! loadTree ( ) ) return false;
|
||||
|
||||
// now diskpagecache is much simpler, just basically rdbcache...
|
||||
return true;
|
||||
|
||||
/*
|
||||
|
||||
// . init BigFile::m_fileSize and m_lastModifiedTime
|
||||
// . m_lastModifiedTime is now used by the merge to select older
|
||||
// titledb files to merge
|
||||
@ -423,6 +429,7 @@ bool RdbBase::init ( char *dir ,
|
||||
//int32_t n = f.write ( buf , 128*1024*5+10 , 0 );
|
||||
//fprintf(stderr,"n=%"INT32"\n",n);
|
||||
return true;
|
||||
*/
|
||||
}
|
||||
|
||||
// . move all files into trash subdir
|
||||
@ -710,38 +717,67 @@ bool RdbBase::setFiles ( ) {
|
||||
|
||||
// return the fileNum we added it to in the array
|
||||
// return -1 and set g_errno on error
|
||||
int32_t RdbBase::addFile ( int32_t id , bool isNew , int32_t mergeNum , int32_t id2 ,
|
||||
bool converting ) {
|
||||
int32_t RdbBase::addFile ( int32_t id , bool isNew , int32_t mergeNum ,
|
||||
int32_t id2 , bool converting ) {
|
||||
|
||||
int32_t n = m_numFiles;
|
||||
// can't exceed this
|
||||
if ( n >= MAX_RDB_FILES ) {
|
||||
g_errno = ETOOMANYFILES;
|
||||
log(LOG_LOGIC,
|
||||
"db: Can not have more than %"INT32" files. File add failed.",
|
||||
(int32_t)MAX_RDB_FILES);
|
||||
"db: Can not have more than %"INT32" files. File add "
|
||||
"failed.",(int32_t)MAX_RDB_FILES);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// HACK: skip to avoid a OOM lockup. if RdbBase cannot dump
|
||||
// its data to disk it can backlog everyone and memory will
|
||||
// never get freed up.
|
||||
int64_t mm = g_mem.m_maxMem;
|
||||
g_mem.m_maxMem = 0x0fffffffffffffffLL;
|
||||
int64_t mm = g_conf.m_maxMem;
|
||||
g_conf.m_maxMem = 0x0fffffffffffffffLL;
|
||||
BigFile *f ;
|
||||
try { f = new (BigFile); }
|
||||
catch ( ... ) {
|
||||
g_mem.m_maxMem = mm;
|
||||
g_conf.m_maxMem = mm;
|
||||
g_errno = ENOMEM;
|
||||
log("RdbBase: new(%i): %s",
|
||||
(int)sizeof(BigFile),mstrerror(g_errno));
|
||||
return -1;
|
||||
}
|
||||
mnew ( f , sizeof(BigFile) , "RdbBFile" );
|
||||
|
||||
// set the data file's filename
|
||||
char name[512];
|
||||
if ( mergeNum <= 0 && m_isTitledb )
|
||||
snprintf(name,511,"%s%04"INT32"-%03"INT32".dat",
|
||||
m_dbname,id,id2 );
|
||||
else if ( mergeNum <= 0 )
|
||||
snprintf ( name ,511,"%s%04"INT32".dat" , m_dbname, id );
|
||||
else if ( m_isTitledb )
|
||||
snprintf ( name ,511,"%s%04"INT32"-%03"INT32".%03"INT32".dat",
|
||||
m_dbname, id , id2, mergeNum );
|
||||
else
|
||||
snprintf(name,511,"%s%04"INT32".%03"INT32".dat",
|
||||
m_dbname,id,mergeNum);
|
||||
|
||||
f->set ( getDir() , name , NULL ); // getStripeDir() );
|
||||
|
||||
// if new insure does not exist
|
||||
if ( isNew && f->doesExist() ) {
|
||||
log("rdb: creating NEW file %s/%s which already exists!",
|
||||
f->getDir(),
|
||||
f->getFilename());
|
||||
mdelete ( f , sizeof(BigFile),"RdbBFile");
|
||||
delete (f);
|
||||
return -1;
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
|
||||
|
||||
RdbMap *m ;
|
||||
try { m = new (RdbMap); }
|
||||
catch ( ... ) {
|
||||
g_mem.m_maxMem = mm;
|
||||
g_conf.m_maxMem = mm;
|
||||
g_errno = ENOMEM;
|
||||
log("RdbBase: new(%i): %s",
|
||||
(int)sizeof(RdbMap),mstrerror(g_errno));
|
||||
@ -751,43 +787,23 @@ int32_t RdbBase::addFile ( int32_t id , bool isNew , int32_t mergeNum , int32_t
|
||||
}
|
||||
mnew ( m , sizeof(RdbMap) , "RdbBMap" );
|
||||
// reinstate the memory limit
|
||||
g_mem.m_maxMem = mm;
|
||||
g_conf.m_maxMem = mm;
|
||||
// sanity check
|
||||
if ( id2 < 0 && m_isTitledb ) { char *xx = NULL; *xx = 0; }
|
||||
|
||||
CollectionRec *cr = NULL;
|
||||
|
||||
// set the data file's filename
|
||||
char name[256];
|
||||
// if we're converting, just add to m_filesIds and m_fileIds2
|
||||
if ( converting ) {
|
||||
log("*-*-*-* Converting titledb files to new file name format");
|
||||
goto skip;
|
||||
}
|
||||
|
||||
if ( mergeNum <= 0 && m_isTitledb )
|
||||
sprintf ( name , "%s%04"INT32"-%03"INT32".dat" , m_dbname, id , id2 );
|
||||
else if ( mergeNum <= 0 )
|
||||
sprintf ( name , "%s%04"INT32".dat" , m_dbname, id );
|
||||
else if ( m_isTitledb )
|
||||
sprintf ( name , "%s%04"INT32"-%03"INT32".%03"INT32".dat",
|
||||
m_dbname, id , id2, mergeNum );
|
||||
else
|
||||
sprintf ( name , "%s%04"INT32".%03"INT32".dat", m_dbname, id , mergeNum);
|
||||
f->set ( getDir() , name , NULL ); // getStripeDir() );
|
||||
|
||||
// if new insure does not exist
|
||||
if ( isNew && f->doesExist() ) {
|
||||
log("rdb: creating NEW file %s/%s which already exists!",
|
||||
f->m_dir,
|
||||
f->getFilename());
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
|
||||
// debug help
|
||||
if ( isNew )
|
||||
log("rdb: adding new file %s/%s",// m_numFiles=%"INT32"",
|
||||
f->m_dir,f->getFilename());//,m_numFiles);
|
||||
f->getDir(),f->getFilename());//,m_numFiles);
|
||||
|
||||
// rename bug fix?
|
||||
/*
|
||||
@ -805,13 +821,15 @@ int32_t RdbBase::addFile ( int32_t id , bool isNew , int32_t mergeNum , int32_t
|
||||
|
||||
// if not a new file sanity check it
|
||||
for ( int32_t j = 0 ; ! isNew && j < f->m_maxParts - 1 ; j++ ) {
|
||||
File *ff = f->m_files[j];
|
||||
// might be headless
|
||||
File *ff = f->getFile2(j);//m_files[j];
|
||||
if ( ! ff ) continue;
|
||||
if ( ff->getFileSize() == MAX_PART_SIZE ) continue;
|
||||
log ( "db: File %s has length %"INT64", but it should be %"INT64". "
|
||||
log ( "db: File %s/%s has length %"INT64", but it should be %"INT64". "
|
||||
"You should move it to a temporary directory "
|
||||
"and restart. It probably happened when the power went "
|
||||
"out and a file delete operation failed to complete.",
|
||||
f->getDir(),
|
||||
ff->getFilename() ,
|
||||
(int64_t)ff->getFileSize(),
|
||||
(int64_t)MAX_PART_SIZE);
|
||||
@ -870,7 +888,8 @@ int32_t RdbBase::addFile ( int32_t id , bool isNew , int32_t mergeNum , int32_t
|
||||
// these writes because it is not initialized yet and will
|
||||
// cause this write to fail!
|
||||
g_statsdb.m_disabled = true;
|
||||
bool status = m->writeMap();
|
||||
// true = alldone
|
||||
bool status = m->writeMap( true );
|
||||
g_statsdb.m_disabled = false;
|
||||
if ( ! status ) return log("db: Save failed.");
|
||||
}
|
||||
@ -1002,7 +1021,7 @@ bool RdbBase::incorporateMerge ( ) {
|
||||
// exit merge mode
|
||||
m_isMerging = false;
|
||||
// return the merge token, no need for a callback
|
||||
g_msg35.releaseToken ( );
|
||||
//g_msg35.releaseToken ( );
|
||||
//return true;
|
||||
}
|
||||
// file #x is the merge file
|
||||
@ -1026,7 +1045,8 @@ bool RdbBase::incorporateMerge ( ) {
|
||||
log(LOG_INFO,"db: Writing map %s.",m_maps[x]->getFilename());
|
||||
// . ensure we can save the map before deleting other files
|
||||
// . sets g_errno and return false on error
|
||||
m_maps[x]->writeMap();
|
||||
// . allDone = true
|
||||
m_maps[x]->writeMap( true );
|
||||
|
||||
// tfndb has his own merge class since titledb merges write tfndb recs
|
||||
RdbMerge *m = &g_merge;
|
||||
@ -1102,7 +1122,7 @@ bool RdbBase::incorporateMerge ( ) {
|
||||
if ( ! m_files[i] ) continue;
|
||||
// debug msg
|
||||
log(LOG_INFO,"merge: Unlinking merged file %s/%s (#%"INT32").",
|
||||
m_files[i]->m_dir,m_files[i]->getFilename(),i);
|
||||
m_files[i]->getDir(),m_files[i]->getFilename(),i);
|
||||
// . append it to "sync" state we have in memory
|
||||
// . when host #0 sends a OP_SYNCTIME signal we dump to disk
|
||||
//g_sync.addOp ( OP_UNLINK , m_files[i] , 0 );
|
||||
@ -1226,7 +1246,8 @@ void RdbBase::doneWrapper2 ( ) {
|
||||
void doneWrapper3 ( void *state ) {
|
||||
RdbBase *THIS = (RdbBase *)state;
|
||||
log("rdb: thread completed rename operation for collnum=%"INT32" "
|
||||
"#threads=%"INT32"",(int32_t)THIS->m_collnum,THIS->m_numThreads);
|
||||
"#thisbaserenamethreads=%"INT32"",
|
||||
(int32_t)THIS->m_collnum,THIS->m_numThreads-1);
|
||||
THIS->doneWrapper4 ( );
|
||||
}
|
||||
|
||||
@ -1243,7 +1264,7 @@ void RdbBase::doneWrapper4 ( ) {
|
||||
if ( --m_numThreads > 0 ) return;
|
||||
}
|
||||
|
||||
// some int16_thand variable notation
|
||||
// some shorthand variable notation
|
||||
int32_t a = m_mergeStartFileNum;
|
||||
int32_t b = m_mergeStartFileNum + m_numFilesToMerge;
|
||||
|
||||
@ -1290,7 +1311,7 @@ void RdbBase::doneWrapper4 ( ) {
|
||||
// exit merge mode
|
||||
m_isMerging = false;
|
||||
// return the merge token, no need for a callback
|
||||
g_msg35.releaseToken ( );
|
||||
//g_msg35.releaseToken ( );
|
||||
// the rename has completed at this point, so tell sync table in mem
|
||||
//g_sync.addOp ( OP_CLOSE , m_files[x] , 0 );
|
||||
// unlink old merge filename from sync table
|
||||
@ -1304,6 +1325,7 @@ void RdbBase::doneWrapper4 ( ) {
|
||||
//attemptMerge ( 1/*niceness*/ , false /*don't force it*/ ) ;
|
||||
// try all in case they were waiting (and not using tokens)
|
||||
//g_tfndb.getRdb()->attemptMerge ( 1 , false );
|
||||
/*
|
||||
g_clusterdb.getRdb()->attemptMerge ( 1 , false );
|
||||
g_linkdb.getRdb()->attemptMerge ( 1 , false );
|
||||
//g_sectiondb.getRdb()->attemptMerge ( 1 , false );
|
||||
@ -1323,6 +1345,10 @@ void RdbBase::doneWrapper4 ( ) {
|
||||
g_posdb.getRdb()->attemptMerge ( 1 , false );
|
||||
//g_datedb.getRdb()->attemptMerge ( 1 , false );
|
||||
g_spiderdb.getRdb()->attemptMerge ( 1 , false );
|
||||
*/
|
||||
|
||||
// try to merge more when we are done
|
||||
attemptMergeAll2 ( );
|
||||
}
|
||||
|
||||
void RdbBase::buryFiles ( int32_t a , int32_t b ) {
|
||||
@ -1360,28 +1386,33 @@ void attemptMergeWrapper ( int fd , void *state ) {
|
||||
}
|
||||
*/
|
||||
|
||||
static void gotTokenForMergeWrapper ( void *state ) ;
|
||||
//static void gotTokenForMergeWrapper ( void *state ) ;
|
||||
|
||||
// the DailyMerge.cpp will set minToMergeOverride for titledb, and this
|
||||
// overrides "forceMergeAll" which is the same as setting "minToMergeOverride"
|
||||
// to "2". (i.e. perform a merge if you got 2 or more files)
|
||||
void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
// . the DailyMerge.cpp will set minToMergeOverride for titledb, and this
|
||||
// overrides "forceMergeAll" which is the same as setting
|
||||
// "minToMergeOverride" to "2". (i.e. perform a merge if you got 2 or more
|
||||
// files)
|
||||
// . now return true if we started a merge, false otherwise
|
||||
// . TODO: fix Rdb::attemptMergeAll() to not remove from linked list if
|
||||
// we had an error in addNewFile() or rdbmerge.cpp's call to rdbbase::addFile
|
||||
bool RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
int32_t minToMergeOverride ) {
|
||||
|
||||
// don't do merge if we're in read only mode
|
||||
if ( g_conf.m_readOnlyMode ) return ;
|
||||
if ( g_conf.m_readOnlyMode ) return false;
|
||||
// or if we are copying our files to a new host
|
||||
//if ( g_hostdb.m_syncHost == g_hostdb.m_myHost ) return;
|
||||
// nor if EITHER of the merge classes are suspended
|
||||
if ( g_merge.m_isSuspended ) return;
|
||||
if ( g_merge2.m_isSuspended ) return;
|
||||
if ( g_merge.m_isSuspended ) return false;
|
||||
if ( g_merge2.m_isSuspended ) return false;
|
||||
|
||||
// shutting down? do not start another merge then
|
||||
if ( g_process.m_mode == EXIT_MODE ) return;
|
||||
if ( g_process.m_mode == EXIT_MODE ) return false;
|
||||
|
||||
// sanity checks
|
||||
if ( g_loop.m_inQuickPoll ) {
|
||||
log("rdb: cant attempt merge in quickpoll");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( niceness == 0 ) { char *xx=NULL;*xx=0; }
|
||||
@ -1398,7 +1429,7 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
if ( doLog )
|
||||
log(LOG_INFO,"db: Can not merge titledb while it "
|
||||
"is dumping.");
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// or if in repair mode, do not mess with any files in any coll
|
||||
@ -1433,7 +1464,7 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
log(LOG_INFO,"merge: Waiting for unlink/rename "
|
||||
"operations to finish before attempting merge "
|
||||
"for %s. (collnum=%"INT32")",m_dbname,(int32_t)m_collnum);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
if ( g_numThreads > 0 ) {
|
||||
@ -1442,7 +1473,7 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
"collection's unlink/rename "
|
||||
"operations to finish before attempting merge "
|
||||
"for %s (collnum=%"INT32").",m_dbname,(int32_t)m_collnum);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
@ -1574,7 +1605,7 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
// tfndb has his own merge class since titledb merges write tfndb recs
|
||||
RdbMerge *m = &g_merge;
|
||||
if ( m->isMerging() )
|
||||
return;
|
||||
return false;
|
||||
|
||||
// if we are tfndb and someone else is merging, do not merge unless
|
||||
// we have 3 or more files
|
||||
@ -1598,7 +1629,7 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
|
||||
// this triggers the negative rec concentration msg below and
|
||||
// tries to merge on one file...
|
||||
if ( ! resuming && m_numFiles <= 1 ) return;
|
||||
if ( ! resuming && m_numFiles <= 1 ) return false;
|
||||
|
||||
// what percent of recs in the collections' rdb are negative?
|
||||
// the rdbmaps hold this info
|
||||
@ -1633,7 +1664,14 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
|
||||
// . don't merge if we don't have the min # of files
|
||||
// . but skip this check if there is a merge to be resumed from b4
|
||||
if ( ! resuming && ! forceMergeAll && numFiles < minToMerge ) return;
|
||||
if ( ! resuming && ! forceMergeAll && numFiles < minToMerge ) {
|
||||
// now we no longer have to check this collection rdb for
|
||||
// merging. this will save a lot of cpu time when we have
|
||||
// 20,000+ collections. if we dump a file to disk for it
|
||||
// then we set this flag back to false in Rdb.cpp.
|
||||
m_checkedForMerge = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
// bail if already merging THIS class
|
||||
if ( m_isMerging ) {
|
||||
@ -1641,14 +1679,14 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
log(LOG_INFO,
|
||||
"merge: Waiting for other merge to complete "
|
||||
"before merging %s.",m_dbname);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
// bail if already waiting for it
|
||||
if ( m_waitingForTokenForMerge ) {
|
||||
if ( doLog )
|
||||
log(LOG_INFO,"merge: Already requested token. "
|
||||
"Request for %s pending.",m_dbname);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
// score it
|
||||
m_waitingForTokenForMerge = true;
|
||||
@ -1704,15 +1742,15 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
// gotTokenForMergeWrapper() may be called multiple times
|
||||
// . if a host is always in urgent mode he may starve another host
|
||||
// whose is too, but his old request has an low priority.
|
||||
int32_t priority = 0;
|
||||
//int32_t priority = 0;
|
||||
// save this so gotTokenForMerge() can use it
|
||||
m_doLog = doLog;
|
||||
//if ( m_mergeUrgent ) priority = 2;
|
||||
//else priority = 0;
|
||||
// tfndb doesn't need token, since titledb merge writes tfndb recs
|
||||
if ( //m_rdb != g_tfndb.getRdb() &&
|
||||
! g_msg35.getToken ( this , gotTokenForMergeWrapper, priority ) )
|
||||
return ;
|
||||
// if ( //m_rdb != g_tfndb.getRdb() &&
|
||||
// ! g_msg35.getToken ( this , gotTokenForMergeWrapper, priority))
|
||||
// return ;
|
||||
// bitch if we got token because there was an error somewhere
|
||||
if ( g_errno ) {
|
||||
log(LOG_LOGIC,"merge: attemptMerge: %s failed: %s",
|
||||
@ -1722,13 +1760,14 @@ void RdbBase::attemptMerge ( int32_t niceness, bool forceMergeAll, bool doLog ,
|
||||
// undo request
|
||||
m_waitingForTokenForMerge = false;
|
||||
// we don't have the token, so we're fucked...
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
// debug msg
|
||||
//if ( doLog )
|
||||
//log(LOG_INFO,"merge: Got merge token for %s without blocking.",
|
||||
// m_dbname);
|
||||
// if did not block
|
||||
/*
|
||||
gotTokenForMerge ( );
|
||||
}
|
||||
|
||||
@ -1738,14 +1777,16 @@ void gotTokenForMergeWrapper ( void *state ) {
|
||||
}
|
||||
|
||||
void RdbBase::gotTokenForMerge ( ) {
|
||||
*/
|
||||
// debug mg
|
||||
//log("RdbBase::gotTokenForMerge: for %s",m_dbname);
|
||||
// don't repeat
|
||||
m_waitingForTokenForMerge = false;
|
||||
// if a dump is happening it will always be the last file, do not
|
||||
// include it in the merge
|
||||
int32_t numFiles = m_numFiles;
|
||||
if ( numFiles > 0 && m_dump->isDumping() ) numFiles--;
|
||||
//int32_t numFiles = m_numFiles;
|
||||
//if ( numFiles > 0 && m_dump->isDumping() ) numFiles--;
|
||||
|
||||
// . if we are significantly over our m_minToMerge limit
|
||||
// then set m_mergeUrgent to true so merge disk operations will
|
||||
// starve any spider disk reads (see Threads.cpp for that)
|
||||
@ -1760,7 +1801,7 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
g_numUrgentMerges++;
|
||||
}
|
||||
// tfndb has his own merge class since titledb merges write tfndb recs
|
||||
RdbMerge *m = &g_merge;
|
||||
//RdbMerge *m = &g_merge;
|
||||
//if ( m_rdb == g_tfndb.getRdb() ) m = &g_merge2;
|
||||
// sanity check
|
||||
if ( m_isMerging || m->isMerging() ) {
|
||||
@ -1769,11 +1810,11 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
//"merge: Someone already merging. Waiting for "
|
||||
//"merge token "
|
||||
//"in order to merge %s.",m_dbname);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// or if # threads out is positive
|
||||
if ( m_numThreads > 0 ) return;
|
||||
if ( m_numThreads > 0 ) return false;
|
||||
|
||||
// clear for take-off
|
||||
//m_inWaiting = false;
|
||||
@ -1793,7 +1834,7 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
int32_t mini ;
|
||||
bool minOld ;
|
||||
int32_t id2 = -1;
|
||||
int32_t minToMerge;
|
||||
//int32_t minToMerge;
|
||||
bool overide = false;
|
||||
//int32_t smini = - 1;
|
||||
//int32_t sn ;
|
||||
@ -1807,7 +1848,7 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
// goto skip;
|
||||
//}
|
||||
|
||||
char rdbId = getIdFromRdb ( m_rdb );
|
||||
//char rdbId = getIdFromRdb ( m_rdb );
|
||||
|
||||
// if one file is even #'ed then we were merging into that, but
|
||||
// got interrupted and restarted. maybe the power went off or maybe
|
||||
@ -1838,8 +1879,8 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
if ( n <= 1 ) {
|
||||
log(LOG_LOGIC,"merge: attemptMerge: Resuming. bad "
|
||||
"engineer");
|
||||
g_msg35.releaseToken();
|
||||
return;
|
||||
//g_msg35.releaseToken();
|
||||
return false;
|
||||
}
|
||||
// make a log note
|
||||
log(LOG_INFO,"merge: Resuming killed merge for %s coll=%s.",
|
||||
@ -1903,7 +1944,7 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
//File *mf = m_maps[j]->getFile();
|
||||
m_maps[j]->rename(fbuf);
|
||||
log("merge: renaming final merged file %s",fbuf);
|
||||
return;
|
||||
return false;
|
||||
}
|
||||
|
||||
// resume the merging
|
||||
@ -2116,8 +2157,8 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
if ( mini == -1 ) {
|
||||
log(LOG_LOGIC,"merge: gotTokenForMerge: Bad engineer. mini "
|
||||
"is -1.");
|
||||
g_msg35.releaseToken();
|
||||
return;
|
||||
//g_msg35.releaseToken();
|
||||
return false;
|
||||
}
|
||||
// . merge from file #mini through file #(mini+n)
|
||||
// . these files should all have ODD fileIds so we can sneak a new
|
||||
@ -2133,8 +2174,8 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
log(LOG_LOGIC,"merge: attemptMerge: could not add "
|
||||
"new file for titledb. No avail ids.");
|
||||
g_errno = 0;
|
||||
g_msg35.releaseToken();
|
||||
return;
|
||||
//g_msg35.releaseToken();
|
||||
return false;
|
||||
}
|
||||
}
|
||||
// . make a filename for the merge
|
||||
@ -2148,8 +2189,8 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
if ( mergeFileNum < 0 ) {
|
||||
log(LOG_LOGIC,"merge: attemptMerge: Could not add new file.");
|
||||
g_errno = 0;
|
||||
g_msg35.releaseToken();
|
||||
return;
|
||||
//g_msg35.releaseToken();
|
||||
return false;
|
||||
}
|
||||
// we just opened a new file
|
||||
//g_sync.addOp ( OP_OPEN , m_files[mergeFileNum] , 0 );
|
||||
@ -2167,8 +2208,8 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
if ( n <= 1 && ! overide ) {
|
||||
log(LOG_LOGIC,"merge: gotTokenForMerge: Not merging %"INT32" files.",
|
||||
n);
|
||||
g_msg35.releaseToken();
|
||||
return;
|
||||
//g_msg35.releaseToken();
|
||||
return false;
|
||||
}
|
||||
|
||||
// . save the # of files we're merging for the cleanup process
|
||||
@ -2176,7 +2217,7 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
m_numFilesToMerge = n ; // numFiles - 1;
|
||||
m_mergeStartFileNum = mergeFileNum + 1; // 1
|
||||
|
||||
CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
|
||||
//CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
|
||||
char *coll = "";
|
||||
if ( cr ) coll = cr->m_coll;
|
||||
|
||||
@ -2224,7 +2265,9 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
m_niceness ,
|
||||
m_pc ,
|
||||
mint /*maxTargetFileSize*/ ,
|
||||
m_ks ) ) return;
|
||||
m_ks ) )
|
||||
// we started the merge so return true here
|
||||
return true;
|
||||
// hey, we're no longer merging i guess
|
||||
m_isMerging = false;
|
||||
// decerment this count
|
||||
@ -2243,9 +2286,12 @@ void RdbBase::gotTokenForMerge ( ) {
|
||||
m_dbname,mstrerror(g_errno));
|
||||
g_errno = 0;
|
||||
// give token back
|
||||
g_msg35.releaseToken();
|
||||
//g_msg35.releaseToken();
|
||||
// try again
|
||||
m_rdb->attemptMerge( m_niceness, false , true );
|
||||
//m_rdb->attemptMerge( m_niceness, false , true );
|
||||
// how did this happen?
|
||||
log("merge: did not block for some reason.");
|
||||
return true;
|
||||
}
|
||||
|
||||
// . use the maps and tree to estimate the size of this list w/o hitting disk
|
||||
@ -2480,16 +2526,18 @@ void RdbBase::saveMaps ( bool useThread ) {
|
||||
log("base: map for file #%i is null",i);
|
||||
continue;
|
||||
}
|
||||
m_maps[i]->writeMap ( );
|
||||
m_maps[i]->writeMap ( false );
|
||||
}
|
||||
}
|
||||
|
||||
void RdbBase::verifyDiskPageCache ( ) {
|
||||
if ( !m_pc ) return;
|
||||
for ( int32_t i = 0; i < m_numFiles; i++ ){
|
||||
BigFile *f = m_files[i];
|
||||
m_pc->verifyData(f);
|
||||
}
|
||||
// disable for now
|
||||
return;
|
||||
// for ( int32_t i = 0; i < m_numFiles; i++ ){
|
||||
// BigFile *f = m_files[i];
|
||||
// m_pc->verifyData(f);
|
||||
// }
|
||||
}
|
||||
|
||||
bool RdbBase::verifyFileSharding ( ) {
|
||||
|
12
RdbBase.h
12
RdbBase.h
@ -231,13 +231,15 @@ class RdbBase {
|
||||
|
||||
// private:
|
||||
|
||||
void attemptMerge ( int32_t niceness , bool forceMergeAll ,
|
||||
// returns true if merge was started, false if no merge could
|
||||
// be launched right now for some reason.
|
||||
bool attemptMerge ( int32_t niceness , bool forceMergeAll ,
|
||||
bool doLog = true ,
|
||||
// -1 means to not override it
|
||||
int32_t minToMergeOverride = -1 );
|
||||
|
||||
bool gotTokenForDump ( ) ;
|
||||
void gotTokenForMerge ( ) ;
|
||||
//bool gotTokenForDump ( ) ;
|
||||
//void gotTokenForMerge ( ) ;
|
||||
|
||||
// called after merge completed
|
||||
bool incorporateMerge ( );
|
||||
@ -420,7 +422,9 @@ class RdbBase {
|
||||
|
||||
// key size
|
||||
char m_ks;
|
||||
|
||||
|
||||
bool m_checkedForMerge;
|
||||
|
||||
int32_t m_pageSize;
|
||||
|
||||
// are we waiting on another merge/dump to complete before our turn?
|
||||
|
@ -1802,7 +1802,9 @@ void RdbBuckets::cleanBuckets ( ) {
|
||||
for ( int32_t i = 0; i < m_numBuckets; i++ ) {
|
||||
RdbBucket *b = m_buckets[i];
|
||||
collnum_t collnum = b->getCollnum();
|
||||
CollectionRec *cr = g_collectiondb.m_recs[collnum];
|
||||
CollectionRec *cr = NULL;
|
||||
if ( collnum < g_collectiondb.m_numRecs )
|
||||
cr = g_collectiondb.m_recs[collnum];
|
||||
if ( cr ) continue;
|
||||
// count # deleted
|
||||
count += b->getNumKeys();
|
||||
@ -2244,14 +2246,16 @@ int64_t RdbBuckets::fastLoadColl( BigFile *f,
|
||||
|
||||
m_dbname = dbname;
|
||||
|
||||
if ( g_errno ) return -1;
|
||||
if ( g_errno )
|
||||
return -1;
|
||||
|
||||
for (int32_t i = 0; i < numBuckets; i++ ) {
|
||||
m_buckets[i] = bucketFactory();
|
||||
if(m_buckets[i] == NULL) return -1;
|
||||
offset = m_buckets[i]->fastLoad(f, offset);
|
||||
// returns -1 on error
|
||||
if ( offset < 0 ) return -1;
|
||||
if ( offset < 0 )
|
||||
return -1;
|
||||
m_numBuckets++;
|
||||
}
|
||||
return offset;
|
||||
@ -2312,7 +2316,7 @@ int64_t RdbBucket::fastSave_r(int fd, int64_t offset) {
|
||||
}
|
||||
|
||||
int64_t RdbBucket::fastLoad(BigFile *f, int64_t offset) {
|
||||
errno = 0;
|
||||
//errno = 0;
|
||||
|
||||
f->read ( &m_collnum,sizeof(collnum_t), offset );
|
||||
offset += sizeof(collnum_t);
|
||||
@ -2333,7 +2337,10 @@ int64_t RdbBucket::fastLoad(BigFile *f, int64_t offset) {
|
||||
offset += recSize*m_numKeys;
|
||||
|
||||
m_endKey = m_keys + endKeyOffset;
|
||||
if ( g_errno ) {
|
||||
log("bucket: fastload %s",mstrerror(g_errno));
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(errno) return -1;
|
||||
return offset;
|
||||
}
|
||||
|
@ -28,8 +28,8 @@
|
||||
// allocating if the record size is 256k or more. Copying 256k only
|
||||
// takes .1 ms on the P4 2.60CGHz. This is on the TODO list.
|
||||
|
||||
#ifndef _RDBCACHE_H_
|
||||
#define _RDBCACHE_H_
|
||||
#ifndef RDBCACHE_H
|
||||
#define RDBCACHE_H
|
||||
|
||||
// . TODO:
|
||||
// . if size of added rec is ABOVE this, then don't use our memory buffer
|
||||
|
18
RdbDump.cpp
18
RdbDump.cpp
@ -229,8 +229,15 @@ void RdbDump::doneDumping ( ) {
|
||||
// did collection get deleted/reset from under us?
|
||||
if ( saved == ENOCOLLREC ) return;
|
||||
|
||||
// save the map to disk
|
||||
if ( m_map ) m_map->writeMap();
|
||||
// save the map to disk. true = allDone
|
||||
if ( m_map ) m_map->writeMap( true );
|
||||
|
||||
// now try to merge this collection/db again
|
||||
// if not already in the linked list. but do not add to linked list
|
||||
// if it is statsdb or catdb.
|
||||
if ( m_rdb && ! m_rdb->m_isCollectionLess )
|
||||
addCollnumToLinkedListOfMergeCandidates ( m_collnum );
|
||||
|
||||
#ifdef GBSANITYCHECK
|
||||
// sanity check
|
||||
log("DOING SANITY CHECK FOR MAP -- REMOVE ME");
|
||||
@ -678,10 +685,11 @@ bool RdbDump::doneDumpingList ( bool addToMap ) {
|
||||
// note it
|
||||
log(LOG_LOGIC,"db: setting fd for vfd to -1.");
|
||||
// mark our fd as not there...
|
||||
int32_t i = (m_offset - m_bytesToWrite) / MAX_PART_SIZE;
|
||||
//int32_t i=(m_offset-m_bytesToWrite) / MAX_PART_SIZE;
|
||||
// sets s_fds[vfd] to -1
|
||||
if ( m_file->m_files[i] )
|
||||
releaseVfd ( m_file->m_files[i]->m_vfd );
|
||||
// MDW: no, can't do this now
|
||||
// if ( m_file->m_files[i] )
|
||||
// releaseVfd ( m_file->m_files[i]->m_vfd );
|
||||
}
|
||||
//log("RdbDump::doneDumpingList: retrying.");
|
||||
return dumpList ( m_list , m_niceness , true );
|
||||
|
73
RdbMap.cpp
73
RdbMap.cpp
@ -48,13 +48,14 @@ void RdbMap::set ( char *dir , char *mapFilename,
|
||||
|
||||
bool RdbMap::close ( bool urgent ) {
|
||||
bool status = true;
|
||||
if ( /*mdw m_numPages > 0 &&*/ m_needToWrite ) status = writeMap ( );
|
||||
if ( /*mdw m_numPages > 0 &&*/ m_needToWrite ) status=writeMap(false);
|
||||
// clears and frees everything
|
||||
if ( ! urgent ) reset ();
|
||||
return status;
|
||||
}
|
||||
|
||||
void RdbMap::reset ( ) {
|
||||
m_reducedMem = false;
|
||||
m_generatingMap = false;
|
||||
int32_t pps = PAGES_PER_SEGMENT;
|
||||
if ( m_newPagesPerSegment > 0 ) pps = m_newPagesPerSegment;
|
||||
@ -70,8 +71,8 @@ void RdbMap::reset ( ) {
|
||||
|
||||
// the ptrs themselves are now a dynamic array to save mem
|
||||
// when we have thousands of collections
|
||||
mfree(m_keys,m_numSegmentPtrs*sizeof(char *),"MapPtrs");
|
||||
mfree(m_offsets,m_numSegmentOffs*sizeof(int16_t *),"MapPtrs");
|
||||
mfree(m_keys,m_numSegmentPtrs*sizeof(char *),"MapPtrs1");
|
||||
mfree(m_offsets,m_numSegmentOffs*sizeof(int16_t *),"MapPtrs2");
|
||||
m_numSegmentPtrs = 0;
|
||||
m_numSegmentOffs = 0;
|
||||
|
||||
@ -93,10 +94,12 @@ void RdbMap::reset ( ) {
|
||||
m_lastLogTime = 0;
|
||||
m_badKeys = 0;
|
||||
m_needVerify = false;
|
||||
|
||||
m_file.reset();
|
||||
}
|
||||
|
||||
|
||||
bool RdbMap::writeMap ( ) {
|
||||
bool RdbMap::writeMap ( bool allDone ) {
|
||||
if ( g_conf.m_readOnlyMode ) return true;
|
||||
// return true if nothing to write out
|
||||
// mdw if ( m_numPages <= 0 ) return true;
|
||||
@ -112,6 +115,8 @@ bool RdbMap::writeMap ( ) {
|
||||
// . close map
|
||||
// . no longer since we use BigFile
|
||||
//m_file.close ( );
|
||||
// map is done so save some memory
|
||||
if ( allDone ) reduceMemFootPrint () ;
|
||||
// return status
|
||||
return status;
|
||||
}
|
||||
@ -236,7 +241,7 @@ bool RdbMap::verifyMap ( BigFile *dataFile ) {
|
||||
"db: Map file %s says that file %s should be %"INT64" bytes "
|
||||
"long, but it is %"INT64" bytes.",
|
||||
m_file.getFilename(),
|
||||
dataFile->m_baseFilename ,
|
||||
dataFile->getFilename() ,
|
||||
m_offset - m_fileStartOffset ,
|
||||
dataFile->getFileSize() );
|
||||
// we let headless files squeak by on this because we cannot
|
||||
@ -290,7 +295,7 @@ bool RdbMap::verifyMap ( BigFile *dataFile ) {
|
||||
dataFile->doesPartExist ( numMissingParts-1 ) )
|
||||
numMissingParts--;
|
||||
if ( numMissingParts > 0 ) {
|
||||
File *f = dataFile->getFile ( numMissingParts );
|
||||
File *f = dataFile->getFile2 ( numMissingParts );
|
||||
if ( f ) log("db: Missing part file before %s.",
|
||||
f->getFilename());
|
||||
}
|
||||
@ -328,7 +333,7 @@ bool RdbMap::verifyMap2 ( ) {
|
||||
"Map or data file is "
|
||||
"corrupt, but it is probably the data file. Please "
|
||||
"delete the map file and restart.",
|
||||
m_file.m_dir,m_file.getFilename() ,
|
||||
m_file.getDir(),m_file.getFilename() ,
|
||||
i,(int64_t)m_pageSize*(int64_t)i+getOffset(i));
|
||||
|
||||
//log("db: oldk.n1=%08"XINT32" n0=%016"XINT64"",
|
||||
@ -341,13 +346,16 @@ bool RdbMap::verifyMap2 ( ) {
|
||||
|
||||
SafeBuf cmd;
|
||||
cmd.safePrintf("mv %s/%s %s/trash/",
|
||||
m_file.m_dir,
|
||||
m_file.getDir(),
|
||||
m_file.getFilename(),
|
||||
g_hostdb.m_dir);
|
||||
log("db: %s",cmd.getBufStart() );
|
||||
gbsystem ( cmd.getBufStart() );
|
||||
|
||||
exit(0);
|
||||
//exit(0);
|
||||
// make the bash shell restart us by returning a 1 error code
|
||||
exit(1);
|
||||
|
||||
//char *xx=NULL;*xx=0;
|
||||
// was k too small?
|
||||
//if ( i + 1 < m_numPages && lastKey <= getKey(i+1) ) {
|
||||
@ -504,6 +512,7 @@ int64_t RdbMap::readSegment ( int32_t seg , int64_t offset , int32_t fileSize )
|
||||
bool RdbMap::addRecord ( char *key, char *rec , int32_t recSize ) {
|
||||
// calculate size of the whole slot
|
||||
//int32_t size = sizeof(key_t) ;
|
||||
if ( m_reducedMem ) { char *xx=NULL;*xx=0; }
|
||||
// include the dataSize, 4 bytes, for each slot if it's not fixed
|
||||
//if ( m_fixedDataSize == -1 ) size += 4;
|
||||
// include the data
|
||||
@ -554,7 +563,7 @@ bool RdbMap::addRecord ( char *key, char *rec , int32_t recSize ) {
|
||||
//pageNum > 0 && getKey(pageNum-1) > getKey(pageNum) ) {
|
||||
log(LOG_LOGIC,"build: RdbMap: added key out of order. "
|
||||
"count=%"INT64" file=%s/%s.",m_badKeys,
|
||||
m_file.m_dir,m_file.getFilename());
|
||||
m_file.getDir(),m_file.getFilename());
|
||||
//log(LOG_LOGIC,"build: k.n1=%"XINT32" %"XINT64" lastKey.n1=%"XINT32" %"XINT64"",
|
||||
// key.n1,key.n0,m_lastKey.n1,m_lastKey.n0 );
|
||||
log(LOG_LOGIC,"build: offset=%"INT64"",
|
||||
@ -661,6 +670,9 @@ bool RdbMap::prealloc ( RdbList *list ) {
|
||||
if ( list->m_ks != m_ks ) { char *xx = NULL; *xx = 0; }
|
||||
// bail now if it's empty
|
||||
if ( list->isEmpty() ) return true;
|
||||
|
||||
if ( m_reducedMem ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// what is the last page we touch?
|
||||
int32_t lastPageNum = (m_offset + list->getListSize() - 1) / m_pageSize;
|
||||
// . need to pre-alloc up here so malloc does not fail mid stream
|
||||
@ -690,6 +702,9 @@ bool RdbMap::addList ( RdbList *list ) {
|
||||
|
||||
// what is the last page we touch?
|
||||
int32_t lastPageNum = (m_offset + list->getListSize() - 1) / m_pageSize;
|
||||
|
||||
if ( m_reducedMem ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// . need to pre-alloc up here so malloc does not fail mid stream
|
||||
// . TODO: only do it if list is big enough
|
||||
while ( lastPageNum + 2 >= m_maxNumPages ) {
|
||||
@ -759,6 +774,8 @@ bool RdbMap::addIndexList ( IndexList *list ) {
|
||||
// return now if empty
|
||||
if ( list->isEmpty() ) return true;
|
||||
|
||||
if ( m_reducedMem ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// we need to call writeMap() before we exit
|
||||
m_needToWrite = true;
|
||||
|
||||
@ -1231,6 +1248,7 @@ int64_t RdbMap::getMemAlloced ( ) {
|
||||
}
|
||||
|
||||
bool RdbMap::addSegmentPtr ( int32_t n ) {
|
||||
if ( m_reducedMem ) { char *xx=NULL;*xx=0; }
|
||||
// realloc
|
||||
if ( n >= m_numSegmentPtrs ) {
|
||||
char **k;
|
||||
@ -1238,7 +1256,7 @@ bool RdbMap::addSegmentPtr ( int32_t n ) {
|
||||
k = (char **) mrealloc (m_keys,
|
||||
m_numSegmentPtrs * sizeof(char *) ,
|
||||
nn * sizeof(char *) ,
|
||||
"MapPtrs" );
|
||||
"MapPtrs1" );
|
||||
// failed?
|
||||
if ( ! k ) return false;
|
||||
// succeeded
|
||||
@ -1253,7 +1271,7 @@ bool RdbMap::addSegmentPtr ( int32_t n ) {
|
||||
o = (int16_t **) mrealloc (m_offsets,
|
||||
m_numSegmentOffs * sizeof(int16_t *) ,
|
||||
nn * sizeof(int16_t *) ,
|
||||
"MapPtrs" );
|
||||
"MapPtrs2" );
|
||||
// failed?
|
||||
if ( ! o ) return false;
|
||||
// succeeded
|
||||
@ -1267,6 +1285,24 @@ bool RdbMap::addSegmentPtr ( int32_t n ) {
|
||||
void RdbMap::reduceMemFootPrint () {
|
||||
if ( m_numSegments != 1 ) return;
|
||||
if ( m_numPages >= 100 ) return;
|
||||
// if already reduced, return now
|
||||
if ( m_newPagesPerSegment > 0 ) return;
|
||||
|
||||
// if it is like posdb0054.map then it is being merged into and
|
||||
// we'll resume a killed merge, so don't mess with it, we'll need to
|
||||
// add more pages.
|
||||
char *s = m_file.getFilename();
|
||||
for ( ; s && *s && ! is_digit(*s) ; s++ );
|
||||
int id = 0;
|
||||
if ( s ) id = atoi(s);
|
||||
if ( id && (id % 2) == 0 ) return;
|
||||
|
||||
// log("map: reducing mem footprint for %s/%s",
|
||||
// m_file.getDir(),
|
||||
// m_file.getFilename());
|
||||
|
||||
// seems kinda buggy now..
|
||||
m_reducedMem = true;
|
||||
//return;
|
||||
char *oldKeys = m_keys[0];
|
||||
short *oldOffsets = m_offsets[0];
|
||||
@ -1295,6 +1331,8 @@ bool RdbMap::addSegment ( ) {
|
||||
//if ( n >= MAX_SEGMENTS ) return log("db: Mapped file is "
|
||||
// "too big. Critical error.");
|
||||
|
||||
if ( m_reducedMem ) { char *xx=NULL;*xx=0; }
|
||||
|
||||
// the array of up to MAX_SEGMENT pool ptrs is now dynamic too!
|
||||
// because diffbot uses thousands of collections, this will save
|
||||
// over 1GB of ram!
|
||||
@ -1391,7 +1429,7 @@ bool RdbMap::generateMap ( BigFile *f ) {
|
||||
reset();
|
||||
if ( g_conf.m_readOnlyMode ) return false;
|
||||
|
||||
log("db: Generating map for %s/%s",f->m_dir,f->getFilename());
|
||||
log("db: Generating map for %s/%s",f->getDir(),f->getFilename());
|
||||
|
||||
// we don't support headless datafiles right now
|
||||
if ( ! f->doesPartExist(0) ) {
|
||||
@ -1450,6 +1488,11 @@ bool RdbMap::generateMap ( BigFile *f ) {
|
||||
mfree ( buf , bufSize , "RdbMap");
|
||||
return true;
|
||||
}
|
||||
|
||||
// debug msg
|
||||
//fprintf(stderr,"reading map @ off=%"INT64" size=%"INT64"\n"
|
||||
// , offset , readSize );
|
||||
|
||||
// otherwise, read it in
|
||||
if ( ! f->read ( buf , readSize , offset ) ) {
|
||||
mfree ( buf , bufSize , "RdbMap");
|
||||
@ -1640,7 +1683,7 @@ bool RdbMap::truncateFile ( BigFile *f ) {
|
||||
int32_t numParts = f->getNumParts();
|
||||
// what part num are we on?
|
||||
int32_t partnum = f->getPartNum ( m_offset );
|
||||
File *p = f->getFile ( partnum );
|
||||
File *p = f->getFile2 ( partnum );
|
||||
if ( ! p ) return log("db: Unable to get part file.");
|
||||
// get offset relative to the part file
|
||||
int32_t newSize = m_offset % (int64_t)MAX_PART_SIZE;
|
||||
@ -1661,7 +1704,7 @@ bool RdbMap::truncateFile ( BigFile *f ) {
|
||||
// MAX_TRUNC_SIZE bytes big
|
||||
File *p2 = NULL;
|
||||
if ( partnum == numParts-2 ) {
|
||||
p2 = f->getFile ( partnum + 1 );
|
||||
p2 = f->getFile2 ( partnum + 1 );
|
||||
if ( ! p2 ) return log("db: Could not get next part in line.");
|
||||
if ( p2->getFileSize() > MAX_TRUNC_SIZE )
|
||||
return log("db: Next part file is bigger than %"INT32" "
|
||||
|
4
RdbMap.h
4
RdbMap.h
@ -111,7 +111,7 @@ class RdbMap {
|
||||
// . this is totally MTUnsafe
|
||||
// . don't be calling addRecord with this is dumping
|
||||
// . flushes when done
|
||||
bool writeMap ( );
|
||||
bool writeMap ( bool allDone );
|
||||
bool writeMap2 ( );
|
||||
int64_t writeSegment ( int32_t segment , int64_t offset );
|
||||
|
||||
@ -342,7 +342,7 @@ class RdbMap {
|
||||
int16_t **m_offsets;
|
||||
int32_t m_numSegmentOffs;
|
||||
|
||||
|
||||
bool m_reducedMem;
|
||||
|
||||
// number of valid pages in the map.
|
||||
int32_t m_numPages;
|
||||
|
@ -303,7 +303,14 @@ bool RdbMerge::getNextList ( ) {
|
||||
// no chop threads
|
||||
m_numThreads = 0;
|
||||
// get base, returns NULL and sets g_errno to ENOCOLLREC on error
|
||||
RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;
|
||||
RdbBase *base = getRdbBase(m_rdbId,m_collnum);
|
||||
if ( ! base ) {
|
||||
// hmmm it doesn't set g_errno so we set it here now
|
||||
// otherwise we do an infinite loop sometimes if a collection
|
||||
// rec is deleted for the collnum
|
||||
g_errno = ENOCOLLREC;
|
||||
return true;
|
||||
}
|
||||
// . if a contributor has just surpassed a "part" in his BigFile
|
||||
// then we can delete that part from the BigFile and the map
|
||||
for ( int32_t i = m_startFileNum ; i < m_startFileNum + m_numFiles; i++ ){
|
||||
|
26
RdbScan.cpp
26
RdbScan.cpp
@ -203,6 +203,7 @@ void gotListWrapper ( void *state ) {
|
||||
|
||||
void RdbScan::gotList ( ) {
|
||||
char *allocBuf = m_fstate.m_allocBuf;
|
||||
int32_t allocOff = m_fstate.m_allocOff; //buf=allocBuf+allocOff
|
||||
int32_t allocSize = m_fstate.m_allocSize;
|
||||
// do not free the allocated buf for when the actual thread
|
||||
// does the read and finally completes in this case. we free it
|
||||
@ -226,7 +227,6 @@ void RdbScan::gotList ( ) {
|
||||
if ( m_fstate.m_allocBuf ) {
|
||||
// get the buffer info for setting the list
|
||||
//char *allocBuf = m_fstate.m_allocBuf;
|
||||
int32_t allocOff = m_fstate.m_allocOff; //buf=allocBuf+allocOff
|
||||
//int32_t allocSize = m_fstate.m_allocSize;
|
||||
int32_t bytesDone = m_fstate.m_bytesDone;
|
||||
// sanity checks
|
||||
@ -248,16 +248,21 @@ void RdbScan::gotList ( ) {
|
||||
m_useHalfKeys ,
|
||||
m_ks );
|
||||
}
|
||||
|
||||
// this was bitching a lot when running on a multinode cluster,
|
||||
// so i effectively disabled it by changing to _GBSANITYCHECK2_
|
||||
#ifdef GBSANITYCHECK2
|
||||
//#ifdef GBSANITYCHECK2
|
||||
// this first test, tests to make sure the read from cache worked
|
||||
DiskPageCache *pc = m_file->getDiskPageCache();
|
||||
if ( pc && ! g_errno ) {
|
||||
if ( pc &&
|
||||
! g_errno &&
|
||||
g_conf.m_logDebugDiskPageCache &&
|
||||
// if we got it from the page cache, verify with disk
|
||||
m_fstate.m_inPageCache ) {
|
||||
// ensure threads disabled
|
||||
bool on = ! g_threads.areThreadsDisabled();
|
||||
if ( on ) g_threads.disableThreads();
|
||||
pc->disableCache();
|
||||
//pc->disableCache();
|
||||
FileState fstate;
|
||||
// ensure we don't mess around
|
||||
fstate.m_allocBuf = NULL;
|
||||
@ -274,7 +279,7 @@ void RdbScan::gotList ( ) {
|
||||
NULL , // callback state
|
||||
gotListWrapper , // FAKE callback
|
||||
MAX_NICENESS , // niceness
|
||||
false, // m_allowPageCache ,
|
||||
false, // m_allowPageCache ,... not for test!
|
||||
m_hitDisk ,
|
||||
16 + m_off );
|
||||
//char *allocBuf = fstate.m_allocBuf;
|
||||
@ -289,16 +294,21 @@ void RdbScan::gotList ( ) {
|
||||
if ( m_bytesToRead != m_list->getListSize() ) {
|
||||
char *xx = NULL; *xx = 0; }
|
||||
}
|
||||
// compare
|
||||
if ( memcmp ( allocBuf+allocOff, bb , m_bytesToRead ) ) {
|
||||
log("db: failed diskpagecache verify");
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
//mfree ( allocBuf , allocSize , "RS" );
|
||||
mfree ( bb , m_bytesToRead , "RS" );
|
||||
if ( on ) g_threads.enableThreads();
|
||||
pc->enableCache();
|
||||
//pc->enableCache();
|
||||
// . this test tests to make sure the page stores worked
|
||||
// . go through each page in page cache and verify on disk
|
||||
pc->verifyData ( m_file );
|
||||
//pc->verifyData ( m_file );
|
||||
}
|
||||
skip:
|
||||
#endif
|
||||
//#endif
|
||||
// assume we did not shift it
|
||||
m_shifted = 0;//false;
|
||||
// if we were doing a cache only read, and got nothing, bail now
|
||||
|
@ -302,8 +302,8 @@ bool Rebalance::saveRebalanceFile ( ) {
|
||||
binToHex ( (unsigned char *)&m_nextKey , MAX_KEY_BYTES , keyStr );
|
||||
|
||||
//log("db: saving rebalance.txt");
|
||||
|
||||
SafeBuf sb;
|
||||
char tmp[30000];
|
||||
SafeBuf sb(tmp,30000);
|
||||
sb.safePrintf (
|
||||
"myshard: %"INT32"\n"
|
||||
"numshards: %"INT32"\n"
|
||||
|
@ -77,6 +77,10 @@ SafeBuf::SafeBuf(char *heapBuf, int32_t bufMax, int32_t bytesInUse, bool ownData
|
||||
}
|
||||
|
||||
SafeBuf::~SafeBuf() {
|
||||
destructor();
|
||||
}
|
||||
|
||||
void SafeBuf::destructor() {
|
||||
if(!m_usingStack && m_buf)
|
||||
mfree(m_buf, m_capacity, "SafeBuf");
|
||||
m_buf = NULL;
|
||||
@ -475,7 +479,8 @@ int32_t SafeBuf::safeSave (char *filename ) {
|
||||
retry22:
|
||||
|
||||
// first write to tmp file
|
||||
SafeBuf fn;
|
||||
char tmp[1024];
|
||||
SafeBuf fn(tmp,1024);
|
||||
fn.safePrintf( "%s.saving",filename );
|
||||
|
||||
int32_t fd = open ( fn.getBufStart() ,
|
||||
|
@ -20,6 +20,7 @@ public:
|
||||
SafeBuf(int32_t initSize, char *label = NULL);
|
||||
|
||||
void constructor();
|
||||
void destructor ();
|
||||
|
||||
//be careful with passing in a stackBuf! it could go out
|
||||
//of scope independently of the safebuf.
|
||||
@ -53,6 +54,8 @@ public:
|
||||
|
||||
//ACCESSORS
|
||||
char *getBuf() { return m_buf + m_length; }
|
||||
char *getBufPtr() { return m_buf + m_length; }
|
||||
char *getBufCursor() { return m_buf + m_length; }
|
||||
char *getBufStart() { return m_buf; }
|
||||
char *getBufEnd() { return m_buf + m_capacity; }
|
||||
int32_t getCapacity() { return m_capacity; }
|
||||
|
@ -362,7 +362,9 @@ bool SiteGetter::gotSiteList ( ) {
|
||||
// mark it so caller knows
|
||||
m_errno = g_errno;
|
||||
// so try again without increasing m_pathDepth
|
||||
m_tryAgain = true;
|
||||
// i've seen a host return EBADRDBID for some reason
|
||||
// and put host #0 in an infinite log spam loop so stop it
|
||||
if ( g_errno != EBADRDBID ) m_tryAgain = true;
|
||||
return true;
|
||||
}
|
||||
// how many urls at this path depth?
|
||||
|
18
Spider.cpp
18
Spider.cpp
@ -652,9 +652,7 @@ bool Spiderdb::init ( ) {
|
||||
if ( ! m_pc.init ( "spiderdb",
|
||||
RDB_SPIDERDB ,
|
||||
pcmem ,
|
||||
pageSize ,
|
||||
false , // use shared mem?
|
||||
false )) // minimizeDiskSeeks?
|
||||
pageSize ))
|
||||
return log(LOG_INIT,"spiderdb: Init failed.");
|
||||
|
||||
// initialize our own internal rdb
|
||||
@ -854,9 +852,7 @@ bool Doledb::init ( ) {
|
||||
if ( ! m_pc.init ( "doledb" ,
|
||||
RDB_DOLEDB ,
|
||||
pcmem ,
|
||||
pageSize ,
|
||||
true , // use shared mem?
|
||||
false )) // minimizeDiskSeeks?
|
||||
pageSize ))
|
||||
return log(LOG_INIT,"doledb: Init failed.");
|
||||
|
||||
// initialize our own internal rdb
|
||||
@ -6340,6 +6336,8 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
|
||||
subloop:
|
||||
|
||||
QUICKPOLL(MAX_NICENESS);
|
||||
|
||||
// must be spidering to dole out
|
||||
if ( ! g_conf.m_spideringEnabled ) return;
|
||||
// or if trying to exit
|
||||
@ -6420,6 +6418,8 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
|
||||
subloopNextPriority:
|
||||
|
||||
QUICKPOLL(MAX_NICENESS);
|
||||
|
||||
// wrap it if we should
|
||||
//if ( m_cri >= g_collectiondb.m_numRecs ) m_cri = 0;
|
||||
// get rec
|
||||
@ -6679,6 +6679,8 @@ void SpiderLoop::spiderDoledUrls ( ) {
|
||||
|
||||
loop:
|
||||
|
||||
QUICKPOLL(MAX_NICENESS);
|
||||
|
||||
// shortcut
|
||||
//CrawlInfo *ci = &cr->m_localCrawlInfo;
|
||||
ci = &cr->m_localCrawlInfo;
|
||||
@ -7534,7 +7536,7 @@ bool SpiderLoop::spiderUrl9 ( SpiderRequest *sreq ,
|
||||
// this causes us to dead lock when spiders use up all the mem, and
|
||||
// file merge operation can not get any, and spiders need to add to
|
||||
// titledb but can not until the merge completes!!
|
||||
if ( g_mem.m_maxMem - g_mem.m_used < 25*1024*1024 ) {
|
||||
if ( g_conf.m_maxMem - g_mem.m_used < 25*1024*1024 ) {
|
||||
static int32_t s_lastTime = 0;
|
||||
static int32_t s_missed = 0;
|
||||
s_missed++;
|
||||
@ -7543,7 +7545,7 @@ bool SpiderLoop::spiderUrl9 ( SpiderRequest *sreq ,
|
||||
if ( now - s_lastTime > 10 ) {
|
||||
log("spider: Need 25MB of free mem to launch spider, "
|
||||
"only have %"INT64". Failed to launch %"INT32" times so "
|
||||
"far.", g_mem.m_maxMem - g_mem.m_used , s_missed );
|
||||
"far.", g_conf.m_maxMem - g_mem.m_used , s_missed );
|
||||
s_lastTime = now;
|
||||
}
|
||||
}
|
||||
|
@ -249,6 +249,10 @@ bool resetProxyStats ( ) {
|
||||
// save the stats
|
||||
bool saveSpiderProxyStats ( ) {
|
||||
|
||||
// do not save if coring in a malloc/free because we call malloc/free
|
||||
// below to save stuff possibly
|
||||
if ( g_inMemFunction ) return true;
|
||||
|
||||
// save hashtable
|
||||
s_proxyBannedTable.save(g_hostdb.m_dir,"proxybantable.dat");
|
||||
|
||||
|
@ -1349,10 +1349,10 @@ void Syncdb::syncStart_r ( bool amThread ) {
|
||||
for ( int32_t m = 0 ; m < f->m_numParts ; m++ ) {
|
||||
|
||||
// get part file
|
||||
File *p = f->m_files[m];
|
||||
File *p = f->getFile2(m);//m_files[m];
|
||||
// copy that
|
||||
sprintf ( cmd , "rcp %s %s:%scoll.%s.%"INT32"/'",
|
||||
p->m_filename,ips,dir,coll,collnum);
|
||||
p->getFilename(),ips,dir,coll,collnum);
|
||||
// execute
|
||||
log ( LOG_INFO, "sync: %s", cmd );
|
||||
// MDW: take out for now
|
||||
|
5
Tagdb.h
5
Tagdb.h
@ -182,7 +182,10 @@ class TagRec {
|
||||
// advance
|
||||
current += recSize;
|
||||
// sanity check
|
||||
if ( recSize > 500000 ) { char *xx=NULL;*xx=0;}
|
||||
if ( recSize > 500000 || recSize < 12 ) {
|
||||
log("tagdb: corrupt tag recsize %i",(int)recSize);
|
||||
return NULL;
|
||||
char *xx=NULL;*xx=0;}
|
||||
// breach list?
|
||||
if ( current < m_listPtrs[i]->m_listEnd) return (Tag *)current;
|
||||
// advance list
|
||||
|
@ -268,7 +268,7 @@ void timePollWrapper ( int fd , void *state ) {
|
||||
THIS->closeLeastUsed( 60 );
|
||||
}
|
||||
|
||||
bool TcpServer::testBind ( uint16_t port ) {
|
||||
bool TcpServer::testBind ( uint16_t port , bool printMsg ) {
|
||||
// assign port for the test
|
||||
m_port = port;
|
||||
// sockaddr_in provides interface to sockaddr
|
||||
@ -321,6 +321,8 @@ retry19:
|
||||
g_errno = errno;
|
||||
//if ( g_errno == EINVAL ) { port++; goto again; }
|
||||
close ( m_sock );
|
||||
if ( ! printMsg )
|
||||
return false;
|
||||
fprintf(stderr,"Failed to bind socket on port %"INT32": %s."
|
||||
"\n"
|
||||
"Are you already running gb?\n"
|
||||
@ -2855,7 +2857,10 @@ int TcpServer::sslHandshake ( TcpSocket *s ) {
|
||||
SSL_set_connect_state(s->m_ssl);
|
||||
}
|
||||
|
||||
// SSL_connect() calls malloc()
|
||||
g_inMemFunction = true;
|
||||
int r = SSL_connect(s->m_ssl);
|
||||
g_inMemFunction = false;
|
||||
|
||||
if ( g_conf.m_logDebugTcp )
|
||||
log("tcp: ssl handshake on sd=%"INT32" r=%i",
|
||||
|
@ -65,7 +65,7 @@ class TcpServer {
|
||||
//int32_t maxReadBufSize = 128*1024 ,
|
||||
//int32_t maxSendBufSize = 128*1024 );
|
||||
|
||||
bool testBind ( uint16_t port ) ;
|
||||
bool testBind ( uint16_t port , bool printMsg ) ;
|
||||
|
||||
// . returns false if blocked, true otherwise
|
||||
// . sets errno on error
|
||||
|
@ -1984,13 +1984,13 @@ bool ThreadQueue::launchThread2 ( ThreadEntry *te ) {
|
||||
// . we know the stored File is still around because of that
|
||||
bool doWrite = fs->m_doWrite;
|
||||
BigFile *bb = fs->m_this;
|
||||
fs->m_fd1 = bb->getfd (fs->m_filenum1, !doWrite, &fs->m_vfd1);
|
||||
fs->m_fd2 = bb->getfd (fs->m_filenum2, !doWrite, &fs->m_vfd2);
|
||||
fs->m_fd1 = bb->getfd (fs->m_filenum1,!doWrite);//&fs->m_vfd1);
|
||||
fs->m_fd2 = bb->getfd (fs->m_filenum2,!doWrite);//&fs->m_vfd2);
|
||||
// is this bad?
|
||||
if ( fs->m_fd1 < 0 ) log("disk: fd1 is %i for %s",
|
||||
fs->m_fd1,bb->m_baseFilename);
|
||||
fs->m_fd1,bb->getFilename());
|
||||
if ( fs->m_fd2 < 0 ) log("disk: fd2 is %i for %s.",
|
||||
fs->m_fd2,bb->m_baseFilename);
|
||||
fs->m_fd2,bb->getFilename());
|
||||
fs->m_closeCount1 = getCloseCount_r ( fs->m_fd1 );
|
||||
fs->m_closeCount2 = getCloseCount_r ( fs->m_fd2 );
|
||||
}
|
||||
|
@ -17,7 +17,8 @@
|
||||
//#define TITLEREC_CURRENT_VERSION 118
|
||||
// add new link stats into LinkInfo
|
||||
//#define TITLEREC_CURRENT_VERSION 119
|
||||
#define TITLEREC_CURRENT_VERSION 120
|
||||
//#define TITLEREC_CURRENT_VERSION 120
|
||||
#define TITLEREC_CURRENT_VERSION 121
|
||||
|
||||
#include "Rdb.h"
|
||||
#include "Url.h"
|
||||
|
@ -114,6 +114,7 @@ Users::~Users(){
|
||||
}
|
||||
|
||||
bool Users::save(){
|
||||
return true;
|
||||
if ( ! m_needsSave ) return true;
|
||||
if ( ! m_loginTable.save(g_hostdb.m_dir,"userlogin.dat",NULL,0) )
|
||||
return log("users: userlogin.dat save failed");
|
||||
|
160
Xml.cpp
160
Xml.cpp
@ -427,14 +427,152 @@ bool Xml::set ( char *s ,
|
||||
}
|
||||
// ok, we got a <script> tag now
|
||||
m_numNodes++;
|
||||
|
||||
// use this for parsing consistency when deleting records
|
||||
// so they equal what we added.
|
||||
bool newVersion = true;
|
||||
if ( version <= 120 ) newVersion = false;
|
||||
//newVersion = false;
|
||||
|
||||
// retry:
|
||||
// scan for </script>
|
||||
char *pstart = &m_xml[i];
|
||||
char *p = pstart;
|
||||
char *pend = &m_xml[0] + m_xmlLen;
|
||||
bool inDoubles = false;
|
||||
bool inSingles = false;
|
||||
bool inComment1 = false;
|
||||
bool inComment2 = false;
|
||||
bool inComment3 = false;
|
||||
bool inComment4 = false;
|
||||
bool escaped = false;
|
||||
//bool newLine = false;
|
||||
// bool foo = false;
|
||||
// if ( m_xmlLen == 13257 ) { //pstart - m_xml == 88881 ) {
|
||||
// foo = true;
|
||||
// }
|
||||
// scan -- 5 continues -- node 1570 is text of script
|
||||
for ( ; p < pend ; p++ ) {
|
||||
// breathe
|
||||
QUICKPOLL(m_niceness);
|
||||
//
|
||||
// adding these new quote checks may cause a few
|
||||
// parsing inconsistencies for a handful of pages
|
||||
//
|
||||
// windows-based html pages use 13 sometimes and no
|
||||
// \n at all...
|
||||
if ( p[0] =='\n' || p[0] == 13 ) { // ^m = 13 = CR
|
||||
//newLine = true;
|
||||
inComment1 = false;
|
||||
}
|
||||
if ( p[0] == '\\' ) {
|
||||
escaped = ! escaped;
|
||||
continue;
|
||||
}
|
||||
//if ( newLine && is_wspace_a(p[0]) )
|
||||
// continue;
|
||||
if ( p[0] == '<' && p[1] == '!' &&
|
||||
p[2] == '-' && p[2] == '-' &&
|
||||
! inSingles && ! inDoubles &&
|
||||
! inComment1 &&
|
||||
! inComment2 &&
|
||||
! inComment4 )
|
||||
inComment3 = true;
|
||||
if ( p[0] == '-' && p[1] == '-' &&
|
||||
p[2] == '>' &&
|
||||
inComment3 )
|
||||
inComment3 = false;
|
||||
// no. i saw <script>//</script> and </script> was
|
||||
// not considered to be in a comment
|
||||
if ( p[0] == '/' && p[1]=='/'&&
|
||||
! inSingles && ! inDoubles &&
|
||||
! inComment2 &&
|
||||
! inComment3 &&
|
||||
// allow for "//<![CDATA[..." to end in
|
||||
// "//]]>" so ignore if inComment4 is true.
|
||||
// i'd say these are the weakest of all 4
|
||||
// comment types in that regard.
|
||||
! inComment4 )
|
||||
inComment1 = true;
|
||||
// handle /* */ comments
|
||||
if ( p[0] == '/' && p[1]=='*' &&
|
||||
! inSingles && ! inDoubles &&
|
||||
! inComment1 &&
|
||||
! inComment3 &&
|
||||
! inComment4 )
|
||||
inComment2 = true;
|
||||
// <![CDATA[...]]> "comments" in <script> tags
|
||||
// are common. CDATA tags seem to prevail even if
|
||||
// within another comment tag, like i am seeing
|
||||
// "//<![CDATA[..." a lot.
|
||||
if ( p[0] == '<' &&
|
||||
p[1] == '!' &&
|
||||
p[2] == '[' &&
|
||||
p[3] == 'C' &&
|
||||
p[4] == 'D' &&
|
||||
p[5] == 'A' &&
|
||||
p[6] == 'T' &&
|
||||
p[7] == 'A' &&
|
||||
p[8] == '['
|
||||
//! inComment1 &&
|
||||
//! inComment2 &&
|
||||
//! inComment3 )
|
||||
)
|
||||
inComment4 = true;
|
||||
if ( p[0] == ']' &&
|
||||
p[1] == ']' &&
|
||||
p[2] == '>' )
|
||||
inComment4 = false;
|
||||
if ( p[0] == '*' &&
|
||||
p[1]=='/' &&
|
||||
! inComment4 )
|
||||
inComment2 = false;
|
||||
// no longer the start of a newLine
|
||||
//newLine = false;
|
||||
// don't check for quotes or </script> if in comment
|
||||
// no, i've seen <script>//</script> on ibm.com pages,
|
||||
// so just ignore ' and " for // comments
|
||||
if ( inComment1 && newVersion ) {
|
||||
escaped = false;
|
||||
//continue;
|
||||
}
|
||||
if ( inComment2 && newVersion ) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if ( inComment3 && newVersion ) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
if ( inComment4 && newVersion ) {
|
||||
escaped = false;
|
||||
continue;
|
||||
}
|
||||
// if an unescaped double quote
|
||||
if ( p[0] == '\"' && ! escaped && ! inSingles &&
|
||||
// i've seen <script>//</script> on ibm.com pages,
|
||||
// so just ignore ' and " for // comments
|
||||
! inComment1 )
|
||||
inDoubles = ! inDoubles;
|
||||
// if an unescaped single quote.
|
||||
if ( p[0] == '\'' && ! escaped && ! inDoubles &&
|
||||
// i've seen <script>//</script> on ibm.com pages,
|
||||
// so just ignore ' and " for // comments
|
||||
! inComment1 )
|
||||
inSingles = ! inSingles;
|
||||
// no longer escaped
|
||||
escaped = false;
|
||||
// if ( foo ) {
|
||||
// fprintf(stderr,"%c [%lu](inDoubles=%i,"
|
||||
// "inSingles=%i)\n",*p,
|
||||
// (unsigned long)(uint8_t)*p,
|
||||
// (int)inDoubles,
|
||||
// (int)inSingles);
|
||||
// }
|
||||
// if ( inSingles )
|
||||
// continue;
|
||||
// if ( inDoubles )
|
||||
// continue;
|
||||
// keep going if not a tag
|
||||
if ( p[0] != '<' ) continue;
|
||||
// </script> or </gbframe> stops it
|
||||
@ -444,8 +582,11 @@ bool Xml::set ( char *s ,
|
||||
to_lower_a(p[4]) == 'r' &&
|
||||
to_lower_a(p[5]) == 'i' &&
|
||||
to_lower_a(p[6]) == 'p' &&
|
||||
to_lower_a(p[7]) == 't' )
|
||||
to_lower_a(p[7]) == 't' ) {
|
||||
if((inDoubles||inSingles)&& newVersion)
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
if ( to_lower_a(p[2]) == 'g' &&
|
||||
to_lower_a(p[3]) == 'b' &&
|
||||
to_lower_a(p[4]) == 'f' &&
|
||||
@ -460,14 +601,29 @@ bool Xml::set ( char *s ,
|
||||
to_lower_a(p[3]) == 'r' &&
|
||||
to_lower_a(p[4]) == 'i' &&
|
||||
to_lower_a(p[5]) == 'p' &&
|
||||
to_lower_a(p[6]) == 't' )
|
||||
to_lower_a(p[6]) == 't' ) {
|
||||
if ( (inDoubles || inSingles) && newVersion )
|
||||
continue;
|
||||
break;
|
||||
}
|
||||
}
|
||||
// if ( foo )
|
||||
// log("done");
|
||||
// make sure we do not breach! i saw this happen once!
|
||||
if ( m_numNodes >= m_maxNumNodes ) break;
|
||||
// was it like <script></script> then no scripttext tag?
|
||||
if ( p - pstart == 0 )
|
||||
continue;
|
||||
|
||||
// none found? allow for </script> in quotes then, maybe
|
||||
// they were unbalanced quotes. also allow for </script>
|
||||
// in a comment. do we need to do this? just enable it if
|
||||
// we find a page that needs it.
|
||||
// if ( p == pend && newVersion ) {
|
||||
// newVersion = false;
|
||||
// goto retry;
|
||||
// }
|
||||
|
||||
XmlNode *xn = &m_nodes[m_numNodes++];
|
||||
xn->m_nodeId = TAG_SCRIPTTEXT;//0; // TEXT NODE
|
||||
xn->m_node = pstart;
|
||||
|
49
XmlDoc.cpp
49
XmlDoc.cpp
@ -2505,6 +2505,10 @@ bool XmlDoc::indexDoc ( ) {
|
||||
if ( g_errno == ESHUTTINGDOWN )
|
||||
return true;
|
||||
|
||||
// i saw this on shard 9, how is it happening
|
||||
if ( g_errno == EBADRDBID )
|
||||
return true;
|
||||
|
||||
// if docid not found when trying to do a query reindex...
|
||||
// this really shouldn't happen but i think we were adding
|
||||
// additional SpiderRequests since we were using a fake first ip.
|
||||
@ -6699,6 +6703,9 @@ Xml *XmlDoc::getXml ( ) {
|
||||
// return it if it is set
|
||||
if ( m_xmlValid ) return &m_xml;
|
||||
|
||||
// note it
|
||||
setStatus ( "parsing html");
|
||||
|
||||
// get the filtered content
|
||||
char **u8 = getUtf8Content();
|
||||
if ( ! u8 || u8 == (char **)-1 ) return (Xml *)u8;
|
||||
@ -6707,8 +6714,6 @@ Xml *XmlDoc::getXml ( ) {
|
||||
uint8_t *ct = getContentType();
|
||||
if ( ! ct || ct == (void *)-1 ) return (Xml *)ct;
|
||||
|
||||
// note it
|
||||
setStatus ( "getting xml");
|
||||
// set it
|
||||
if ( ! m_xml.set ( *u8 ,
|
||||
u8len ,
|
||||
@ -7501,6 +7506,8 @@ Sections *XmlDoc::getImpliedSections ( ) {
|
||||
// add in Section::m_sentFlags bits having to do with our voting tables
|
||||
Sections *XmlDoc::getSections ( ) {
|
||||
|
||||
setStatus("getting sections");
|
||||
|
||||
// get the sections without implied sections
|
||||
Sections *ss = getImpliedSections();
|
||||
if ( ! ss || ss==(void *)-1) return (Sections *)ss;
|
||||
@ -10014,6 +10021,8 @@ char *XmlDoc::getIsDup ( ) {
|
||||
return &m_isDup;
|
||||
}
|
||||
|
||||
setStatus ( "checking for dups" );
|
||||
|
||||
// BUT if we are already indexed and a crawlbot/bulk diffbot job
|
||||
// then do not kick us out just because another indexed doc is
|
||||
// a dup of us because it messes up the TestOnlyProcessIfNew smoketests
|
||||
@ -10047,8 +10056,6 @@ char *XmlDoc::getIsDup ( ) {
|
||||
// sanity. must be posdb list.
|
||||
if ( ! list->isEmpty() && list->m_ks != 18 ) { char *xx=NULL;*xx=0;}
|
||||
|
||||
setStatus ( "checking for dups" );
|
||||
|
||||
// . see if there are any pages that seem like they are dups of us
|
||||
// . they must also have a HIGHER score than us, for us to be
|
||||
// considered the dup
|
||||
@ -13794,6 +13801,30 @@ int32_t *XmlDoc::getSiteNumInlinks ( ) {
|
||||
// sanity check
|
||||
if ( m_setFromTitleRec && ! m_useSecondaryRdbs) {char *xx=NULL;*xx=0;}
|
||||
|
||||
CollectionRec *cr = getCollRec();
|
||||
if ( ! cr ) return NULL;
|
||||
|
||||
// hacks of speed. computeSiteNumInlinks is true by default
|
||||
// but if the user turns it off then just use sitelinks.txt
|
||||
if ( ! cr->m_computeSiteNumInlinks ) {
|
||||
int32_t hostHash32 = getHostHash32a();
|
||||
int32_t min = g_tagdb.getMinSiteInlinks ( hostHash32 );
|
||||
// try with www if not there
|
||||
if ( min < 0 && ! m_firstUrl.hasSubdomain() ) {
|
||||
int32_t wwwHash32 = m_firstUrl.getHash32WithWWW();
|
||||
min = g_tagdb.getMinSiteInlinks ( wwwHash32 );
|
||||
}
|
||||
// if still not in sitelinks.txt, just use 0
|
||||
if ( min < 0 ) {
|
||||
m_siteNumInlinksValid = true;
|
||||
m_siteNumInlinks = 0;
|
||||
return &m_siteNumInlinks;
|
||||
}
|
||||
m_siteNumInlinks = min;
|
||||
m_siteNumInlinksValid = true;
|
||||
return &m_siteNumInlinks;
|
||||
}
|
||||
|
||||
setStatus ( "getting site num inlinks");
|
||||
|
||||
// get it from the tag rec if we can
|
||||
@ -13831,9 +13862,6 @@ int32_t *XmlDoc::getSiteNumInlinks ( ) {
|
||||
if ( ! wfts ) return NULL;
|
||||
if ( wfts == -1 ) return (int32_t *)-1;
|
||||
|
||||
CollectionRec *cr = getCollRec();
|
||||
if ( ! cr ) return NULL;
|
||||
|
||||
setStatus ( "getting site num inlinks");
|
||||
// check the tag first
|
||||
Tag *tag = gr->getTag ("sitenuminlinks");
|
||||
@ -15423,7 +15451,7 @@ void gotDiffbotReplyWrapper ( void *state , TcpSocket *s ) {
|
||||
|
||||
// set the mime
|
||||
HttpMime mime;
|
||||
if ( s->m_readOffset>0 &&
|
||||
if ( ! hadError && s && s->m_readOffset>0 &&
|
||||
// set location url to "null"
|
||||
! mime.set ( s->m_readBuf , s->m_readOffset , NULL ) ) {
|
||||
// g_errno should be set
|
||||
@ -19316,6 +19344,9 @@ File *XmlDoc::getUtf8ContentInFile ( int64_t *fileSizeArg ) {
|
||||
//int32_t loaded = tmp.load ( "/home/mwells/.config/internetarchive.yml");
|
||||
int32_t loaded = tmp.load ( "auth/internetarchive.yml");
|
||||
if(loaded <= 0) {
|
||||
log("gb: failed to load auth/internetarchive.yml");
|
||||
g_errno = EDOCTOOBIG;
|
||||
return NULL;
|
||||
// FIXME
|
||||
char *xx=NULL;*xx=0;
|
||||
}
|
||||
@ -19394,6 +19425,8 @@ char **XmlDoc::getUtf8Content ( ) {
|
||||
CollectionRec *cr = getCollRec();
|
||||
if ( ! cr ) return NULL;
|
||||
|
||||
setStatus("getting utf8 content");
|
||||
|
||||
// recycle?
|
||||
if ( cr->m_recycleContent || m_recycleContent ||
|
||||
// if trying to delete from index, load from old titlerec
|
||||
|
@ -37,12 +37,12 @@ int startup ( void *state ) {
|
||||
int bytes = read(-9,buf,5);
|
||||
//errno = 7; // E2BIG;
|
||||
//assert ( errno && bytes == -1 );
|
||||
g_errno = errno;
|
||||
//g_errno = errno;
|
||||
}
|
||||
|
||||
|
||||
int main() {
|
||||
errno = 10; // EINVAL;
|
||||
//errno = 10; // EINVAL;
|
||||
g_errno = 10;
|
||||
char stack[10000];
|
||||
pid_t pid = clone( startup ,
|
||||
@ -53,8 +53,8 @@ int main() {
|
||||
int status;
|
||||
waitpid ( pid , &status, 0 );
|
||||
|
||||
if ( s_called ) fprintf(stderr,"__errno_location() was called %i "
|
||||
"times\n",s_called);
|
||||
fprintf(stderr,"__errno_location() was called %i "
|
||||
"times\n",s_called);
|
||||
|
||||
if ( errno != 10 ) fprintf(stderr,"errno=%i (failed)\n",errno);
|
||||
else fprintf(stderr,"errno=%i (success)\n",errno);
|
||||
|
89
main.cpp
89
main.cpp
@ -77,7 +77,7 @@
|
||||
#include "Msg9b.h"
|
||||
#include "Msg17.h"
|
||||
//#include "Msg34.h"
|
||||
#include "Msg35.h"
|
||||
//#include "Msg35.h"
|
||||
//#include "Msg24.h"
|
||||
//#include "Msg28.h"
|
||||
//#include "Msg30.h"
|
||||
@ -373,6 +373,7 @@ extern void resetQuery ( );
|
||||
extern void resetStopWords ( );
|
||||
extern void resetUnicode ( );
|
||||
|
||||
extern void tryToSyncWrapper ( int fd , void *state ) ;
|
||||
|
||||
#if 0
|
||||
void stack_test();
|
||||
@ -1357,6 +1358,18 @@ int main2 ( int argc , char *argv[] ) {
|
||||
}
|
||||
*/
|
||||
|
||||
if ( strcmp ( cmd ,"isportinuse") == 0 ) {
|
||||
if ( cmdarg+1 >= argc ) goto printHelp;
|
||||
int port = atol ( argv[cmdarg+1] );
|
||||
// make sure port is available. returns false if in use.
|
||||
if ( ! g_httpServer.m_tcp.testBind(port,false) )
|
||||
// and we should return with 1 so the keep alive
|
||||
// script will exit
|
||||
exit (1);
|
||||
// port is not in use, return 0
|
||||
exit(0);
|
||||
}
|
||||
|
||||
// need threads here for tests?
|
||||
|
||||
// gb thrutest <testDir> <fileSize>
|
||||
@ -1805,7 +1818,7 @@ int main2 ( int argc , char *argv[] ) {
|
||||
// Load categories and generate country table
|
||||
char structureFile[256];
|
||||
g_conf.m_maxMem = 1000000000LL; // 1G
|
||||
g_mem.m_maxMem = 1000000000LL; // 1G
|
||||
//g_mem.m_maxMem = 1000000000LL; // 1G
|
||||
sprintf(structureFile, "%scatdb/gbdmoz.structure.dat", g_hostdb.m_dir);
|
||||
g_categories = &g_categories1;
|
||||
if (g_categories->loadCategories(structureFile) != 0) {
|
||||
@ -2396,7 +2409,7 @@ int main2 ( int argc , char *argv[] ) {
|
||||
if ( strcmp ( cmd , "freecache" ) == 0 ) {
|
||||
int32_t max = 7000000;
|
||||
if ( cmdarg + 1 < argc ) max = atoi ( argv[cmdarg+1] );
|
||||
freeAllSharedMem( max );
|
||||
//freeAllSharedMem( max );
|
||||
return true;
|
||||
}
|
||||
|
||||
@ -3047,7 +3060,8 @@ int main2 ( int argc , char *argv[] ) {
|
||||
// make sure port is available, no use loading everything up then
|
||||
// failing because another process is already running using this port
|
||||
//if ( ! g_udpServer.testBind ( g_hostdb.getMyPort() ) )
|
||||
if ( ! g_httpServer.m_tcp.testBind(g_hostdb.getMyHost()->m_httpPort))
|
||||
if ( ! g_httpServer.m_tcp.testBind(g_hostdb.getMyHost()->m_httpPort,
|
||||
true)) // printmsg?
|
||||
return 1;
|
||||
|
||||
int32_t *ips;
|
||||
@ -3453,9 +3467,15 @@ int main2 ( int argc , char *argv[] ) {
|
||||
//}
|
||||
|
||||
// test all collection dirs for write permission -- metalincs' request
|
||||
int32_t pcount = 0;
|
||||
for ( int32_t i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
|
||||
CollectionRec *cr = g_collectiondb.m_recs[i];
|
||||
if ( ! cr ) continue;
|
||||
if ( ++pcount >= 100 ) {
|
||||
log("rdb: not checking directory permission for "
|
||||
"more than first 100 collections to save time.");
|
||||
break;
|
||||
}
|
||||
char tt[1024 + MAX_COLL_LEN ];
|
||||
sprintf ( tt , "%scoll.%s.%"INT32"",
|
||||
g_hostdb.m_dir, cr->m_coll , (int32_t)cr->m_collnum );
|
||||
@ -3838,7 +3858,8 @@ int main2 ( int argc , char *argv[] ) {
|
||||
// . put this in here instead of Rdb.cpp because we don't want
|
||||
// generator commands merging on us
|
||||
// . the (void *)1 prevents gb from logging merge info every 2 seconds
|
||||
if ( ! g_loop.registerSleepCallback(2000,(void *)1,attemptMergeAll))
|
||||
// . niceness is 1
|
||||
if ( ! g_loop.registerSleepCallback(2000,(void *)1,attemptMergeAll,1))
|
||||
log("db: Failed to init merge sleep callback.");
|
||||
|
||||
// SEO MODULE
|
||||
@ -3848,7 +3869,9 @@ int main2 ( int argc , char *argv[] ) {
|
||||
! g_loop.registerSleepCallback(2000,(void *)1,runSEOQueryLoop))
|
||||
log("db: Failed to register seo query loop");
|
||||
|
||||
|
||||
// try to sync parms (and collection recs) with host 0
|
||||
if ( ! g_loop.registerSleepCallback(1000,NULL,tryToSyncWrapper,0))
|
||||
return false;
|
||||
|
||||
//if( !g_loop.registerSleepCallback(2000,(void *)1,controlDumpTopDocs) )
|
||||
// log("db: Failed to init dump TopDocs sleep callback.");
|
||||
@ -3866,11 +3889,11 @@ int main2 ( int argc , char *argv[] ) {
|
||||
//msg3e.checkForNewParms();
|
||||
|
||||
// this stuff is similar to alden's msg3e but will sync collections
|
||||
// that were added/deleted
|
||||
if ( ! g_parms.syncParmsWithHost0() ) {
|
||||
log("parms: error syncing parms: %s",mstrerror(g_errno));
|
||||
return 0;
|
||||
}
|
||||
// that were added/deletede
|
||||
//if ( ! g_parms.syncParmsWithHost0() ) {
|
||||
// log("parms: error syncing parms: %s",mstrerror(g_errno));
|
||||
// return 0;
|
||||
//}
|
||||
|
||||
|
||||
if(g_recoveryMode) {
|
||||
@ -3896,6 +3919,7 @@ int main2 ( int argc , char *argv[] ) {
|
||||
|
||||
Json json;
|
||||
json.test();
|
||||
json.reset();
|
||||
|
||||
// . start the spiderloop
|
||||
// . comment out when testing SpiderCache
|
||||
@ -5191,6 +5215,23 @@ int install ( install_flag_konst_t installFlag , int32_t hostId , char *dir ,
|
||||
"while [ \\$EXITSTATUS != 0 ]; do "
|
||||
"{ "
|
||||
|
||||
// if gb still running, then do not try to
|
||||
// run it again. we
|
||||
// probably double-called './gb start'.
|
||||
// so see if the port is bound to.
|
||||
"./gb isportinuse %i ; "
|
||||
"if [ \\$? -eq 1 ] ; then "
|
||||
"echo \"gb or something else "
|
||||
"is already running on "
|
||||
"port %i. Not starting.\" ; "
|
||||
"exit 0; "
|
||||
"fi ; "
|
||||
|
||||
// ok, the port is available
|
||||
//"echo \"Starting gb\"; "
|
||||
|
||||
//"exit 0; "
|
||||
|
||||
// in case gb was updated...
|
||||
"mv -f gb.installed gb ; "
|
||||
|
||||
@ -5211,11 +5252,16 @@ int install ( install_flag_konst_t installFlag , int32_t hostId , char *dir ,
|
||||
"ADDARGS='-r'\\$INC ; "
|
||||
"INC=\\$((INC+1));"
|
||||
"} "
|
||||
"done >& /dev/null & \" %s",
|
||||
//"done >& /dev/null & \" %s",
|
||||
"done & \" %s",
|
||||
//"\" %s",
|
||||
iptoa(h2->m_ip),
|
||||
h2->m_dir ,
|
||||
|
||||
// for ./gb isportinuse %i
|
||||
h2->m_httpPort ,
|
||||
h2->m_httpPort ,
|
||||
|
||||
// for moving log file
|
||||
h2->m_hostId ,
|
||||
h2->m_hostId ,
|
||||
@ -5840,7 +5886,7 @@ bool registerMsgHandlers2(){
|
||||
bool registerMsgHandlers3(){
|
||||
Msg17 msg17; if ( ! msg17.registerHandler () ) return false;
|
||||
//Msg34 msg34; if ( ! msg34.registerHandler () ) return false;
|
||||
Msg35 msg35; if ( ! msg35.registerHandler () ) return false;
|
||||
//Msg35 msg35; if ( ! msg35.registerHandler () ) return false;
|
||||
//Msg24 msg24; if ( ! msg24.registerHandler () ) return false;
|
||||
//Msg40 msg40; if ( ! msg40.registerHandler () ) return false;
|
||||
//MsgB msgb; if ( ! msgb.registerHandler () ) return false;
|
||||
@ -10726,7 +10772,7 @@ bool gbgunzip (char *filename) {
|
||||
// time speed of inserts into RdbTree for indexdb
|
||||
bool bucketstest ( char* dbname ) {
|
||||
g_conf.m_maxMem = 2000000000LL; // 2G
|
||||
g_mem.m_maxMem = 2000000000LL; // 2G
|
||||
//g_mem.m_maxMem = 2000000000LL; // 2G
|
||||
|
||||
|
||||
if ( dbname ) {
|
||||
@ -12223,7 +12269,7 @@ void dumpTagdb (char *coll,int32_t startFileNum,int32_t numFiles,
|
||||
|
||||
bool parseTest ( char *coll , int64_t docId , char *query ) {
|
||||
g_conf.m_maxMem = 2000000000LL; // 2G
|
||||
g_mem.m_maxMem = 2000000000LL; // 2G
|
||||
//g_mem.m_maxMem = 2000000000LL; // 2G
|
||||
//g_conf.m_checksumdbMaxDiskPageCacheMem = 0;
|
||||
//g_conf.m_spiderdbMaxDiskPageCacheMem = 0;
|
||||
g_conf.m_tfndbMaxDiskPageCacheMem = 0;
|
||||
@ -14546,7 +14592,8 @@ int injectFile ( char *filename , char *ips ,
|
||||
int64_t startDocId = 0LL;
|
||||
int64_t endDocId = MAX_DOCID;
|
||||
|
||||
g_mem.init ( 4000000000LL );
|
||||
g_conf.m_maxMem = 4000000000LL;
|
||||
g_mem.init ( );//4000000000LL );
|
||||
|
||||
// set up the loop
|
||||
if ( ! g_loop.init() ) return log("build: inject: Loop init "
|
||||
@ -16324,8 +16371,8 @@ bool memTest() {
|
||||
// if ( ! g_log.init( "./memlog" ) ) {//g_hostdb.m_logFilename ) ) {
|
||||
// fprintf (stderr,"db: Log file init failed.\n" ); return 1; }
|
||||
//g_mem.init(0xffffffff);
|
||||
g_mem.m_maxMem = 0xffffffffLL;
|
||||
g_mem.init( g_mem.m_maxMem );
|
||||
g_conf.m_maxMem = 0xffffffffLL;
|
||||
g_mem.init( );//g_mem.m_maxMem );
|
||||
|
||||
|
||||
fprintf(stderr, "memtest: Testing memory bus bandwidth.\n");
|
||||
@ -16343,7 +16390,7 @@ bool memTest() {
|
||||
membustest ( 8000 , 100000 , true );
|
||||
|
||||
fprintf(stderr, "memtest: Allocating up to %"INT64" bytes\n",
|
||||
g_mem.m_maxMem);
|
||||
g_conf.m_maxMem);
|
||||
for (i=0;i<4096;i++) {
|
||||
ptrs[numPtrs] = mmalloc(1024*1024, "memtest");
|
||||
if (!ptrs[numPtrs]) break;
|
||||
@ -16353,7 +16400,7 @@ bool memTest() {
|
||||
fprintf(stderr, "memtest: Was able to allocate %"INT64" bytes of a "
|
||||
"total of "
|
||||
"%"INT64" bytes of memory attempted.\n",
|
||||
g_mem.m_used,g_mem.m_maxMem);
|
||||
g_mem.m_used,g_conf.m_maxMem);
|
||||
|
||||
return true;
|
||||
|
||||
@ -16483,7 +16530,7 @@ void membustest ( int32_t nb , int32_t loops , bool readf ) {
|
||||
bool cacheTest() {
|
||||
|
||||
g_conf.m_maxMem = 2000000000LL; // 2G
|
||||
g_mem.m_maxMem = 2000000000LL; // 2G
|
||||
//g_mem.m_maxMem = 2000000000LL; // 2G
|
||||
|
||||
hashinit();
|
||||
|
||||
|
7
qa.cpp
7
qa.cpp
@ -476,6 +476,9 @@ void processReply ( char *reply , int32_t replyLen ) {
|
||||
|
||||
g_numErrors++;
|
||||
|
||||
SafeBuf he;
|
||||
he.htmlEncode ( s_url.getUrl() );
|
||||
|
||||
g_qaOutput.safePrintf("<b style=color:red;>FAILED TEST</b><br>%s : "
|
||||
"<a href=%s>%s</a> (urlhash=%"UINT32")<br>"
|
||||
|
||||
@ -496,7 +499,7 @@ void processReply ( char *reply , int32_t replyLen ) {
|
||||
"<pre id=%"UINT32" style=background-color:0xffffff;>",
|
||||
s_qt->m_testName,
|
||||
s_url.getUrl(),
|
||||
s_url.getUrl(),
|
||||
he.getBufStart(),
|
||||
urlHash32,
|
||||
|
||||
// input checkbox name field
|
||||
@ -815,7 +818,7 @@ bool qainject1 ( ) {
|
||||
}
|
||||
|
||||
|
||||
// stop for now
|
||||
// stop for now so we can analyze the index
|
||||
//return true; //
|
||||
|
||||
//
|
||||
|
Loading…
Reference in New Issue
Block a user