fix bug of not using part files when generating map

Author: Matt
Date: 2015-09-13 17:52:40 -07:00
parent 3444c67851
commit 65613feb4c
4 changed files with 32 additions and 20 deletions
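
Background, as a reading of this diff rather than anything stated in the commit itself: a BigFile is stored on disk as a sequence of part files (note MAX_PART_FILES, filenum and localOffset in the hunks below), and a read at a logical offset has to be routed to the right part file at the right local offset. When m_usePartFiles is false the file is instead treated as one regular, possibly >2GB, file. The sketch below only illustrates that offset mapping; PART_SIZE is a made-up constant and locate() is not a function from the tree.

#include <cstdint>
#include <cstdio>

// Hypothetical part size, for illustration only; the real constant is
// defined in the BigFile sources and is not shown in this diff.
static const int64_t PART_SIZE = 1LL << 30;

// Map a logical offset into a big file to a part-file number and an offset
// local to that part. With usePartFiles false, the whole thing is one file.
static void locate ( int64_t offset , bool usePartFiles ,
                     int32_t *filenum , int64_t *localOffset ) {
        if ( ! usePartFiles ) {
                *filenum     = 0;
                *localOffset = offset;
                return;
        }
        *filenum     = (int32_t)(offset / PART_SIZE);
        *localOffset = offset % PART_SIZE;
}

int main () {
        int32_t filenum;
        int64_t localOffset;
        // a ~3GB offset lands in part file 2 when part files are enabled
        locate ( 3000000000LL , true , &filenum , &localOffset );
        printf ( "filenum=%d localOffset=%lld\n" ,
                 (int)filenum , (long long)localOffset );
        return 0;
}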

BigFile.cpp

@@ -35,6 +35,7 @@ BigFile::~BigFile () {
 BigFile::BigFile () {
 m_permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
 m_flags = O_RDWR ; // | O_DIRECT;
+m_usePartFiles = true;
 // NULLify all ptrs to files
 //for ( int32_t i = 0 ; i < MAX_PART_FILES ; i++ ) m_files[i] = NULL;
 m_maxParts = 0;
@@ -74,6 +75,8 @@ bool BigFile::set ( char *dir , char *baseFilename , char *stripeDir ) {
 m_dir .setLabel("bfd");
 m_baseFilename.setLabel("bfbf");
+m_usePartFiles = true;
 // use this 32 byte char buf to avoid a malloc if possible
 m_baseFilename.setBuf (m_tmpBaseBuf,32,0,false);
@@ -267,12 +270,12 @@ static int64_t s_vfd = 0;
 // do not use part files for this open so we can open regular really >2GB
 // sized files with it
-bool BigFile::open2 ( int flags ,
-void *pc ,
-int64_t maxFileSize ,
-int permissions ) {
-return open ( flags , pc , maxFileSize , permissions , false );
-}
+// bool BigFile::open2 ( int flags ,
+// void *pc ,
+// int64_t maxFileSize ,
+// int permissions ) {
+// return open ( flags , pc , maxFileSize , permissions , false );
+// }
 // . overide File::open so we can set m_numParts
 // . set maxFileSize when opening a new file for writing and using
@@ -282,15 +285,14 @@ bool BigFile::open ( int flags ,
 //class DiskPageCache *pc ,
 void *pc ,
 int64_t maxFileSize ,
-int permissions ,
-bool usePartFiles ) {
+int permissions ) {
 m_flags = flags;
 //m_pc = pc;
 m_permissions = permissions;
 m_isClosing = false;
 // this is true except when parsing big warc files
-m_usePartFiles = usePartFiles;
+m_usePartFiles = true;//usePartFiles;
 // . init the page cache for this vfd
 // . this returns our "virtual fd", not the same as File::m_vfd
 // . returns -1 and sets g_errno on failure
@@ -1378,10 +1380,17 @@ bool readwrite_r ( FileState *fstate , ThreadEntry *t ) {
 log("disk: Read of %"INT32" bytes at offset %"INT64" "
 " failed because file is too short for that "
 "offset? Our fd was probably stolen from us by another "
-"thread. Will retry. error=%s.",
+"thread. fd1=%i fd2=%i len=%i filenum=%i "
+"localoffset=%i. usepart=%i error=%s.",
 (int32_t)len,fstate->m_offset,
 //fstate->m_this->getDir(),
 //fstate->m_this->getFilename(),
+fstate->m_fd1,
+fstate->m_fd2,
+len,
+filenum,
+localOffset,
+fstate->m_usePartFiles,
 mstrerror(errno));
 errno = EBADENGINEER;
 return false; // log("disk::read/write: offset too big");

BigFile.h

@@ -143,17 +143,17 @@ class BigFile {
 void *pc = NULL ,
 int64_t maxFileSize = -1 ,
 int permissions =
-S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ,
-bool usePartFiles = true );
+S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
+//bool usePartFiles = true );
 // this will set usepartfiles to false! so use this to open large
 // warc or arc files
-bool open2 ( int flags ,
-//class DiskPageCache *pc = NULL ,
-void *pc = NULL ,
-int64_t maxFileSize = -1 ,
-int permissions =
-S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
+//bool open2 ( int flags ,
+// //class DiskPageCache *pc = NULL ,
+// void *pc = NULL ,
+// int64_t maxFileSize = -1 ,
+// int permissions =
+// S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );

RdbScan.cpp

@@ -148,6 +148,7 @@ bool RdbScan::setRead ( BigFile *file ,
 // ensure we don't mess around
 m_fstate.m_allocBuf = NULL;
 m_fstate.m_buf = NULL;
+m_fstate.m_usePartFiles = true;
 // debug msg
 //log("diskOff=%"INT64" nb=%"INT32"",offset,bytesToRead);
 //if ( offset == 16386 && bytesToRead == 16386 )

XmlDoc.cpp

@@ -19363,8 +19363,10 @@ BigFile *XmlDoc::getUtf8ContentInFile ( int64_t *fileSizeArg ) {
 m_fileSize = m_file.getFileSize();
 m_fileValid = true;
 *fileSizeArg = m_fileSize;
-// open2() has usepartfiles = false!!!
-m_file.open2(O_RDONLY);
+m_file.open(O_RDONLY);
+// explicitly set it to false now to make it harder for
+// it not to be true because that messes things up
+m_file.m_usePartFiles = false;
 return &m_file;
 }
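
A usage note on the API change, sketched rather than taken from the tree: with open2() commented out of BigFile.h, a caller that wants to read one huge un-split file (a big warc or arc, per the comments above) now calls plain open() and then clears m_usePartFiles itself, exactly as the XmlDoc.cpp hunk does. A minimal stand-alone illustration, with BigFileStub as a made-up stand-in for the real BigFile:

#include <fcntl.h>
#include <cstdio>

// BigFileStub is hypothetical; it mirrors only what this commit touches:
// open() with no usePartFiles argument, and a public m_usePartFiles flag
// that a caller may clear after opening.
struct BigFileStub {
        bool m_usePartFiles;
        BigFileStub ( ) : m_usePartFiles(true) { } // on by default, as in the constructor hunk
        bool open ( int flags ) { (void)flags; m_usePartFiles = true; return true; }
};

int main () {
        BigFileStub f;
        // old pattern: f.open2(O_RDONLY) disabled part files internally
        // new pattern: open normally, then clear the flag explicitly so the
        // file is read as one regular (possibly >2GB) file
        if ( ! f.open ( O_RDONLY ) ) return 1;
        f.m_usePartFiles = false;
        printf ( "usePartFiles=%d\n" , (int)f.m_usePartFiles );
        return 0;
}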