mirror of
https://github.com/gigablast/open-source-search-engine.git
synced 2024-10-04 12:17:35 +03:00
fix bug of not using part files when generating map
This commit is contained in:
parent
3444c67851
commit
65613feb4c
29
BigFile.cpp
29
BigFile.cpp
@ -35,6 +35,7 @@ BigFile::~BigFile () {
|
|||||||
BigFile::BigFile () {
|
BigFile::BigFile () {
|
||||||
m_permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
|
m_permissions = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ;
|
||||||
m_flags = O_RDWR ; // | O_DIRECT;
|
m_flags = O_RDWR ; // | O_DIRECT;
|
||||||
|
m_usePartFiles = true;
|
||||||
// NULLify all ptrs to files
|
// NULLify all ptrs to files
|
||||||
//for ( int32_t i = 0 ; i < MAX_PART_FILES ; i++ ) m_files[i] = NULL;
|
//for ( int32_t i = 0 ; i < MAX_PART_FILES ; i++ ) m_files[i] = NULL;
|
||||||
m_maxParts = 0;
|
m_maxParts = 0;
|
||||||
@ -74,6 +75,8 @@ bool BigFile::set ( char *dir , char *baseFilename , char *stripeDir ) {
|
|||||||
m_dir .setLabel("bfd");
|
m_dir .setLabel("bfd");
|
||||||
m_baseFilename.setLabel("bfbf");
|
m_baseFilename.setLabel("bfbf");
|
||||||
|
|
||||||
|
m_usePartFiles = true;
|
||||||
|
|
||||||
// use this 32 byte char buf to avoid a malloc if possible
|
// use this 32 byte char buf to avoid a malloc if possible
|
||||||
m_baseFilename.setBuf (m_tmpBaseBuf,32,0,false);
|
m_baseFilename.setBuf (m_tmpBaseBuf,32,0,false);
|
||||||
|
|
||||||
@ -267,12 +270,12 @@ static int64_t s_vfd = 0;
|
|||||||
|
|
||||||
// do not use part files for this open so we can open regular files
|
// do not use part files for this open so we can open regular files
|
||||||
// that are really >2GB in size with it
|
// that are really >2GB in size with it
|
||||||
bool BigFile::open2 ( int flags ,
|
// bool BigFile::open2 ( int flags ,
|
||||||
void *pc ,
|
// void *pc ,
|
||||||
int64_t maxFileSize ,
|
// int64_t maxFileSize ,
|
||||||
int permissions ) {
|
// int permissions ) {
|
||||||
return open ( flags , pc , maxFileSize , permissions , false );
|
// return open ( flags , pc , maxFileSize , permissions , false );
|
||||||
}
|
// }
|
||||||
|
|
||||||
// . override File::open so we can set m_numParts
|
// . override File::open so we can set m_numParts
|
||||||
// . set maxFileSize when opening a new file for writing and using
|
// . set maxFileSize when opening a new file for writing and using
|
||||||
@ -282,15 +285,14 @@ bool BigFile::open ( int flags ,
|
|||||||
//class DiskPageCache *pc ,
|
//class DiskPageCache *pc ,
|
||||||
void *pc ,
|
void *pc ,
|
||||||
int64_t maxFileSize ,
|
int64_t maxFileSize ,
|
||||||
int permissions ,
|
int permissions ) {
|
||||||
bool usePartFiles ) {
|
|
||||||
|
|
||||||
m_flags = flags;
|
m_flags = flags;
|
||||||
//m_pc = pc;
|
//m_pc = pc;
|
||||||
m_permissions = permissions;
|
m_permissions = permissions;
|
||||||
m_isClosing = false;
|
m_isClosing = false;
|
||||||
// this is true except when parsing big warc files
|
// this is true except when parsing big warc files
|
||||||
m_usePartFiles = usePartFiles;
|
m_usePartFiles = true;//usePartFiles;
|
||||||
// . init the page cache for this vfd
|
// . init the page cache for this vfd
|
||||||
// . this returns our "virtual fd", not the same as File::m_vfd
|
// . this returns our "virtual fd", not the same as File::m_vfd
|
||||||
// . returns -1 and sets g_errno on failure
|
// . returns -1 and sets g_errno on failure
|
||||||
@ -1378,10 +1380,17 @@ bool readwrite_r ( FileState *fstate , ThreadEntry *t ) {
|
|||||||
log("disk: Read of %"INT32" bytes at offset %"INT64" "
|
log("disk: Read of %"INT32" bytes at offset %"INT64" "
|
||||||
" failed because file is too short for that "
|
" failed because file is too short for that "
|
||||||
"offset? Our fd was probably stolen from us by another "
|
"offset? Our fd was probably stolen from us by another "
|
||||||
"thread. Will retry. error=%s.",
|
"thread. fd1=%i fd2=%i len=%i filenum=%i "
|
||||||
|
"localoffset=%i. usepart=%i error=%s.",
|
||||||
(int32_t)len,fstate->m_offset,
|
(int32_t)len,fstate->m_offset,
|
||||||
//fstate->m_this->getDir(),
|
//fstate->m_this->getDir(),
|
||||||
//fstate->m_this->getFilename(),
|
//fstate->m_this->getFilename(),
|
||||||
|
fstate->m_fd1,
|
||||||
|
fstate->m_fd2,
|
||||||
|
len,
|
||||||
|
filenum,
|
||||||
|
localOffset,
|
||||||
|
fstate->m_usePartFiles,
|
||||||
mstrerror(errno));
|
mstrerror(errno));
|
||||||
errno = EBADENGINEER;
|
errno = EBADENGINEER;
|
||||||
return false; // log("disk::read/write: offset too big");
|
return false; // log("disk::read/write: offset too big");
|
||||||
|
16
BigFile.h
16
BigFile.h
@ -143,17 +143,17 @@ class BigFile {
|
|||||||
void *pc = NULL ,
|
void *pc = NULL ,
|
||||||
int64_t maxFileSize = -1 ,
|
int64_t maxFileSize = -1 ,
|
||||||
int permissions =
|
int permissions =
|
||||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ,
|
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
|
||||||
bool usePartFiles = true );
|
//bool usePartFiles = true );
|
||||||
|
|
||||||
// this will set usepartfiles to false! so use this to open large
|
// this will set usepartfiles to false! so use this to open large
|
||||||
// warc or arc files
|
// warc or arc files
|
||||||
bool open2 ( int flags ,
|
//bool open2 ( int flags ,
|
||||||
//class DiskPageCache *pc = NULL ,
|
// //class DiskPageCache *pc = NULL ,
|
||||||
void *pc = NULL ,
|
// void *pc = NULL ,
|
||||||
int64_t maxFileSize = -1 ,
|
// int64_t maxFileSize = -1 ,
|
||||||
int permissions =
|
// int permissions =
|
||||||
S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
|
// S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH );
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@ -148,6 +148,7 @@ bool RdbScan::setRead ( BigFile *file ,
|
|||||||
// ensure we don't mess around
|
// ensure we don't mess around
|
||||||
m_fstate.m_allocBuf = NULL;
|
m_fstate.m_allocBuf = NULL;
|
||||||
m_fstate.m_buf = NULL;
|
m_fstate.m_buf = NULL;
|
||||||
|
m_fstate.m_usePartFiles = true;
|
||||||
// debug msg
|
// debug msg
|
||||||
//log("diskOff=%"INT64" nb=%"INT32"",offset,bytesToRead);
|
//log("diskOff=%"INT64" nb=%"INT32"",offset,bytesToRead);
|
||||||
//if ( offset == 16386 && bytesToRead == 16386 )
|
//if ( offset == 16386 && bytesToRead == 16386 )
|
||||||
|
@ -19363,8 +19363,10 @@ BigFile *XmlDoc::getUtf8ContentInFile ( int64_t *fileSizeArg ) {
|
|||||||
m_fileSize = m_file.getFileSize();
|
m_fileSize = m_file.getFileSize();
|
||||||
m_fileValid = true;
|
m_fileValid = true;
|
||||||
*fileSizeArg = m_fileSize;
|
*fileSizeArg = m_fileSize;
|
||||||
// open2() has usepartfiles = false!!!
|
m_file.open(O_RDONLY);
|
||||||
m_file.open2(O_RDONLY);
|
// open() now always sets this to true, which makes it harder for it to
|
||||||
|
// be false by accident (that messes things up), so set it to false here
|
||||||
|
m_file.m_usePartFiles = false;
|
||||||
return &m_file;
|
return &m_file;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user