Class File_Namespace::FileMgr¶
-
class
FileMgr
: public AbstractBufferMgr¶ Subclassed by File_Namespace::CachingFileMgr
Public Functions
-
FileMgr
(const int32_t deviceId, GlobalFileMgr *gfm, const TablePair fileMgrKey, const int32_t max_rollback_epochs = -1, const size_t num_reader_threads = 0, const int32_t epoch = -1, const size_t defaultPageSize = DEFAULT_PAGE_SIZE)¶ Constructor.
-
FileMgr
(const int32_t deviceId, GlobalFileMgr *gfm, const TablePair fileMgrKey, const size_t defaultPageSize, const bool runCoreInit)¶
-
FileMgr
(GlobalFileMgr *gfm, const size_t defaultPageSize, std::string basePath)¶
-
~FileMgr
()¶ Destructor.
-
StorageStats
getStorageStats
() const¶
-
FileBuffer *
createBuffer
(const ChunkKey &key, size_t pageSize = 0, const size_t numBytes = 0)¶ Creates a chunk with the specified key and page size.
-
bool
isBufferOnDevice
(const ChunkKey &key)¶
-
void
deleteBuffer
(const ChunkKey &key, const bool purge = true)¶ Deletes the chunk with the specified key.
-
void
deleteBuffersWithPrefix
(const ChunkKey &keyPrefix, const bool purge = true)¶
-
FileBuffer *
getBuffer
(const ChunkKey &key, const size_t numBytes = 0)¶ Returns a pointer to the chunk with the specified key.
-
void
fetchBuffer
(const ChunkKey &key, AbstractBuffer *destBuffer, const size_t numBytes)¶
-
FileBuffer *
putBuffer
(const ChunkKey &key, AbstractBuffer *d, const size_t numBytes = 0)¶ Puts the contents of d into the Chunk with the given key.
- Return
AbstractBuffer*
- Parameters
key
: - Unique identifier for a Chunk.
d
: - An object representing the source data for the Chunk.
-
AbstractBuffer *
alloc
(const size_t numBytes)¶
-
void
free
(AbstractBuffer *buffer)¶
-
MgrType
getMgrType
()¶
-
std::string
getStringMgrType
()¶
-
std::string
printSlabs
()¶
-
size_t
getMaxSize
()¶
-
size_t
getInUseSize
()¶
-
size_t
getAllocated
()¶
-
bool
isAllocationCapped
()¶
-
FileMetadata
getMetadataForFile
(const boost::filesystem::directory_iterator &fileIterator) const¶
-
void
init
(const size_t num_reader_threads, const int32_t epochOverride)¶
-
void
init
(const std::string &dataPathToConvertFrom, const int32_t epochOverride)¶
-
void
copyPage
(Page &srcPage, FileMgr *destFileMgr, Page &destPage, const size_t reservedHeaderSize, const size_t numBytes, const size_t offset)¶
-
void
requestFreePages
(size_t npages, size_t pagesize, std::vector<Page> &pages, const bool isMetadata)¶ Obtains free pages of the requested size, creating new files if necessary.
Given a page size and number of pages, this method updates the vector “pages” to include free pages of the requested size. These pages are immediately removed from the free list of the affected file(s). If there are not enough pages available among current files, new files are created and their pages are included in the vector.
- Parameters
npages
: The number of free pages requested
pagesize
: The size of each requested page
pages
: A vector containing the free pages obtained by this method
-
void
getChunkMetadataVecForKeyPrefix
(ChunkMetadataVector &chunkMetadataVec, const ChunkKey &keyPrefix)¶
-
void
checkpoint
()¶ Fsyncs data files, writes out epoch and fsyncs that.
-
void
checkpoint
(const int32_t db_id, const int32_t tb_id)¶
-
virtual int32_t
epoch
(int32_t db_id, int32_t tb_id) const¶ Returns the current value of the epoch, which should be one greater than the value recorded at the last checkpoint. Because FileMgr only contains buffers from one table, we can just return the FileMgr’s epoch instead of finding a table-specific epoch.
-
int32_t
epochFloor
() const¶
-
int32_t
incrementEpoch
()¶
-
int32_t
lastCheckpointedEpoch
() const¶ Returns value of epoch at last checkpoint.
-
void
resetEpochFloor
()¶
-
int32_t
maxRollbackEpochs
()¶ Returns the value of max_rollback_epochs.
-
size_t
getNumReaderThreads
()¶ Returns the number of threads, defined by the parameter num-reader-threads, which should be used during the initial load and subsequent reads of data.
-
FILE *
getFileForFileId
(const int32_t fileId)¶ Returns FILE pointer associated with requested fileId.
- See
-
size_t
getNumChunks
()¶
-
size_t
getNumUsedMetadataPagesForChunkKey
(const ChunkKey &chunkKey) const¶
-
int32_t
getDBVersion
() const¶ Index for looking up chunks.
-
bool
getDBConvert
() const¶
-
void
createTopLevelMetadata
()¶
-
std::string
getFileMgrBasePath
() const¶
-
void
closeRemovePhysical
()¶
-
void
removeTableRelatedDS
(const int32_t db_id, const int32_t table_id)¶
-
virtual bool
hasFileMgrKey
() const¶
-
const TablePair
get_fileMgrKey
() const¶
-
boost::filesystem::path
getFilePath
(const std::string &file_name) const¶
-
void
writePageMappingsToStatusFile
(const std::vector<PageMapping> &page_mappings)¶ Serializes a page mapping vector to expected status file. Page mapping vector is serialized in the following format: [{page mapping vector size}, {page mapping vector data bytes …}]
-
void
renameCompactionStatusFile
(const char *const from_status, const char *const to_status)¶ Renames a given status file name to a new given file name.
-
void
compactFiles
()¶ Compacts metadata and data file pages and deletes resulting empty files (if any exist). Compaction occurs in 3 idempotent phases in order to enable graceful recovery if a crash/process interruption occurs in the middle of data compaction.
Phase 1: Create a status file that indicates initiation of this phase. Sort metadata/data files in order of files with the lowest number of free pages to those with the highest number of free pages. Copy over used pages from files at the end of the sorted order (files with the highest number of free pages) to those at the beginning of the sorted order (files with the lowest number of free pages). Keep destination/copied to pages as free while copying. Keep track of copied source to destination page mapping. Write page mapping to the status file (to be used during crash recovery if needed).
Phase 2: Rename status file to a file name that indicates initiation of this phase. Go through page mapping and mark source/copied from pages as free while marking the destination/copied to pages as used.
Phase 3: Rename status file to a file name that indicates initiation of this phase. Delete all empty files (files containing only free pages). Delete status file.
-
bool
updatePageIfDeleted
(FileInfo *file_info, ChunkKey &chunk_key, int32_t contingent, int32_t page_epoch, int32_t page_num)¶ Deletes or recovers a page based on the last checkpointed epoch.
-
virtual bool
failOnReadError
() const¶ True if a read error should cause a fatal error.
-
std::string
describeSelf
() const¶
Public Members
-
ChunkKeyToChunkMap
chunkIndex_
¶
Public Static Functions
-
void
setNumPagesPerDataFile
(size_t num_pages)¶
-
void
setNumPagesPerMetadataFile
(size_t num_pages)¶
Public Static Attributes
-
constexpr size_t
DEFAULT_NUM_PAGES_PER_DATA_FILE
= {256}¶
-
constexpr size_t
DEFAULT_NUM_PAGES_PER_METADATA_FILE
= {4096}¶
-
constexpr char const *
COPY_PAGES_STATUS
= {"pending_data_compaction_0"}¶
-
constexpr char const *
UPDATE_PAGE_VISIBILITY_STATUS
= {"pending_data_compaction_1"}¶
-
constexpr char const *
DELETE_EMPTY_FILES_STATUS
= {"pending_data_compaction_2"}¶
-
constexpr char
LEGACY_EPOCH_FILENAME
[] = "epoch"¶
-
constexpr char
EPOCH_FILENAME
[] = "epoch_metadata"¶
-
constexpr char
DB_META_FILENAME
[] = "dbmeta"¶
-
constexpr char
FILE_MGR_VERSION_FILENAME
[] = "filemgr_version"¶
-
constexpr int32_t
INVALID_VERSION
= -1¶
Protected Functions
-
FileMgr
()¶
-
FileInfo *
createFile
(const size_t pageSize, const size_t numPages)¶ Adds a file to the file manager repository.
This method will create a FileInfo object for the file being added, and it will create the corresponding file on physical disk with the indicated number of pages pre-allocated.
A pointer to the FileInfo object is returned, which itself has a file pointer (FILE*) and a file identifier (int32_t fileId).
- Return
FileInfo* A pointer to the FileInfo object of the added file.
- Parameters
fileName
: The name given to the file in physical storage.
pageSize
: The logical page size for the pages in the file.
numPages
: The number of logical pages to initially allocate for the file.
-
FileInfo *
openExistingFile
(const std::string &path, const int32_t fileId, const size_t pageSize, const size_t numPages, std::vector<HeaderInfo> &headerVec)¶
-
void
createEpochFile
(const std::string &epochFileName)¶
-
int32_t
openAndReadLegacyEpochFile
(const std::string &epochFileName)¶
-
void
openAndReadEpochFile
(const std::string &epochFileName)¶
-
void
writeAndSyncEpochToDisk
()¶
-
void
setEpoch
(const int32_t newEpoch)¶
-
int32_t
readVersionFromDisk
(const std::string &versionFileName) const¶
-
void
writeAndSyncVersionToDisk
(const std::string &versionFileName, const int32_t version)¶
-
void
processFileFutures
(std::vector<std::future<std::vector<HeaderInfo>>> &file_futures, std::vector<HeaderInfo> &headerVec)¶
-
FileBuffer *
createBufferUnlocked
(const ChunkKey &key, size_t pageSize = 0, const size_t numBytes = 0)¶
-
FileBuffer *
createBufferFromHeaders
(const ChunkKey &key, const std::vector<HeaderInfo>::const_iterator &headerStartIt, const std::vector<HeaderInfo>::const_iterator &headerEndIt)¶
-
void
migrateToLatestFileMgrVersion
()¶
-
void
migrateEpochFileV0
()¶
-
OpenFilesResult
openFiles
()¶
-
void
clearFileInfos
()¶
-
void
copySourcePageForCompaction
(const Page &source_page, FileInfo *destination_file_info, std::vector<PageMapping> &page_mappings, std::set<Page> &touched_pages)¶ Copies a used page (indicated by the top of the source_used_pages set) from the given source file to a free page in the given destination file. Source and destination pages are recorded in the given page_mappings vector after copying is done.
-
int32_t
copyPageWithoutHeaderSize
(const Page &source_page, const Page &destination_page)¶ Copies content of source_page to destination_page without copying over the source_page header size. The header size is instead returned by the method. Not copying over the header size enables a use case where destination_page has all the content of the source_page but is still marked as a free page.
-
void
sortAndCopyFilePagesForCompaction
(size_t page_size, std::vector<PageMapping> &page_mappings, std::set<Page> &touched_pages)¶ Sorts all files with the given page size in ascending order of number of free pages. Then copies pages from files with more free pages to those with fewer free pages. Leaves destination/copied to pages as free when copying. Records copied source and destination pages in the page mapping.
-
void
updateMappedPagesVisibility
(const std::vector<PageMapping> &page_mappings)¶ Goes through the given page mapping and marks source/copied from pages as free while marking destination/copied to pages as used (by setting the header size).
-
void
deleteEmptyFiles
()¶ Deletes files that contain only free pages. Also deletes the compaction status file.
-
void
resumeFileCompaction
(const std::string &status_file_name)¶ Resumes an interrupted file compaction process. This method would normally only be called when re-initializing the file manager after a crash occurred in the middle of file compaction.
-
std::vector<PageMapping>
readPageMappingsFromStatusFile
()¶ Deserializes a page mapping vector from expected status file.
-
FileMgr
(const int epoch)¶
-
void
closePhysicalUnlocked
()¶
-
void
syncFilesToDisk
()¶
-
void
freePages
()¶
-
void
initializeNumThreads
(size_t num_reader_threads = 0)¶
-
FileBuffer *
allocateBuffer
(const size_t page_size, const ChunkKey &key, const size_t num_bytes = 0)¶
-
FileBuffer *
allocateBuffer
(const ChunkKey &key, const std::vector<HeaderInfo>::const_iterator &headerStartIt, const std::vector<HeaderInfo>::const_iterator &headerEndIt)¶
-
ChunkKeyToChunkMap::iterator
deleteBufferUnlocked
(const ChunkKeyToChunkMap::iterator chunk_it, const bool purge = true)¶
-
FileBuffer *
getBufferUnlocked
(const ChunkKeyToChunkMap::iterator chunk_it, const size_t numBytes = 0)¶
Protected Attributes
-
int32_t
maxRollbackEpochs_
¶
-
std::string
fileMgrBasePath_
¶
-
std::map<int32_t, FileInfo *>
files_
¶ The OS file system path containing files related to this FileMgr
-
PageSizeFileMMap
fileIndex_
¶ A map of files accessible via a file identifier.
-
size_t
defaultPageSize_
¶ number of threads used when loading data
-
unsigned
nextFileId_
¶
-
int32_t
db_version_
¶ the index of the next file id
-
int32_t
fileMgrVersion_
¶ DB version from dbmeta file, should be compatible with GlobalFileMgr::omnisci_db_version_
-
const int32_t
latestFileMgrVersion_
= {1}¶
-
FILE *
DBMetaFile_
= nullptr¶
-
std::mutex
getPageMutex_
¶ pointer to DB level metadata
-
mapd_shared_mutex
chunkIndexMutex_
¶
-
mapd_shared_mutex
files_rw_mutex_
¶
-
mapd_shared_mutex
mutex_free_page_
¶
-
bool
isFullyInitted_
= {false}¶
Protected Static Attributes
-
size_t
num_pages_per_data_file_
= {DEFAULT_NUM_PAGES_PER_DATA_FILE}¶
-
size_t
num_pages_per_metadata_file_
= {DEFAULT_NUM_PAGES_PER_METADATA_FILE}¶
Private Functions
-
void
rollOffOldData
(const int32_t epochCeiling, const bool shouldCheckpoint)¶
-
void
freePagesBeforeEpoch
(const int32_t min_epoch)¶
-
void
freePagesBeforeEpochUnlocked
(const int32_t min_epoch, const ChunkKeyToChunkMap::iterator lower_bound, const ChunkKeyToChunkMap::iterator upper_bound)¶
-
FileBuffer *
getOrCreateBuffer
(const ChunkKey &key)¶
-
bool
coreInit
()¶ Determines file path, and if exists, runs file migration and opens and reads epoch file.
- Return
a boolean representing whether the directory path existed
-
int32_t
epoch
() const¶
-
void
writeDirtyBuffers
()¶
-
void
setDataAndMetadataFileStats
(StorageStats &storage_stats) const¶
-
uint32_t
getFragmentCount
() const¶
Friends
-
friend
File_Namespace::FileMgr::GlobalFileMgr
-