Class Fragmenter_Namespace::InsertOrderFragmenter

class InsertOrderFragmenter : public Fragmenter_Namespace::AbstractFragmenter

The InsertOrderFragmenter is a child class of AbstractFragmenter, and fragments data in insert order. Likely the default fragmenter.

InsertOrderFragmenter

Subclassed by Fragmenter_Namespace::SortedOrderFragmenter

Public Types

using ModifyTransactionTracker = UpdelRoll

Public Functions

InsertOrderFragmenter(const std::vector<int> chunkKeyPrefix, std::vector<Chunk_NS::Chunk> &chunkVec, Data_Namespace::DataMgr *dataMgr, Catalog_Namespace::Catalog *catalog, const int physicalTableId, const int shard, const size_t maxFragmentRows = DEFAULT_FRAGMENT_ROWS, const size_t maxChunkSize = DEFAULT_MAX_CHUNK_SIZE, const size_t pageSize = DEFAULT_PAGE_SIZE, const size_t maxRows = DEFAULT_MAX_ROWS, const Data_Namespace::MemoryLevel defaultInsertLevel = Data_Namespace::DISK_LEVEL, const bool uses_foreign_storage = false)
~InsertOrderFragmenter()
size_t getNumFragments()

returns the number of fragments in a table

TableInfo getFragmentsForQuery()

returns (inside the TableInfo object) the ids and row sizes of all fragments

void insertData(InsertData &insert_data_struct)

appends data onto the most recently occurring fragment, creating a new one if necessary

void insertChunks(const InsertChunks &insert_chunk)

Insert chunks into minimal number of fragments.

Parameters
  • insert_chunk: - the chunks to insert

void insertDataNoCheckpoint(InsertData &insert_data_struct)

Given data wrapped in an InsertData struct, inserts it into the correct partitions. No locks or checkpoints are taken; these need to be managed externally.

void insertChunksNoCheckpoint(const InsertChunks &insert_chunk)

Insert chunks into minimal number of fragments; no locks or checkpoints taken.

Parameters
  • insert_chunk: - the chunks to insert

void dropFragmentsToSize(const size_t maxRows)

Will truncate table to less than maxRows by dropping fragments.

void updateColumnChunkMetadata(const ColumnDescriptor *cd, const int fragment_id, const std::shared_ptr<ChunkMetadata> metadata)

Updates the metadata for a column chunk.

Parameters
  • cd: - ColumnDescriptor for the column

  • fragment_id: - Fragment id of the chunk within the column

  • metadata: - shared_ptr of the metadata to update column chunk with

void updateChunkStats(const ColumnDescriptor *cd, std::unordered_map<int, ChunkStats> &stats_map, std::optional<Data_Namespace::MemoryLevel> memory_level)

Update chunk stats.

FragmentInfo *getFragmentInfo(const int fragment_id) const

Retrieve the fragment info object for an individual fragment for editing.

int getFragmenterId()

get fragmenter’s id

std::vector<int> getChunkKeyPrefix() const
std::string getFragmenterType()

get fragmenter’s type (as string)

size_t getNumRows()
void setNumRows(const size_t numTuples)
std::optional<ChunkUpdateStats> updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector<uint64_t> &frag_offsets, const std::vector<ScalarTargetValue> &rhs_values, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)
void updateColumns(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragmentId, const std::vector<TargetMetaInfo> sourceMetaInfo, const std::vector<const ColumnDescriptor *> columnDescriptors, const RowDataProvider &sourceDataProvider, const size_t indexOffFragmentOffsetColumn, const Data_Namespace::MemoryLevel memoryLevel, UpdelRoll &updelRoll, Executor *executor)
void updateColumn(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const ColumnDescriptor *cd, const int fragment_id, const std::vector<uint64_t> &frag_offsets, const ScalarTargetValue &rhs_value, const SQLTypeInfo &rhs_type, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)
void updateColumnMetadata(const ColumnDescriptor *cd, FragmentInfo &fragment, std::shared_ptr<Chunk_NS::Chunk> chunk, const UpdateValuesStats &update_values_stats, const SQLTypeInfo &rhs_type, UpdelRoll &updel_roll)
void updateMetadata(const Catalog_Namespace::Catalog *catalog, const MetaDataKey &key, UpdelRoll &updel_roll)
void compactRows(const Catalog_Namespace::Catalog *catalog, const TableDescriptor *td, const int fragment_id, const std::vector<uint64_t> &frag_offsets, const Data_Namespace::MemoryLevel memory_level, UpdelRoll &updel_roll)
const std::vector<uint64_t> getVacuumOffsets(const std::shared_ptr<Chunk_NS::Chunk> &chunk)
auto getChunksForAllColumns(const TableDescriptor *td, const FragmentInfo &fragment, const Data_Namespace::MemoryLevel memory_level)
void dropColumns(const std::vector<int> &columnIds)
bool hasDeletedRows(const int delete_column_id)

Iterates through chunk metadata to return whether any rows have been deleted.

void resetSizesFromFragments()

Resets the fragmenter’s size related metadata using the internal fragment info vector. This is typically done after operations, such as vacuuming, which can change fragment sizes.

Protected Functions

FragmentInfo *createNewFragment(const Data_Namespace::MemoryLevel memory_level = Data_Namespace::DISK_LEVEL)

creates new fragment, calling createChunk() method of BufferMgr to make a new chunk for each column of the table.

Also unpins the chunks of the previous insert buffer

void deleteFragments(const std::vector<int> &dropFragIds)
void conditionallyInstantiateFileMgrWithParams()
void getChunkMetadata()
void lockInsertCheckpointData(const InsertData &insertDataStruct)
void insertDataImpl(InsertData &insert_data)
void insertChunksImpl(const InsertChunks &insert_chunk)
void addColumns(const InsertData &insertDataStruct)
InsertOrderFragmenter(const InsertOrderFragmenter&)
InsertOrderFragmenter &operator=(const InsertOrderFragmenter&)
FragmentInfo &getFragmentInfoFromId(const int fragment_id)
auto vacuum_fixlen_rows(const FragmentInfo &fragment, const std::shared_ptr<Chunk_NS::Chunk> &chunk, const std::vector<uint64_t> &frag_offsets)
auto vacuum_varlen_rows(const FragmentInfo &fragment, const std::shared_ptr<Chunk_NS::Chunk> &chunk, const std::vector<uint64_t> &frag_offsets)

Protected Attributes

std::vector<int> chunkKeyPrefix_
std::map<int, Chunk_NS::Chunk> columnMap_

stores a map of column id to metadata about that column

std::deque<std::unique_ptr<FragmentInfo>> fragmentInfoVec_

data about each fragment stored - id and number of rows

Data_Namespace::DataMgr *dataMgr_
Catalog_Namespace::Catalog *catalog_
const int physicalTableId_
const int shard_
size_t maxFragmentRows_
size_t pageSize_
size_t numTuples_
int maxFragmentId_
size_t maxChunkSize_
size_t maxRows_
std::string fragmenterType_
mapd_shared_mutex fragmentInfoMutex_
mapd_shared_mutex insertMutex_
Data_Namespace::MemoryLevel defaultInsertLevel_
const bool uses_foreign_storage_
bool hasMaterializedRowId_
int rowIdColId_
std::unordered_map<int, size_t> varLenColInfo_
std::shared_ptr<std::mutex> mutex_access_inmem_states
std::mutex temp_mutex_

Private Functions

bool isAddingNewColumns(const InsertData &insert_data) const
void dropFragmentsToSizeNoInsertLock(const size_t max_rows)
void setLastFragmentVarLenColumnSizes()
void insertChunksIntoFragment(const InsertChunks &insert_chunks, const std::optional<int> delete_column_id, FragmentInfo *current_fragment, const size_t num_rows_to_insert, size_t &num_rows_inserted, size_t &num_rows_left, std::vector<size_t> &valid_row_indices, const size_t start_fragment)