Class QueryMemoryDescriptor

class QueryMemoryDescriptor

Public Functions

QueryMemoryDescriptor()
QueryMemoryDescriptor(const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector<InputTableInfo> &query_infos, const bool allow_multifrag, const bool keyless_hash, const bool interleaved_bins_on_gpu, const int32_t idx_target_as_key, const ColRangeInfo &col_range_info, const ColSlotContext &col_slot_context, const std::vector<int8_t> &group_col_widths, const int8_t group_col_compact_width, const std::vector<int64_t> &target_groupby_indices, const size_t entry_count, const CountDistinctDescriptors count_distinct_descriptors, const bool sort_on_gpu_hint, const bool output_columnar, const bool render_output, const bool must_use_baseline_sort, const bool use_streaming_top_n)
QueryMemoryDescriptor(const Executor *executor, const size_t entry_count, const QueryDescriptionType query_desc_type, const bool is_table_function)
QueryMemoryDescriptor(const QueryDescriptionType query_desc_type, const int64_t min_val, const int64_t max_val, const bool has_nulls, const std::vector<int8_t> &group_col_widths)
QueryMemoryDescriptor(const TResultSetBufferDescriptor &thrift_query_memory_descriptor)
bool operator==(const QueryMemoryDescriptor &other) const
std::unique_ptr<QueryExecutionContext> getQueryExecutionContext(const RelAlgExecutionUnit &ra_exe_unit, const Executor *executor, const ExecutorDeviceType device_type, const ExecutorDispatchMode dispatch_mode, const int device_id, const int outer_table_id, const int64_t num_rows, const std::vector<std::vector<const int8_t *>> &col_buffers, const std::vector<std::vector<uint64_t>> &frag_offsets, std::shared_ptr<RowSetMemoryOwner> row_set_mem_owner, const bool output_columnar, const bool sort_on_gpu, const size_t thread_idx, RenderInfo *render_info) const
bool countDistinctDescriptorsLogicallyEmpty() const
const Executor *getExecutor() const
QueryDescriptionType getQueryDescriptionType() const
void setQueryDescriptionType(const QueryDescriptionType val)
bool isSingleColumnGroupByWithPerfectHash() const
bool hasKeylessHash() const
void setHasKeylessHash(const bool val)
bool hasInterleavedBinsOnGpu() const
void setHasInterleavedBinsOnGpu(const bool val)
int32_t getTargetIdxForKey() const
void setTargetIdxForKey(const int32_t val)
int8_t groupColWidth(const size_t key_idx) const
size_t getPrependedGroupColOffInBytes(const size_t group_idx) const
size_t getPrependedGroupBufferSizeInBytes() const
const auto groupColWidthsBegin() const
const auto groupColWidthsEnd() const
void clearGroupColWidths()
bool isGroupBy() const
void setGroupColCompactWidth(const int8_t val)
size_t getColCount() const
size_t getSlotCount() const
const int8_t getPaddedSlotWidthBytes(const size_t slot_idx) const
const int8_t getLogicalSlotWidthBytes(const size_t slot_idx) const
void setPaddedSlotWidthBytes(const size_t slot_idx, const int8_t bytes)
const int8_t getSlotIndexForSingleSlotCol(const size_t col_idx) const
size_t getPaddedColWidthForRange(const size_t offset, const size_t range) const
void useConsistentSlotWidthSize(const int8_t slot_width_size)
size_t getRowWidth() const
int8_t updateActualMinByteWidth(const int8_t actual_min_byte_width) const
void addColSlotInfo(const std::vector<std::tuple<int8_t, int8_t>> &slots_for_col)
void clearSlotInfo()
void alignPaddedSlots()
int64_t getTargetGroupbyIndex(const size_t target_idx) const
void setAllTargetGroupbyIndices(std::vector<int64_t> group_by_indices)
size_t targetGroupbyIndicesSize() const
size_t targetGroupbyNegativeIndicesSize() const
void clearTargetGroupbyIndices()
size_t getEntryCount() const
void setEntryCount(const size_t val)
int64_t getMinVal() const
int64_t getMaxVal() const
int64_t getBucket() const
bool hasNulls() const
const CountDistinctDescriptor &getCountDistinctDescriptor(const size_t idx) const
size_t getCountDistinctDescriptorsSize() const
bool sortOnGpu() const
bool canOutputColumnar() const
bool didOutputColumnar() const
void setOutputColumnar(const bool val)
bool useStreamingTopN() const
bool isLogicalSizedColumnsAllowed() const
bool mustUseBaselineSort() const
bool forceFourByteFloat() const
void setForceFourByteFloat(const bool val)
size_t getGroupbyColCount() const
size_t getKeyCount() const
size_t getBufferColSlotCount() const
size_t getBufferSizeBytes(const RelAlgExecutionUnit &ra_exe_unit, const unsigned thread_count, const ExecutorDeviceType device_type) const
size_t getBufferSizeBytes(const ExecutorDeviceType device_type) const
size_t getBufferSizeBytes(const ExecutorDeviceType device_type, const size_t override_entry_count) const

Returns the total amount of output buffer memory for each device (CPU/GPU).

Columnar: for a projection, it returns the index buffer plus the columnar buffer (all non-lazy columns); for a table function, it returns only the columnar buffer; for a group by, it returns the amount required for each group column (assuming 64 bits per group) plus the columnar buffer (all involved aggregate columns).

Row-wise: returns the required memory per row multiplied by the number of entries.

const ColSlotContext &getColSlotContext() const
bool usesGetGroupValueFast() const
bool blocksShareMemory() const
bool threadsShareMemory() const
bool lazyInitGroups(const ExecutorDeviceType device_type) const
bool interleavedBins(const ExecutorDeviceType device_type) const
size_t getColOffInBytes(const size_t col_idx) const
size_t getColOffInBytesInNextBin(const size_t col_idx) const
size_t getNextColOffInBytes(const int8_t *col_ptr, const size_t bin, const size_t col_idx) const
size_t getNextColOffInBytesRowOnly(const int8_t *col_ptr, const size_t col_idx) const
size_t getColOnlyOffInBytes(const size_t col_idx) const
size_t getRowSize() const
size_t getColsSize() const
size_t getWarpCount() const
size_t getCompactByteWidth() const
size_t getEffectiveKeyWidth() const
bool isWarpSyncRequired(const ExecutorDeviceType device_type) const
std::string queryDescTypeToString() const
std::string toString() const
std::string reductionKey() const
bool hasVarlenOutput() const
std::optional<size_t> varlenOutputBufferElemSize() const
size_t varlenOutputRowSizeToSlot(const size_t slot_idx) const
bool slotIsVarlenOutput(const size_t slot_idx) const

Public Static Functions

static TResultSetBufferDescriptor toThrift(const QueryMemoryDescriptor&)
std::unique_ptr<QueryMemoryDescriptor> init(const Executor *executor, const RelAlgExecutionUnit &ra_exe_unit, const std::vector<InputTableInfo> &query_infos, const ColRangeInfo &col_range_info, const KeylessInfo &keyless_info, const bool allow_multifrag, const ExecutorDeviceType device_type, const int8_t crt_min_byte_width, const bool sort_on_gpu_hint, const size_t shard_count, const size_t max_groups_buffer_entry_count, RenderInfo *render_info, const CountDistinctDescriptors count_distinct_descriptors, const bool must_use_baseline_sort, const bool output_columnar_hint, const bool streaming_top_n_hint)
static bool many_entries(const int64_t max_val, const int64_t min_val, const int64_t bucket)
static bool countDescriptorsLogicallyEmpty(const CountDistinctDescriptors &count_distinct_descriptors)
int8_t pick_target_compact_width(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<InputTableInfo> &query_infos, const int8_t crt_min_byte_width)

Protected Functions

void resetGroupColWidths(const std::vector<int8_t> &new_group_col_widths)

Private Functions

size_t getTotalBytesOfColumnarBuffers() const

Returns the maximum total number of bytes (including required paddings) to store all non-lazy columns’ results for columnar cases.

size_t getTotalBytesOfColumnarBuffers(const size_t num_entries_per_column) const

This is a helper function that returns the total number of bytes (including required paddings) to store all non-lazy columns’ results for columnar cases.

size_t getTotalBytesOfColumnarProjections(const size_t projection_count) const

Returns the effective total number of bytes from columnar projections, which includes: 1) the total number of bytes used to store all non-lazy columns, and 2) the total number of bytes used to store row indices (for lazy fetches, etc.).

NOTE: this function does not return the size of the buffers dedicated to the results, but rather the memory required to fill all valid results into a new compact buffer (with no holes in it).

Private Members

const Executor *executor_
bool allow_multifrag_
QueryDescriptionType query_desc_type_
bool keyless_hash_
bool interleaved_bins_on_gpu_
int32_t idx_target_as_key_
std::vector<int8_t> group_col_widths_
int8_t group_col_compact_width_
std::vector<int64_t> target_groupby_indices_
size_t entry_count_
int64_t min_val_
int64_t max_val_
int64_t bucket_
bool has_nulls_
CountDistinctDescriptors count_distinct_descriptors_
bool sort_on_gpu_
bool output_columnar_
bool render_output_
bool must_use_baseline_sort_
bool is_table_function_
bool use_streaming_top_n_
bool force_4byte_float_
ColSlotContext col_slot_context_

Friends

friend QueryMemoryDescriptor::ResultSet
friend QueryMemoryDescriptor::QueryExecutionContext