Class QueryFragmentDescriptor

class QueryFragmentDescriptor

Public Functions

QueryFragmentDescriptor(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<InputTableInfo> &query_infos, const std::vector<Data_Namespace::MemoryInfo> &gpu_mem_infos, const double gpu_input_mem_limit_percent, const std::vector<size_t> allowed_outer_fragment_indices)
void buildFragmentKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
template<typename DISPATCH_FCN>
void assignFragsToMultiDispatch(DISPATCH_FCN f) const

Dispatch multi-fragment kernels. Currently GPU only. Each GPU should have only one kernel, with multiple fragments in its fragments list.
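
A minimal sketch of how a caller might supply the dispatch functor. The parameter list (device id, fragments list, rowid lookup key) is an assumption inferred from the class's dispatch pattern, and launch_multifrag_kernel is a hypothetical helper, not part of this API:

    // Hedged sketch: parameter names/types are assumptions; launch_multifrag_kernel
    // is a hypothetical helper standing in for the caller's kernel launch logic.
    fragment_descriptor.assignFragsToMultiDispatch(
        [&](const int device_id,
            const auto& frag_list,           // all fragments assigned to this GPU
            const int64_t rowid_lookup_key) {
          // One kernel per GPU, covering every fragment in frag_list.
          launch_multifrag_kernel(device_id, frag_list, rowid_lookup_key);
        });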

template<typename DISPATCH_FCN>
void assignFragsToKernelDispatch(DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const

Dispatch one fragment for each device. Iterate the device map and dispatch one kernel for each device per iteration. This allows balanced dispatch as well as early termination if the number of rows passing the kernel can be computed at dispatch time and the scan limit is reached.
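
A sketch of the per-kernel dispatch path, with the same caveats as above: the functor's parameters are an assumption, and enqueue_kernel is a hypothetical helper. Each invocation of the functor corresponds to one kernel on one device:

    // Hedged sketch: one functor invocation per kernel, rotating across devices so
    // work stays balanced; dispatch may stop early once the scan limit is reached.
    fragment_descriptor.assignFragsToKernelDispatch(
        [&](const int device_id,
            const auto& frag_list,           // fragments for this single kernel
            const int64_t rowid_lookup_key) {
          enqueue_kernel(device_id, frag_list, rowid_lookup_key);  // hypothetical
        },
        ra_exe_unit);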

bool shouldCheckWorkUnitWatchdog() const

Public Static Functions

void computeAllTablesFragments(std::map<int, const TableFragments *> &all_tables_fragments, const RelAlgExecutionUnit &ra_exe_unit, const std::vector<InputTableInfo> &query_infos)

Protected Functions

void buildFragmentPerKernelMapForUnion(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, Executor *executor)
void buildFragmentPerKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, Executor *executor)
void buildMultifragKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, const bool enable_inner_join_fragment_skipping, Executor *executor)
void buildFragmentPerKernelForTable(const TableFragments *fragments, const RelAlgExecutionUnit &ra_exe_unit, const InputDescriptor &table_desc, const bool is_temporary_table, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ChunkMetadataVector &deleted_chunk_metadata_vec, const std::optional<size_t> table_desc_offset, const ExecutorDeviceType &device_type, Executor *executor)
bool terminateDispatchMaybe(size_t &tuple_count, const RelAlgExecutionUnit &ra_exe_unit, const ExecutionKernelDescriptor &kernel) const
void checkDeviceMemoryUsage(const Fragmenter_Namespace::FragmentInfo &fragment, const int device_id, const size_t num_cols)

Protected Attributes

std::vector<size_t> allowed_outer_fragment_indices_
size_t outer_fragments_size_ = 0
int64_t rowid_lookup_key_ = -1
std::map<int, const TableFragments *> selected_tables_fragments_
std::map<int, std::vector<ExecutionKernelDescriptor>> execution_kernels_per_device_
double gpu_input_mem_limit_percent_
std::map<size_t, size_t> tuple_count_per_device_
std::map<size_t, size_t> available_gpu_mem_bytes_
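
A rough end-to-end usage sketch following the signatures documented above, assuming the caller already has the execution unit, table infos, GPU memory infos, and dispatch functor at hand; all variable names below are placeholders:

    // Hedged sketch of the typical lifecycle: construct the descriptor, build the
    // fragment-to-kernel map, then dispatch. Variable names are placeholders.
    QueryFragmentDescriptor fragment_descriptor(ra_exe_unit,
                                                query_infos,
                                                gpu_mem_infos,
                                                gpu_input_mem_limit_percent,
                                                allowed_outer_fragment_indices);
    fragment_descriptor.buildFragmentKernelMap(ra_exe_unit,
                                               frag_offsets,
                                               device_count,
                                               device_type,
                                               enable_multifrag_kernels,
                                               enable_inner_join_fragment_skipping,
                                               executor);
    if (enable_multifrag_kernels) {
      fragment_descriptor.assignFragsToMultiDispatch(dispatch_fcn);
    } else {
      fragment_descriptor.assignFragsToKernelDispatch(dispatch_fcn, ra_exe_unit);
    }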