Class QueryFragmentDescriptor

- class QueryFragmentDescriptor

Public Functions
- QueryFragmentDescriptor(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<InputTableInfo> &query_infos, const std::vector<Data_Namespace::MemoryInfo> &gpu_mem_infos, const double gpu_input_mem_limit_percent, const std::vector<size_t> allowed_outer_fragment_indices)
- void buildFragmentKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const ExecutorDeviceType &device_type, const bool enable_multifrag_kernels, const bool enable_inner_join_fragment_skipping, Executor *executor)
- template<typename DISPATCH_FCN>
  void assignFragsToMultiDispatch(DISPATCH_FCN f) const

  Dispatch multi-fragment kernels. Currently GPU only. Each GPU should have only one kernel, with multiple fragments in its fragments list.
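  A minimal, self-contained sketch of the dispatch shape is given below. The DeviceKernel type, the map layout, and the assign_frags_to_multi_dispatch helper are hypothetical stand-ins invented for illustration (they are not the library's actual types); the sketch only models the documented contract that each GPU receives exactly one kernel whose fragment list carries all of that device's fragments.

      #include <cstdio>
      #include <map>
      #include <vector>

      // Hypothetical stand-in for an execution kernel descriptor: one per GPU,
      // carrying every fragment index assigned to that device.
      struct DeviceKernel {
        std::vector<size_t> fragment_ids;
      };

      // Simplified model of assignFragsToMultiDispatch: invoke the dispatch
      // functor exactly once per device, handing it that device's single kernel.
      template <typename DISPATCH_FCN>
      void assign_frags_to_multi_dispatch(
          const std::map<int, DeviceKernel>& kernels_per_device, DISPATCH_FCN f) {
        for (const auto& [device_id, kernel] : kernels_per_device) {
          f(device_id, kernel);
        }
      }

      int main() {
        std::map<int, DeviceKernel> kernels_per_device{
            {0, DeviceKernel{{0, 2, 4}}},  // GPU 0: fragments 0, 2, 4 in one kernel
            {1, DeviceKernel{{1, 3, 5}}},  // GPU 1: fragments 1, 3, 5 in one kernel
        };
        assign_frags_to_multi_dispatch(
            kernels_per_device, [](int device_id, const DeviceKernel& kernel) {
              std::printf("device %d: one kernel, %zu fragments\n", device_id,
                          kernel.fragment_ids.size());
            });
      }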
- template<typename DISPATCH_FCN>
  void assignFragsToKernelDispatch(DISPATCH_FCN f, const RelAlgExecutionUnit &ra_exe_unit) const

  Dispatch one fragment for each device. Iterate the device map and dispatch one kernel for each device per iteration. This allows balanced dispatch as well as early termination if the number of rows passing the kernel can be computed at dispatch time and the scan limit is reached.
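  The round-robin iteration and the early-termination behavior can be modeled with the self-contained sketch below. The FragmentKernel type, the explicit scan_limit parameter, and the row-count bookkeeping are illustrative assumptions standing in for the real per-device ExecutionKernelDescriptor map and the terminateDispatchMaybe() check; they are not the library's actual API.

      #include <cstdio>
      #include <map>
      #include <optional>
      #include <vector>

      // Hypothetical stand-in: one kernel per fragment, with a row count that may
      // be known at dispatch time (when it is not, early termination cannot apply).
      struct FragmentKernel {
        size_t fragment_id;
        std::optional<size_t> row_count;
      };

      // Simplified model of assignFragsToKernelDispatch: each outer round
      // dispatches at most one kernel per device, which keeps work balanced
      // across devices and lets dispatch stop once the scan limit is satisfied.
      template <typename DISPATCH_FCN>
      void assign_frags_to_kernel_dispatch(
          const std::map<int, std::vector<FragmentKernel>>& kernels_per_device,
          const std::optional<size_t> scan_limit, DISPATCH_FCN f) {
        size_t dispatched_rows = 0;
        for (size_t round = 0;; ++round) {
          bool dispatched_any = false;
          for (const auto& [device_id, kernels] : kernels_per_device) {
            if (round >= kernels.size()) {
              continue;  // this device has no kernel left in this round
            }
            f(device_id, kernels[round]);
            dispatched_any = true;
            if (scan_limit && kernels[round].row_count) {
              dispatched_rows += *kernels[round].row_count;
              if (dispatched_rows >= *scan_limit) {
                return;  // enough rows dispatched to satisfy the scan limit
              }
            }
          }
          if (!dispatched_any) {
            return;  // every device's kernel list is exhausted
          }
        }
      }

      int main() {
        std::map<int, std::vector<FragmentKernel>> kernels_per_device;
        kernels_per_device[0] = {FragmentKernel{0, 100}, FragmentKernel{2, 100}};
        kernels_per_device[1] = {FragmentKernel{1, 100}, FragmentKernel{3, 100}};
        // With a scan limit of 150 rows, dispatch stops after the second kernel.
        assign_frags_to_kernel_dispatch(
            kernels_per_device, /*scan_limit=*/150,
            [](int device_id, const FragmentKernel& kernel) {
              std::printf("fragment %zu dispatched on device %d\n",
                          kernel.fragment_id, device_id);
            });
      }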
- bool shouldCheckWorkUnitWatchdog() const

Public Static Functions
- void computeAllTablesFragments(std::map<int, const TableFragments *> &all_tables_fragments, const RelAlgExecutionUnit &ra_exe_unit, const std::vector<InputTableInfo> &query_infos)

Protected Functions
- void buildFragmentPerKernelMapForUnion(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, Executor *executor)
- void buildFragmentPerKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, Executor *executor)
- void buildMultifragKernelMap(const RelAlgExecutionUnit &ra_exe_unit, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ExecutorDeviceType &device_type, const bool enable_inner_join_fragment_skipping, Executor *executor)
- void buildFragmentPerKernelForTable(const TableFragments *fragments, const RelAlgExecutionUnit &ra_exe_unit, const InputDescriptor &table_desc, const bool is_temporary_table, const std::vector<uint64_t> &frag_offsets, const int device_count, const size_t num_bytes_for_row, const ChunkMetadataVector &deleted_chunk_metadata_vec, const std::optional<size_t> table_desc_offset, const ExecutorDeviceType &device_type, Executor *executor)
- bool terminateDispatchMaybe(size_t &tuple_count, const RelAlgExecutionUnit &ra_exe_unit, const ExecutionKernelDescriptor &kernel) const
- void checkDeviceMemoryUsage(const Fragmenter_Namespace::FragmentInfo &fragment, const int device_id, const size_t num_cols)

Protected Attributes
- std::vector<size_t> allowed_outer_fragment_indices_
- size_t outer_fragments_size_ = 0
- int64_t rowid_lookup_key_ = -1
- std::map<int, const TableFragments *> selected_tables_fragments_
- std::map<int, std::vector<ExecutionKernelDescriptor>> execution_kernels_per_device_
- double gpu_input_mem_limit_percent_
- std::map<size_t, size_t> tuple_count_per_device_
- std::map<size_t, size_t> available_gpu_mem_bytes_