Class OverlapsJoinHashTable

class OverlapsJoinHashTable : public HashJoin

Subclassed by RangeJoinHashTable

Public Functions

OverlapsJoinHashTable(const std::shared_ptr<Analyzer::BinOper> condition, const JoinType join_type, const std::vector<InputTableInfo> &query_infos, const Data_Namespace::MemoryLevel memory_level, ColumnCacheMap &column_cache, Executor *executor, const std::vector<InnerOuter> &inner_outer_pairs, const int device_count, HashtableAccessPathInfo hashtable_access_path_info, const TableIdToNodeMap &table_id_to_node_map)
virtual ~OverlapsJoinHashTable()

Public Static Functions

std::shared_ptr<OverlapsJoinHashTable> getInstance(const std::shared_ptr<Analyzer::BinOper> condition, const std::vector<InputTableInfo> &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)

Creates a hash table instance from an in-flight SQL query’s parse tree and the accompanying query information.

static void invalidateCache()
static void markCachedItemAsDirty(size_t table_key)
static HashtableRecycler *getHashTableCache()
static OverlapsTuningParamRecycler *getOverlapsTuningParamCache()

Protected Functions

void reify(const HashType preferred_layout)
void reifyWithLayout(const HashType layout)
void reifyImpl(std::vector<ColumnsForDevice> &columns_per_device, const Fragmenter_Namespace::TableInfo &query_info, const HashType layout, const size_t shard_count, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
void reifyForDevice(const ColumnsForDevice &columns_for_device, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching, const int device_id, const logger::ThreadId parent_thread_id)
size_t calculateHashTableSize(size_t number_of_dimensions, size_t emitted_keys_count, size_t entry_count) const
ColumnsForDevice fetchColumnsForDevice(const std::vector<Fragmenter_Namespace::FragmentInfo> &fragments, const int device_id, DeviceAllocator *dev_buff_owner)
std::pair<size_t, size_t> approximateTupleCount(const std::vector<double> &inverse_bucket_sizes_for_dimension, std::vector<ColumnsForDevice> &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
std::pair<size_t, size_t> computeHashTableCounts(const size_t shard_count, const std::vector<double> &inverse_bucket_sizes_for_dimension, std::vector<ColumnsForDevice> &columns_per_device, const size_t chosen_max_hashtable_size, const double chosen_bucket_threshold)
void setInverseBucketSizeInfo(const std::vector<double> &inverse_bucket_sizes, std::vector<ColumnsForDevice> &columns_per_device, const size_t device_count)
size_t getKeyComponentWidth() const
size_t getKeyComponentCount() const
HashType getHashType() const
Data_Namespace::MemoryLevel getMemoryLevel() const
int getDeviceCount() const
std::shared_ptr<BaselineHashTable> initHashTableOnCpu(const std::vector<JoinColumn> &join_columns, const std::vector<JoinColumnTypeInfo> &join_column_types, const std::vector<JoinBucketInfo> &join_bucket_info, const HashType layout, const size_t entry_count, const size_t emitted_keys_count, const bool skip_hashtable_caching)
HashJoinMatchingSet codegenMatchingSet(const CompilationOptions &co, const size_t index)
std::string toString(const ExecutorDeviceType device_type, const int device_id = 0, bool raw = false) const
std::set<DecodedJoinHashBufferEntry> toSet(const ExecutorDeviceType device_type, const int device_id) const
llvm::Value *codegenSlot(const CompilationOptions&, const size_t)
const RegisteredQueryHint &getRegisteredQueryHint()
void registerQueryHint(const RegisteredQueryHint &query_hint)
size_t getEntryCount() const
size_t getEmittedKeysCount() const
size_t getComponentBufferSize() const
size_t shardCount() const
Data_Namespace::MemoryLevel getEffectiveMemoryLevel(const std::vector<InnerOuter> &inner_outer_pairs) const
int getInnerTableId() const
int getInnerTableRteIdx() const
size_t getKeyBufferSize() const
size_t offsetBufferOff() const
size_t countBufferOff() const
size_t payloadBufferOff() const
std::string getHashJoinType() const
bool isBitwiseEq() const
std::shared_ptr<HashTable> initHashTableOnCpuFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
std::optional<std::pair<size_t, size_t>> getApproximateTupleCountFromCache(QueryPlanHash key, CacheItemType item_type, DeviceIdentifier device_identifier)
void putHashTableOnCpuToCache(QueryPlanHash key, CacheItemType item_type, std::shared_ptr<HashTable> hashtable_ptr, DeviceIdentifier device_identifier, size_t hashtable_building_time)
llvm::Value *codegenKey(const CompilationOptions &co)
std::vector<llvm::Value *> codegenManyKey(const CompilationOptions &co)
std::optional<OverlapsHashTableMetaInfo> getOverlapsHashTableMetaInfo()
QueryPlanHash getAlternativeCacheKey(AlternativeCacheKeyForOverlapsHashJoin &info)
void generateCacheKey(const size_t max_hashtable_size, const double bucket_threshold, const std::vector<double> &bucket_sizes)
QueryPlanHash getCacheKey() const
const std::vector<InnerOuter> &getInnerOuterPairs() const
void setOverlapsHashtableMetaInfo(size_t max_table_size_bytes, double bucket_threshold, std::vector<double> &bucket_sizes)

Protected Attributes

const std::shared_ptr<Analyzer::BinOper> condition_
const JoinType join_type_
const std::vector<InputTableInfo> &query_infos_
const Data_Namespace::MemoryLevel memory_level_
Executor *executor_
ColumnCacheMap &column_cache_
std::vector<InnerOuter> inner_outer_pairs_
const int device_count_
std::vector<double> inverse_bucket_sizes_for_dimension_
double chosen_overlaps_bucket_threshold_
size_t chosen_overlaps_max_table_size_bytes_
CompositeKeyInfo composite_key_info_
std::optional<HashType> layout_override_
std::mutex cpu_hash_table_buff_mutex_
RegisteredQueryHint query_hint_
QueryPlanDAG query_plan_dag_
QueryPlanHash hashtable_cache_key_
HashtableCacheMetaInfo hashtable_cache_meta_info_
std::unordered_set<size_t> table_keys_
const TableIdToNodeMap table_id_to_node_map_

Protected Static Attributes

std::unique_ptr<HashtableRecycler> hash_table_cache_ = std::make_unique<HashtableRecycler>(CacheItemType::OVERLAPS_HT, DataRecyclerUtil::CPU_DEVICE_IDENTIFIER)
std::unique_ptr<OverlapsTuningParamRecycler> auto_tuner_cache_ = std::make_unique<OverlapsTuningParamRecycler>()

struct AlternativeCacheKeyForOverlapsHashJoin

Public Members

std::vector<InnerOuter> inner_outer_pairs
const size_t num_elements
const std::vector<ChunkKey> chunk_key
const SQLOps optype
const size_t max_hashtable_size
const double bucket_threshold
const std::vector<double> inverse_bucket_sizes = {}