Class HashJoin

class HashJoin

Subclassed by BaselineJoinHashTable, OverlapsJoinHashTable, PerfectJoinHashTable

Public Functions

virtual std::string toString(const ExecutorDeviceType device_type, const int device_id = 0, bool raw = false) const = 0
std::string toStringFlat64(const ExecutorDeviceType device_type, const int device_id) const
std::string toStringFlat32(const ExecutorDeviceType device_type, const int device_id) const
virtual DecodedJoinHashBufferSet toSet(const ExecutorDeviceType device_type, const int device_id) const = 0
virtual llvm::Value *codegenSlot(const CompilationOptions&, const size_t) = 0
virtual HashJoinMatchingSet codegenMatchingSet(const CompilationOptions&, const size_t) = 0
virtual int getInnerTableId() const = 0
virtual int getInnerTableRteIdx() const = 0
virtual HashType getHashType() const = 0
virtual Data_Namespace::MemoryLevel getMemoryLevel() const = 0
virtual int getDeviceCount() const = 0
virtual size_t offsetBufferOff() const = 0
virtual size_t countBufferOff() const = 0
virtual size_t payloadBufferOff() const = 0
virtual std::string getHashJoinType() const = 0
virtual bool isBitwiseEq() const = 0
JoinColumn fetchJoinColumn(const Analyzer::ColumnVar *hash_col, const std::vector<Fragmenter_Namespace::FragmentInfo> &fragment_info, const Data_Namespace::MemoryLevel effective_memory_level, const int device_id, std::vector<std::shared_ptr<Chunk_NS::Chunk>> &chunks_owner, DeviceAllocator *dev_buff_owner, std::vector<std::shared_ptr<void>> &malloc_owner, Executor *executor, ColumnCacheMap *column_cache)

fetchJoinColumn() calls ColumnFetcher::makeJoinColumn(), then copies the JoinColumn’s col_chunks_buff memory onto the GPU if required by the effective_memory_level parameter. The dev_buff_owner parameter will manage the GPU memory.

HashTable *getHashTableForDevice(const size_t device_id) const
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type)
size_t getJoinHashBufferSize(const ExecutorDeviceType device_type, const int device_id) const
int8_t *getJoinHashBuffer(const ExecutorDeviceType device_type, const int device_id) const
void freeHashBufferMemory()

Public Static Functions

static bool layoutRequiresAdditionalBuffers(HashType layout)
static std::string getHashTypeString(HashType ht)
HashJoinMatchingSet codegenMatchingSet(const std::vector<llvm::Value *> &hash_join_idx_args_in, const bool is_sharded, const bool col_is_nullable, const bool is_bw_eq, const int64_t sub_buff_size, Executor *executor, const bool is_bucketized = false)
llvm::Value *codegenHashTableLoad(const size_t table_idx, Executor *executor)
std::shared_ptr<HashJoin> getInstance(const std::shared_ptr<Analyzer::BinOper> qual_bin_oper, const std::vector<InputTableInfo> &query_infos, const Data_Namespace::MemoryLevel memory_level, const JoinType join_type, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor, const HashTableBuildDagMap &hashtable_build_dag_map, const RegisteredQueryHint &query_hint, const TableIdToNodeMap &table_id_to_node_map)

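Make a hash table for an in-flight SQL query from its join qualifier (qual_bin_oper) and the accompanying query information (table infos, join type, query hints, and so on).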

std::shared_ptr<HashJoin> getSyntheticInstance(std::string_view table1, std::string_view column1, std::string_view table2, std::string_view column2, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)

Make a hash table from named tables and columns (such as for testing).

std::shared_ptr<HashJoin> getSyntheticInstance(const std::shared_ptr<Analyzer::BinOper> qual_bin_oper, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)

Make a hash table from a join qualifier expression (qual_bin_oper) instead of named tables and columns (such as for testing).

std::pair<std::string, std::shared_ptr<HashJoin>> getSyntheticInstance(std::vector<std::shared_ptr<Analyzer::BinOper>> qual_bin_opers, const Data_Namespace::MemoryLevel memory_level, const HashType preferred_hash_type, const int device_count, ColumnCacheMap &column_cache, Executor *executor)
static int getInnerTableId(const std::vector<InnerOuter> &inner_outer_pairs)
void checkHashJoinReplicationConstraint(const int table_id, const size_t shard_count, const Executor *executor)
InnerOuter normalizeColumnPair(const Analyzer::Expr *lhs, const Analyzer::Expr *rhs, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables, const bool is_overlaps_join = false)
std::vector<InnerOuter> normalizeColumnPairs(const Analyzer::BinOper *condition, const Catalog_Namespace::Catalog &cat, const TemporaryTables *temporary_tables)
CompositeKeyInfo getCompositeKeyInfo(const std::vector<InnerOuter> &inner_outer_pairs, const Executor *executor)
std::vector<const StringDictionaryProxy::IdMap *> translateCompositeStrDictProxies(const CompositeKeyInfo &composite_key_info, const Executor *executor)
std::pair<const StringDictionaryProxy *, const StringDictionaryProxy *> getStrDictProxies(const InnerOuter &cols, const Executor *executor)
const StringDictionaryProxy::IdMap *translateInnerToOuterStrDictProxies(const InnerOuter &cols, const Executor *executor)

Protected Functions

virtual size_t getComponentBufferSize() const = 0

Protected Attributes

std::vector<std::shared_ptr<HashTable>> hash_tables_for_device_