Class ResultSet¶
-
class
ResultSet
¶ Public Types
-
enum
GeoReturnType
¶ Geo return type options when accessing geo columns from a result set.
Values:
-
GeoTargetValue
¶ Copies the geo data into a struct of vectors - coords are uncompressed
-
WktString
¶ Returns the geo data as a WKT string
-
GeoTargetValuePtr
¶ Returns only the pointers of the underlying buffers for the geo data.
-
GeoTargetValueGpuPtr
¶ If geo data is currently on a device, keep the data on the device and return the device ptrs
-
Public Functions
-
ResultSet
(const std::string &explanation)¶
-
~ResultSet
()¶
-
std::string
toString
() const¶
-
std::string
summaryToString
() const¶
-
ResultSetRowIterator
rowIterator
(size_t from_logical_index, bool translate_strings, bool decimal_to_double) const¶
-
ResultSetRowIterator
rowIterator
(bool translate_strings, bool decimal_to_double) const¶
-
ExecutorDeviceType
getDeviceType
() const¶
-
const ResultSetStorage *
allocateStorage
() const¶
-
const ResultSetStorage *
allocateStorage
(const std::vector<int64_t>&) const¶
-
void
updateStorageEntryCount
(const size_t new_entry_count)¶
-
std::vector<TargetValue>
getNextRow
(const bool translate_strings, const bool decimal_to_double) const¶
-
size_t
getCurrentRowBufferIndex
() const¶
-
std::vector<TargetValue>
getRowAt
(const size_t index) const¶
-
TargetValue
getRowAt
(const size_t row_idx, const size_t col_idx, const bool translate_strings, const bool decimal_to_double = true) const¶
-
OneIntegerColumnRow
getOneColRow
(const size_t index) const¶
-
std::vector<TargetValue>
getRowAtNoTranslations
(const size_t index, const std::vector<bool> &targets_to_skip = {}) const¶
-
bool
isRowAtEmpty
(const size_t index) const¶
-
void
sort
(const std::list<Analyzer::OrderEntry> &order_entries, size_t top_n, const Executor *executor)¶
-
void
keepFirstN
(const size_t n)¶
-
void
dropFirstN
(const size_t n)¶
-
const ResultSetStorage *
getStorage
() const¶
-
size_t
colCount
() const¶
-
SQLTypeInfo
getColType
(const size_t col_idx) const¶
-
size_t
rowCount
(const bool force_parallel = false) const¶ Returns the number of valid entries in the result set (i.e. those that will be returned from the SQL query or passed as input to the next query step).
Note that this can be less than or equal to the value returned by ResultSet::entryCount(), whether due to a SQL LIMIT/OFFSET applied or because the result set representation is inherently sparse (i.e. baseline hash group by).
Internally this function references/sets a cached value (
cached_row_count_
) so that the cost of computing the result is only paid once per result set. If the actual row count is not cached and needs to be computed, in some cases that can be O(1) (i.e. if limits and offsets are present, or for the output of a table function). For projections, we use a binary search, so it is O(log n), otherwise it is O(n) (with n being ResultSet::entryCount()), which will be run in parallel if the entry count >= the default of 20000 or if
force_parallel
is set to true. Note that we currently do not invalidate the cache if the result set is changed (i.e. appended to), so this function should only be called after the result set is finalized.
- Parameters
force_parallel
: Forces the row count to be computed in parallel if the row count cannot otherwise be computed from metadata or via a binary search (otherwise parallel search is automatically used for result sets with entryCount() >= 20000
)
-
void
invalidateCachedRowCount
() const¶
-
void
setCachedRowCount
(const size_t row_count) const¶
-
bool
isEmpty
() const¶ Returns a boolean signifying whether there are valid entries in the result set.
Note a result set can be logically empty even if the value returned by
ResultSet::entryCount()
is > 0, whether due to a SQL LIMIT/OFFSET applied or because the result set representation is inherently sparse (i.e. baseline hash group by). Internally this function is just implemented as
ResultSet::rowCount() == 0
, which caches its value so the row count will only be computed once per finalized result set.
-
size_t
entryCount
() const¶ Returns the number of entries the result set is allocated to hold.
Note that this can be greater than or equal to the actual number of valid rows in the result set, whether due to a SQL LIMIT/OFFSET applied or because the result set representation is inherently sparse (i.e. baseline hash group by)
For getting the number of valid rows in the result set (inclusive of any applied LIMIT and/or OFFSET), use
ResultSet::rowCount().
Or to just test if there are any valid rows, use ResultSet::isEmpty()
, as a return value from entryCount()
greater than 0 does not necessarily mean the result set is non-empty.
-
size_t
getBufferSizeBytes
(const ExecutorDeviceType device_type) const¶
-
bool
definitelyHasNoRows
() const¶
-
const QueryMemoryDescriptor &
getQueryMemDesc
() const¶
-
const std::vector<TargetInfo> &
getTargetInfos
() const¶
-
const std::vector<int64_t> &
getTargetInitVals
() const¶
-
int8_t *
getDeviceEstimatorBuffer
() const¶
-
int8_t *
getHostEstimatorBuffer
() const¶
-
void
syncEstimatorBuffer
() const¶
-
size_t
getNDVEstimator
() const¶
-
void
setQueueTime
(const int64_t queue_time)¶
-
void
setKernelQueueTime
(const int64_t kernel_queue_time)¶
-
void
addCompilationQueueTime
(const int64_t compilation_queue_time)¶
-
int64_t
getQueueTime
() const¶
-
int64_t
getRenderTime
() const¶
-
void
moveToBegin
() const¶
-
bool
isTruncated
() const¶
-
bool
isExplain
() const¶
-
void
setValidationOnlyRes
()¶
-
bool
isValidationOnlyRes
() const¶
-
std::string
getExplanation
() const¶
-
bool
isGeoColOnGpu
(const size_t col_idx) const¶
-
int
getDeviceId
() const¶
-
void
fillOneEntry
(const std::vector<int64_t> &entry)¶
-
void
initializeStorage
() const¶
-
void
holdLiterals
(std::vector<int8_t> &literal_buff)¶
-
std::shared_ptr<RowSetMemoryOwner>
getRowSetMemOwner
() const¶
-
const Permutation &
getPermutationBuffer
() const¶
-
const bool
isPermutationBufferEmpty
() const¶
-
void
serialize
(TSerializedRows &serialized_rows) const¶
-
size_t
getLimit
() const¶
-
ResultSetPtr
copy
()¶
-
void
clearPermutation
()¶
-
void
initStatus
()¶
-
void
invalidateResultSetChunks
()¶
-
const bool
isEstimator
() const¶
-
void
setCached
(bool val)¶
-
const bool
isCached
() const¶
-
void
setExecTime
(const long exec_time)¶
-
const long
getExecTime
() const¶
-
void
setQueryPlanHash
(const QueryPlanHash query_plan)¶
-
const QueryPlanHash
getQueryPlanHash
()¶
-
std::unordered_set<size_t>
getInputTableKeys
() const¶
-
void
setInputTableKeys
(std::unordered_set<size_t> &&intput_table_keys)¶
-
void
setTargetMetaInfo
(const std::vector<TargetMetaInfo> &target_meta_info)¶
-
std::vector<TargetMetaInfo>
getTargetMetaInfo
()¶
-
std::optional<bool>
canUseSpeculativeTopNSort
() const¶
-
void
setUseSpeculativeTopNSort
(bool value)¶
-
const bool
hasValidBuffer
() const¶
-
GeoReturnType
getGeoReturnType
() const¶
-
void
setGeoReturnType
(const GeoReturnType val)¶
-
void
copyColumnIntoBuffer
(const size_t column_idx, int8_t *output_buffer, const size_t output_buffer_size) const¶ For each specified column, this function goes through all available storages and copies its content into a contiguous output_buffer
-
bool
isDirectColumnarConversionPossible
() const¶ Determines if it is possible to directly form a ColumnarResults class from this result set, bypassing the default columnarization.
NOTE: If there exists a permutation vector (i.e., in some ORDER BY queries), it becomes equivalent to the row-wise columnarization.
-
bool
didOutputColumnar
() const¶
-
bool
isZeroCopyColumnarConversionPossible
(size_t column_idx) const¶
-
const int8_t *
getColumnarBuffer
(size_t column_idx) const¶
-
QueryDescriptionType
getQueryDescriptionType
() const¶
-
const int8_t
getPaddedSlotWidthBytes
(const size_t slot_idx) const¶
-
std::tuple<std::vector<bool>, size_t>
getSingleSlotTargetBitmap
() const¶
-
std::tuple<std::vector<bool>, size_t>
getSupportedSingleSlotTargetBitmap
() const¶ Returns a bitmap, together with its population count, that marks all supported single-column targets suitable for direct columnarization.
The final goal is to remove the need for such selection, but at the moment for any target that doesn’t qualify for direct columnarization, we use the traditional result set’s iteration to handle it (e.g., count distinct, approximate count distinct)
-
std::vector<size_t>
getSlotIndicesForTargetIndices
() const¶
-
const std::vector<ColumnLazyFetchInfo> &
getLazyFetchInfo
() const¶
-
bool
areAnyColumnsLazyFetched
() const¶
-
size_t
getNumColumnsLazyFetched
() const¶
-
void
setSeparateVarlenStorageValid
(const bool val)¶
-
const std::vector<std::string>
getStringDictionaryPayloadCopy
(const int dict_id) const¶
-
const std::pair<std::vector<int32_t>, std::vector<std::string>>
getUniqueStringsForDictEncodedTargetCol
(const size_t col_idx) const¶
-
StringDictionaryProxy *
getStringDictionaryProxy
(int const dict_id) const¶
-
template<typename
ENTRY_TYPE
, QueryDescriptionType QUERY_TYPE
, bool COLUMNAR_FORMAT
>
ENTRY_TYPE getEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const¶
-
ChunkStats
getTableFunctionChunkStats
(const size_t target_idx) const¶
-
void
translateDictEncodedColumns
(std::vector<TargetInfo> const &targets, size_t const start_idx)¶
-
void
eachCellInColumn
(RowIterationState &state, CellCallback const &func)¶
-
template<typename
ENTRY_TYPE
, QueryDescriptionType QUERY_TYPE
, bool COLUMNAR_FORMAT
>
ENTRY_TYPE getEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getColumnarPerfectHashEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const¶ Directly accesses the result set’s storage buffer for a particular data type (columnar output, perfect hash group by)
NOTE: Currently, only used in direct columnarization
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getRowWisePerfectHashEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const¶ Directly accesses the result set’s storage buffer for a particular data type (row-wise output, perfect hash group by)
NOTE: Currently, only used in direct columnarization
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getRowWiseBaselineEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const¶ Directly accesses the result set’s storage buffer for a particular data type (row-wise output, baseline hash group by)
NOTE: Currently, only used in direct columnarization
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getColumnarBaselineEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const¶ Directly accesses the result set’s storage buffer for a particular data type (columnar output, baseline hash group by)
NOTE: Currently, only used in direct columnarization
Public Members
-
friend ResultSet::ResultSetBuilder
Public Static Functions
-
QueryMemoryDescriptor
fixupQueryMemoryDescriptor
(const QueryMemoryDescriptor &query_mem_desc)¶
-
static std::unique_ptr<ResultSet>
unserialize
(const TSerializedRows &serialized_rows, const Executor *)¶
-
double
calculateQuantile
(quantile::TDigest *const t_digest)¶
Private Types
-
using
ApproxQuantileBuffers
= std::vector<std::vector<double>>¶
-
using
SerializedVarlenBufferStorage
= std::vector<std::string>¶
Private Functions
-
void
advanceCursorToNextEntry
(ResultSetRowIterator &iter) const¶
-
std::vector<TargetValue>
getNextRowImpl
(const bool translate_strings, const bool decimal_to_double) const¶
-
std::vector<TargetValue>
getNextRowUnlocked
(const bool translate_strings, const bool decimal_to_double) const¶
-
std::vector<TargetValue>
getRowAt
(const size_t index, const bool translate_strings, const bool decimal_to_double, const bool fixup_count_distinct_pointers, const std::vector<bool> &targets_to_skip = {}) const¶
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getColumnarPerfectHashEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getRowWisePerfectHashEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getRowWiseBaselineEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
-
template<typename
ENTRY_TYPE
>
ENTRY_TYPE getColumnarBaselineEntryAt
(const size_t row_idx, const size_t target_idx, const size_t slot_idx) const
-
size_t
binSearchRowCount
() const¶
-
size_t
parallelRowCount
() const¶
-
size_t
advanceCursorToNextEntry
() const¶
-
void
radixSortOnGpu
(const std::list<Analyzer::OrderEntry> &order_entries) const¶
-
void
radixSortOnCpu
(const std::list<Analyzer::OrderEntry> &order_entries) const¶
-
TargetValue
getTargetValueFromBufferRowwise
(int8_t *rowwise_target_ptr, int8_t *keys_ptr, const size_t entry_buff_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double, const bool fixup_count_distinct_pointers) const¶
-
TargetValue
getTargetValueFromBufferColwise
(const int8_t *col_ptr, const int8_t *keys_ptr, const QueryMemoryDescriptor &query_mem_desc, const size_t local_entry_idx, const size_t global_entry_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t slot_idx, const bool translate_strings, const bool decimal_to_double) const¶
-
TargetValue
makeTargetValue
(const int8_t *ptr, const int8_t compact_sz, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const bool decimal_to_double, const size_t entry_buff_idx) const¶
-
TargetValue
makeVarlenTargetValue
(const int8_t *ptr1, const int8_t compact_sz1, const int8_t *ptr2, const int8_t compact_sz2, const TargetInfo &target_info, const size_t target_logical_idx, const bool translate_strings, const size_t entry_buff_idx) const¶
-
TargetValue
makeGeoTargetValue
(const int8_t *geo_target_ptr, const size_t slot_idx, const TargetInfo &target_info, const size_t target_logical_idx, const size_t entry_buff_idx) const¶
-
InternalTargetValue
getVarlenOrderEntry
(const int64_t str_ptr, const size_t str_len) const¶
-
int64_t
lazyReadInt
(const int64_t ival, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const¶
-
std::pair<size_t, size_t>
getStorageIndex
(const size_t entry_idx) const¶ Returns a (storageIdx, entryIdx) pair, where storageIdx identifies the storage object (0 is storage_; otherwise storageIdx - 1 is the index into appended_storage_) and entryIdx is the local index into that storage object.
-
const std::vector<const int8_t *> &
getColumnFrag
(const size_t storge_idx, const size_t col_logical_idx, int64_t &global_idx) const¶
-
const VarlenOutputInfo *
getVarlenOutputInfo
(const size_t entry_idx) const¶
-
ResultSet::StorageLookupResult
findStorage
(const size_t entry_idx) const¶
-
Comparator
createComparator
(const std::list<Analyzer::OrderEntry> &order_entries, const PermutationView permutation, const Executor *executor, const bool single_threaded)¶
-
PermutationView
initPermutationBuffer
(PermutationView permutation, PermutationIdx const begin, PermutationIdx const end) const¶
-
void
parallelTop
(const std::list<Analyzer::OrderEntry> &order_entries, const size_t top_n, const Executor *executor)¶
-
void
baselineSort
(const std::list<Analyzer::OrderEntry> &order_entries, const size_t top_n, const Executor *executor)¶
-
void
doBaselineSort
(const ExecutorDeviceType device_type, const std::list<Analyzer::OrderEntry> &order_entries, const size_t top_n, const Executor *executor)¶
-
bool
canUseFastBaselineSort
(const std::list<Analyzer::OrderEntry> &order_entries, const size_t top_n)¶
-
size_t
rowCountImpl
(const bool force_parallel) const¶
-
int
getGpuCount
() const¶
-
void
serializeProjection
(TSerializedRows &serialized_rows) const¶
-
void
serializeVarlenAggColumn
(int8_t *buf, std::vector<std::string> &varlen_bufer) const¶
-
void
serializeCountDistinctColumns
(TSerializedRows&) const¶
-
void
unserializeCountDistinctColumns
(const TSerializedRows&)¶
-
void
fixupCountDistinctPointers
()¶
-
void
create_active_buffer_set
(CountDistinctSet &count_distinct_active_buffer_set) const¶
-
int64_t
getDistinctBufferRefFromBufferRowwise
(int8_t *rowwise_target_ptr, const TargetInfo &target_info) const¶
Private Members
-
const std::vector<TargetInfo>
targets_
¶
-
const ExecutorDeviceType
device_type_
¶
-
const int
device_id_
¶
-
QueryMemoryDescriptor
query_mem_desc_
¶
-
std::unique_ptr<ResultSetStorage>
storage_
¶
-
AppendedStorage
appended_storage_
¶
-
size_t
crt_row_buff_idx_
¶
-
size_t
fetched_so_far_
¶
-
size_t
drop_first_
¶
-
size_t
keep_first_
¶
-
std::shared_ptr<RowSetMemoryOwner>
row_set_mem_owner_
¶
-
Permutation
permutation_
¶
-
unsigned
block_size_
= {0}¶
-
unsigned
grid_size_
= {0}¶
-
QueryExecutionTimings
timings_
¶
-
std::vector<std::vector<int8_t>>
literal_buffers_
¶
-
std::vector<ColumnLazyFetchInfo>
lazy_fetch_info_
¶
-
std::vector<std::vector<std::vector<const int8_t *>>>
col_buffers_
¶
-
std::vector<std::vector<std::vector<int64_t>>>
frag_offsets_
¶
-
std::vector<std::vector<int64_t>>
consistent_frag_sizes_
¶
-
Data_Namespace::AbstractBuffer *
device_estimator_buffer_
= {nullptr}¶
-
int8_t *
host_estimator_buffer_
= {nullptr}¶
-
std::vector<SerializedVarlenBufferStorage>
serialized_varlen_buffer_
¶
-
bool
separate_varlen_storage_valid_
¶
-
std::string
explanation_
¶
-
const bool
just_explain_
¶
-
bool
for_validation_only_
¶
-
std::atomic<int64_t>
cached_row_count_
¶
-
std::mutex
row_iteration_mutex_
¶
-
GeoReturnType
geo_return_type_
¶
-
bool
cached_
¶
-
size_t
query_exec_time_
¶
-
QueryPlanHash
query_plan_
¶
-
std::unordered_set<size_t>
input_table_keys_
¶
-
std::vector<TargetMetaInfo>
target_meta_info_
¶
-
std::optional<bool>
can_use_speculative_top_n_sort
¶
Private Static Functions
-
bool
isNull
(const SQLTypeInfo &ti, const InternalTargetValue &val, const bool float_argument_input)¶
-
PermutationView
topPermutation
(PermutationView permutation, const size_t n, const Comparator &compare)¶
Friends
-
friend
ResultSet::ResultSetManager
-
friend
ResultSet::ResultSetRowIterator
-
friend
ResultSet::ColumnarResults
-
struct
ColumnWiseTargetAccessor
¶ Public Functions
-
void
initializeOffsetsForStorage
()¶
-
InternalTargetValue
getColumnInternal
(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const¶
Public Members
-
std::vector<std::vector<TargetOffsets>>
offsets_for_storage_
¶
-
void
-
struct
QueryExecutionTimings
¶
-
template<typename
BUFFER_ITERATOR_TYPE
>
struct ResultSetComparator
¶ Public Types
-
template<>
using BufferIteratorType
= BUFFER_ITERATOR_TYPE¶
Public Functions
-
ResultSetComparator
(const std::list<Analyzer::OrderEntry> &order_entries, const ResultSet *result_set, const PermutationView permutation, const Executor *executor, const bool single_threaded)¶
-
void
materializeCountDistinctColumns
()¶
-
ResultSet::ApproxQuantileBuffers
materializeApproxQuantileColumns
() const¶
-
std::vector<int64_t>
materializeCountDistinctColumn
(const Analyzer::OrderEntry &order_entry) const¶
-
ResultSet::ApproxQuantileBuffers::value_type
materializeApproxQuantileColumn
(const Analyzer::OrderEntry &order_entry) const¶
-
bool
operator()
(const PermutationIdx lhs, const PermutationIdx rhs) const¶
Public Members
-
const std::list<Analyzer::OrderEntry> &
order_entries_
¶
-
const PermutationView
permutation_
¶
-
const BufferIteratorType
buffer_itr_
¶
-
const bool
single_threaded_
¶
-
std::vector<std::vector<int64_t>>
count_distinct_materialized_buffers_
¶
-
const ApproxQuantileBuffers
approx_quantile_materialized_buffers_
¶
-
template<>
-
struct
RowIterationState
¶
-
struct
RowWiseTargetAccessor
¶ Public Functions
-
InternalTargetValue
getColumnInternal
(const int8_t *buff, const size_t entry_idx, const size_t target_logical_idx, const StorageLookupResult &storage_lookup_result) const¶
-
void
initializeOffsetsForStorage
()¶
-
const int8_t *
get_rowwise_ptr
(const int8_t *buff, const size_t entry_idx) const¶
-
InternalTargetValue
-
struct
StorageLookupResult
¶ Public Members
-
const ResultSetStorage *
storage_ptr
¶
-
const size_t
fixedup_entry_idx
¶
-
const size_t
storage_idx
¶
-
const ResultSetStorage *
-
struct
TargetOffsets
¶
-
enum