Class ArrowResultSetConverter

class ArrowResultSetConverter

Public Functions

ArrowResultSetConverter(const std::shared_ptr<ResultSet> &results, const std::shared_ptr<Data_Namespace::DataMgr> data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector<std::string> &col_names, const int32_t first_n, const ArrowTransport transport_method)
ArrowResultSetConverter(const std::shared_ptr<ResultSet> &results, const std::shared_ptr<Data_Namespace::DataMgr> data_mgr, const ExecutorDeviceType device_type, const int32_t device_id, const std::vector<std::string> &col_names, const int32_t first_n, const ArrowTransport transport_method, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)
ArrowResult getArrowResult() const

Serialize an Arrow result to IPC memory. Users are responsible for freeing all CPU IPC buffers using deallocateArrowResultBuffer. GPU buffers will become owned by the caller upon deserialization, and will be automatically freed when they go out of scope.

ArrowResultSetConverter(const std::shared_ptr<ResultSet> &results, const std::vector<std::string> &col_names, const int32_t first_n)
ArrowResultSetConverter(const std::shared_ptr<ResultSet> &results, const std::vector<std::string> &col_names, const int32_t first_n, const size_t min_result_size_for_bulk_dictionary_fetch, const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch)
std::shared_ptr<arrow::RecordBatch> convertToArrow() const

Public Static Attributes

constexpr size_t default_min_result_size_for_bulk_dictionary_fetch = {10000UL}
constexpr double default_max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch = {0.1}

Private Functions

std::shared_ptr<arrow::RecordBatch> getArrowBatch(const std::shared_ptr<arrow::Schema> &schema) const
std::shared_ptr<arrow::Field> makeField(const std::string name, const SQLTypeInfo &target_type) const
ArrowResultSetConverter::SerializedArrowOutput getSerializedArrowOutput(arrow::ipc::DictionaryFieldMapper *mapper) const
void initializeColumnBuilder(ColumnBuilder &column_builder, const SQLTypeInfo &col_type, const size_t result_col_idx, const std::shared_ptr<arrow::Field> &field) const
void append(ColumnBuilder &column_builder, const ValueArray &values, const std::shared_ptr<std::vector<bool>> &is_valid) const
std::shared_ptr<arrow::Array> finishColumnBuilder(ColumnBuilder &column_builder) const

Private Members

std::shared_ptr<ResultSet> results_
std::shared_ptr<Data_Namespace::DataMgr> data_mgr_ = nullptr
ExecutorDeviceType device_type_ = ExecutorDeviceType::GPU
int32_t device_id_ = 0
std::vector<std::string> col_names_
int32_t top_n_
ArrowTransport transport_method_
const size_t min_result_size_for_bulk_dictionary_fetch_
const double max_dictionary_to_result_size_ratio_for_bulk_dictionary_fetch_

Friends

friend ArrowResultSetConverter::ArrowResultSet

struct ColumnBuilder

Public Types

using StrId = int32_t
using ArrowStrId = int32_t

Public Members

std::shared_ptr<arrow::Field> field
std::unique_ptr<arrow::ArrayBuilder> builder
SQLTypeInfo col_type
SQLTypes physical_type
ArrowStringRemapMode string_remap_mode = {ArrowStringRemapMode::INVALID}
std::unordered_map<StrId, ArrowStrId> string_remapping

struct SerializedArrowOutput

Public Members

std::shared_ptr<arrow::Buffer> schema
std::shared_ptr<arrow::Buffer> records