Class foreign_storage::TypedParquetInPlaceEncoder

template<typename V, typename T, typename NullType = V>
class TypedParquetInPlaceEncoder : public foreign_storage::ParquetInPlaceEncoder

Subclassed by foreign_storage::ParquetDecimalEncoder< V, T, NullType >, foreign_storage::ParquetFixedLengthEncoder< V, T, NullType >, foreign_storage::ParquetTimeEncoder< V, T, conversion_denominator, NullType >, foreign_storage::ParquetTimestampEncoder< V, T, conversion_denominator, NullType >, foreign_storage::ParquetUnsignedFixedLengthEncoder< V, T, U, NullType >, foreign_storage::ParquetTimestampEncoder< V, T, conversion_denominator * kSecsPerDay, NullType >

Public Functions

TypedParquetInPlaceEncoder(Data_Namespace::AbstractBuffer *buffer, const ColumnDescriptor *column_desciptor, const parquet::ColumnDescriptor *parquet_column_descriptor)
TypedParquetInPlaceEncoder(Data_Namespace::AbstractBuffer *buffer, const size_t omnisci_data_type_byte_size, const size_t parquet_data_type_byte_size)
void validate(const int8_t *parquet_data, const int64_t j, const SQLTypeInfo &column_type) const
void validateUsingEncodersColumnType(const int8_t *parquet_data, const int64_t j) const
void reserve(const size_t num_elements)
void appendDataTrackErrors(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values)
void validateAndAppendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values, const SQLTypeInfo &column_type, InvalidRowGroupIndices &invalid_indices)
void eraseInvalidIndicesInBuffer(const InvalidRowGroupIndices &invalid_indices)
void appendData(const int16_t *def_levels, const int16_t *rep_levels, const int64_t values_read, const int64_t levels_read, int8_t *values)

This is a specialization of ParquetInPlaceEncoder::appendData for known types; knowing the types allows for optimization.

See the comment on ParquetInPlaceEncoder::appendData for details.

void encodeAndCopyContiguous(const int8_t *parquet_data_bytes, int8_t *omnisci_data_bytes, const size_t num_elements)
void setNull(int8_t *omnisci_data_bytes)
void copy(const int8_t *omnisci_data_bytes_source, int8_t *omnisci_data_bytes_destination)
std::shared_ptr<ChunkMetadata> getRowGroupMetadata(const parquet::RowGroupMetaData *group_metadata, const int parquet_column_index, const SQLTypeInfo &column_type)

Protected Functions

virtual bool encodingIsIdentityForSameTypes() const
std::pair<T, T> getUnencodedStats(std::shared_ptr<parquet::Statistics> stats) const

Private Functions

std::pair<V, V> getEncodedStats(const parquet::ColumnDescriptor *parquet_column_descriptor, std::shared_ptr<parquet::Statistics> stats)

Private Members

int64_t current_batch_offset_ = 0

Private Static Functions

static ChunkStats getUpdatedStats(V &stats_min, V &stats_max, const SQLTypeInfo &column_type)