Class StringDictionary¶
-
class
StringDictionary
¶ Public Functions
-
StringDictionary
(const DictRef &dict_ref, const std::string &folder, const bool isTemp, const bool recover, const bool materializeHashes = false, size_t initial_capacity = 256)¶
-
StringDictionary
(const LeafHostInfo &host, const DictRef dict_ref)¶
-
~StringDictionary
()¶
-
int32_t
getDbId
() const¶
-
int32_t
getDictId
() const¶
-
void
eachStringSerially
(int64_t const generation, StringCallback &serial_callback) const¶
-
std::function<int32_t(std::string const&)>
makeLambdaStringToId
() const¶
-
int32_t
getOrAdd
(const std::string &str)¶
-
template<class T, class String>
size_t
getBulk
(const std::vector<String> &string_vec, T *encoded_vec) const¶
-
template<class T, class String>
size_t
getBulk
(const std::vector<String> &string_vec, T *encoded_vec, const int64_t generation) const¶
-
template<class T, class String>
void
getOrAddBulk
(const std::vector<String> &string_vec, T *encoded_vec)
-
template<class T, class String>
void
getOrAddBulkParallel
(const std::vector<String> &string_vec, T *encoded_vec)¶
-
template<class String>
void
getOrAddBulkArray
(const std::vector<std::vector<String>> &string_array_vec, std::vector<std::vector<int32_t>> &ids_array_vec)
-
template<class String>
int32_t
getIdOfString
(const String &str) const
-
std::string
getString
(int32_t string_id) const¶
-
std::pair<char *, size_t>
getStringBytes
(int32_t string_id) const¶
-
size_t
storageEntryCount
() const¶
-
std::vector<int32_t>
getLike
(const std::string &pattern, const bool icase, const bool is_simple, const char escape, const size_t generation) const¶
-
std::vector<int32_t>
getCompare
(const std::string &pattern, const std::string &comp_operator, const size_t generation)¶
-
std::vector<int32_t>
getRegexpLike
(const std::string &pattern, const char escape, const size_t generation) const¶
-
std::vector<std::string>
copyStrings
() const¶
-
std::vector<std::string_view>
getStringViews
() const¶
-
std::vector<std::string_view>
getStringViews
(const size_t generation) const¶
-
size_t
buildDictionaryTranslationMap
(const StringDictionary *dest_dict, int32_t *translated_ids, const int64_t source_generation, const int64_t dest_generation, const bool dest_has_transients, StringLookupCallback const &dest_transient_lookup_callback) const¶
-
bool
checkpoint
()¶
-
bool
isClient
() const¶
-
void
update_leaf
(const LeafHostInfo &host_info)¶
Public Static Functions
-
void
populate_string_ids
(std::vector<int32_t> &dest_ids, StringDictionary *dest_dict, const std::vector<int32_t> &source_ids, const StringDictionary *source_dict, const std::vector<std::string const *> &transient_string_vec = {})¶ Populates provided
dest_ids
vector with string ids corresponding to given source strings. Given a vector of source string ids and the corresponding source dictionary, this method populates a vector of destination string ids by either returning the string id of matching strings in the destination dictionary or creating new entries in the dictionary. Source string ids can also be transient if they were created by a function (e.g. LOWER/UPPER functions). A map of transient string ids to string values is provided in order to handle this use case.
- Parameters
dest_ids
: - vector of destination string ids to be populated
dest_dict
: - destination dictionary
source_ids
: - vector of source string ids for which destination ids are needed
source_dict
: - source dictionary
transient_string_vec
: - ordered vector of string value pointers
-
void
populate_string_array_ids
(std::vector<std::vector<int32_t>> &dest_array_ids, StringDictionary *dest_dict, const std::vector<std::vector<int32_t>> &source_array_ids, const StringDictionary *source_dict)¶
Public Static Attributes
-
constexpr int32_t
INVALID_STR_ID
= -1¶
-
constexpr size_t
MAX_STRLEN
= (1 << 15) - 1¶
-
constexpr size_t
MAX_STRCOUNT
= (1U << 31) - 1¶
Private Functions
-
void
processDictionaryFutures
(std::vector<std::future<std::vector<std::pair<string_dict_hash_t, unsigned int>>>> &dictionary_futures)¶
-
size_t
getNumStringsFromStorage
(const size_t storage_slots) const¶ Method to retrieve number of strings in storage via a binary search for the first canary
- Return
number of strings in storage
- Parameters
storage_slots
: number of storage entries we should search to find the minimum canary
-
bool
fillRateIsHigh
(const size_t num_strings) const¶
-
void
increaseHashTableCapacity
()¶
-
template<class String>
void
increaseHashTableCapacityFromStorageAndMemory
(const size_t str_count, const size_t storage_high_water_mark, const std::vector<String> &input_strings, const std::vector<size_t> &string_memory_ids, const std::vector<string_dict_hash_t> &input_strings_hashes)¶
-
int32_t
getOrAddImpl
(const std::string_view &str)¶
-
template<class String>
void
hashStrings
(const std::vector<String> &string_vec, std::vector<string_dict_hash_t> &hashes) const¶ Method to hash a vector of strings in parallel.
- Parameters
string_vec
: input vector of strings to be hashed
hashes
: space for the output - should be pre-sized to match string_vec size
-
int32_t
getUnlocked
(const std::string_view sv) const¶
-
std::string
getStringUnlocked
(int32_t string_id) const¶
-
std::string
getStringChecked
(const int string_id) const¶
-
std::pair<char *, size_t>
getStringBytesChecked
(const int string_id) const¶
-
template<class String>
uint32_t
computeBucket
(const string_dict_hash_t hash, const String &input_string, const std::vector<int32_t> &string_id_string_dict_hash_table) const¶
-
template<class String>
uint32_t
computeBucketFromStorageAndMemory
(const string_dict_hash_t input_string_hash, const String &input_string, const std::vector<int32_t> &string_id_string_dict_hash_table, const size_t storage_high_water_mark, const std::vector<String> &input_strings, const std::vector<size_t> &string_memory_ids) const¶
-
uint32_t
computeUniqueBucketWithHash
(const string_dict_hash_t hash, const std::vector<int32_t> &string_id_string_dict_hash_table)¶
-
void
checkAndConditionallyIncreasePayloadCapacity
(const size_t write_length)¶
-
void
checkAndConditionallyIncreaseOffsetCapacity
(const size_t write_length)¶
-
template<class String>
void
appendToStorageBulk
(const std::vector<String> &input_strings, const std::vector<size_t> &string_memory_ids, const size_t sum_new_strings_lengths)¶
-
StringDictionary::PayloadString
getStringFromStorage
(const int string_id) const¶
-
std::string_view
getStringFromStorageFast
(const int string_id) const¶
-
void
addPayloadCapacity
(const size_t min_capacity_requested = 0)¶
-
void
addOffsetCapacity
(const size_t min_capacity_requested = 0)¶
-
size_t
addStorageCapacity
(int fd, const size_t min_capacity_requested = 0)¶
-
void *
addMemoryCapacity
(void *addr, size_t &mem_size, const size_t min_capacity_requested = 0)¶
-
void
invalidateInvertedIndex
()¶
-
std::vector<int32_t>
getEquals
(std::string pattern, std::string comp_operator, size_t generation)¶
-
void
buildSortedCache
()¶
-
void
insertInSortedCache
(std::string str, int32_t str_id)¶
-
void
sortCache
(std::vector<int32_t> &cache)¶
-
void
mergeSortedCache
(std::vector<int32_t> &temp_sorted_cache)¶
-
compare_cache_value_t *
binary_search_cache
(const std::string &pattern) const¶
Private Members
-
const DictRef
dict_ref_
¶
-
const std::string
folder_
¶
-
size_t
str_count_
¶
-
size_t
collisions_
¶
-
std::vector<int32_t>
string_id_string_dict_hash_table_
¶
-
std::vector<string_dict_hash_t>
hash_cache_
¶
-
std::vector<int32_t>
sorted_cache
¶
-
bool
isTemp_
¶
-
bool
materialize_hashes_
¶
-
std::string
offsets_path_
¶
-
int
payload_fd_
¶
-
int
offset_fd_
¶
-
StringIdxEntry *
offset_map_
¶
-
char *
payload_map_
¶
-
size_t
offset_file_size_
¶
-
size_t
payload_file_size_
¶
-
size_t
payload_file_off_
¶
-
mapd_shared_mutex
rw_mutex_
¶
-
std::map<std::tuple<std::string, bool, bool, char>, std::vector<int32_t>>
like_cache_
¶
-
std::map<std::pair<std::string, char>, std::vector<int32_t>>
regex_cache_
¶
-
std::map<std::string, int32_t>
equal_cache_
¶
-
DictionaryCache<std::string, compare_cache_value_t>
compare_cache_
¶
-
std::shared_ptr<std::vector<std::string>>
strings_cache_
¶
-
std::unique_ptr<StringDictionaryClient>
client_
¶
-
std::unique_ptr<StringDictionaryClient>
client_no_timeout_
¶
-
char *
CANARY_BUFFER
= {nullptr}¶
-
size_t
canary_buffer_size
= 0¶
Friends
-
friend
StringDictionary::StringLocalCallback
-
struct
compare_cache_value_t
¶
-
struct
PayloadString
¶
-
class
StringCallback
¶ Subclassed by anonymous_namespace{StringDictionary.cpp}::MapMaker, StringLocalCallback, StringNetworkCallback
-
struct
StringIdxEntry
¶
-