Class StringDictionaryProxy

class StringDictionaryProxy

Public Types

using TransientMap = robin_hood::unordered_node_map<std::string, int32_t, HeterogeneousStringHash, HeterogeneousStringEqual>

Public Functions

StringDictionaryProxy(StringDictionaryProxy const&)
StringDictionaryProxy const &operator=(StringDictionaryProxy const&)
StringDictionaryProxy(std::shared_ptr<StringDictionary> sd, const int32_t string_dict_id, const int64_t generation)
int32_t getDictId() const
bool operator==(StringDictionaryProxy const &rhs) const
bool operator!=(StringDictionaryProxy const &rhs) const
int32_t getOrAdd(const std::string &str)
StringDictionary *getDictionary() const
int64_t getGeneration() const
std::vector<int32_t> getTransientBulk(const std::vector<std::string> &strings) const

Executes read-only lookup of a vector of strings and returns a vector of their integer ids.

This function, unlike getOrAddTransientBulk, will not add strings to the dictionary. Use this function if strings that don’t currently exist in the StringDictionaryProxy should not be added to the proxy as transient entries. This method also has performance advantages over getOrAddTransientBulk for read-only use cases, in that it can: 1) take a read lock instead of a write lock for the transient lookups, and 2) use a tbb::parallel_for implementation of the transient string lookups, as we are guaranteed that the underlying map of strings to int ids cannot change.


A vector of string_ids of the same length as strings, containing the id of each string that was found in the underlying StringDictionary instance or in the proxy’s transient map, and StringDictionary::INVALID_STR_ID for each string that was not found.

  • strings: Vector of strings to perform string id lookups on

int32_t getOrAddTransient(const std::string &str)
std::vector<int32_t> getOrAddTransientBulk(const std::vector<std::string> &strings)
int32_t getIdOfString(const std::string &str) const
int32_t getIdOfStringNoGeneration(const std::string &str) const
std::string getString(int32_t string_id) const
std::vector<std::string> getStrings(const std::vector<int32_t> &string_ids) const
std::pair<const char *, size_t> getStringBytes(int32_t string_id) const
IdMap initIdMap() const
StringDictionaryProxy::IdMap buildIntersectionTranslationMapToOtherProxy(const StringDictionaryProxy *dest_proxy) const

Builds a vectorized string_id translation map from this proxy to dest_proxy.


An IdMap which encapsulates a std::vector<int32_t> of string ids for both transient and non-transient strings, mapping to their translated string_ids. offset_ is defined to be the number of transient entries + 1. The ordering of values in the vector_map_ is:

  • the transient ids (there are offset_-1 of these)

  • INVALID_STR_ID (=-1)

  • the non-transient string ids

For example, if there are 3 transient entries in this proxy and 20 in the underlying string dictionary, then vector_map_ will be of size() == 24 and offset_ = 3 + 1. The formula to translate ids is new_id = vector_map_[offset_ + old_id]. It is always the case that vector_map_[offset_ - 1] == -1, so that INVALID_STR_ID maps to INVALID_STR_ID.


StringDictionaryProxy::IdMap buildUnionTranslationMapToOtherProxy(StringDictionaryProxy *dest_proxy) const
size_t storageEntryCount() const

Returns the number of string entries in the underlying string dictionary, at this proxy’s generation_ if it is set/valid, otherwise just the current size of the dictionary.


size_t Number of entries in the string dictionary (at this proxy’s generation if set)

size_t transientEntryCount() const

Returns the number of transient string entries for this proxy.


size_t Number of transient string entries for this proxy

size_t entryCount() const

Returns the total number of string entries for this proxy, both stored in the underlying dictionary and in the transient map. Equal to storageEntryCount() + transientEntryCount().


size_t Number of total string entries for this proxy

void updateGeneration(const int64_t generation)
std::vector<int32_t> getLike(const std::string &pattern, const bool icase, const bool is_simple, const char escape) const
std::vector<int32_t> getCompare(const std::string &pattern, const std::string &comp_operator) const
std::vector<int32_t> getRegexpLike(const std::string &pattern, const char escape) const
const std::vector<std::string const *> &getTransientVector() const
void eachStringSerially(StringDictionary::StringCallback &serial_callback) const
StringDictionaryProxy::IdMap transientUnion(StringDictionaryProxy const &sdp_rhs)

Public Static Functions

static unsigned transientIdToIndex(int32_t const id)
static int32_t transientIndexToId(unsigned const index)

Private Functions

std::string getStringUnlocked(const int32_t string_id) const
size_t transientEntryCountUnlocked() const
size_t entryCountUnlocked() const
template<typename String>
int32_t lookupTransientStringUnlocked(const String &lookup_string) const
size_t getTransientBulkImpl(const std::vector<std::string> &strings, int32_t *string_ids, const bool take_read_lock) const
template<typename String>
size_t transientLookupBulk(const std::vector<String> &lookup_strings, int32_t *string_ids, const bool take_read_lock) const
template<typename String>
size_t transientLookupBulkUnlocked(const std::vector<String> &lookup_strings, int32_t *string_ids) const
template<typename String>
size_t transientLookupBulkParallelUnlocked(const std::vector<String> &lookup_strings, int32_t *string_ids) const
StringDictionaryProxy::IdMap buildIntersectionTranslationMapToOtherProxyUnlocked(const StringDictionaryProxy *dest_proxy) const
template<typename String>
int32_t getIdOfStringFromClient(String const &str) const
template<typename String>
int32_t getOrAddTransientUnlocked(String const &str)

Private Members

std::shared_ptr<StringDictionary> string_dict_
const int32_t string_dict_id_
TransientMap transient_str_to_int_
std::vector<std::string const *> transient_string_vec_
int64_t generation_
std::shared_mutex rw_mutex_


friend StringDictionaryProxy::StringLocalCallback
friend StringDictionaryProxy::StringNetworkCallback
struct HeterogeneousStringEqual

Public Types

using is_transparent = void

Public Functions

bool operator()(std::string_view const lhs, std::string_view const rhs) const
struct HeterogeneousStringHash

Public Types

using is_transparent = void

Public Functions

size_t operator()(std::string_view const key) const
class IdMap

Public Functions

IdMap(uint32_t const tran_size, uint32_t const dict_size)
IdMap(IdMap const&)
bool empty() const
size_t getIndex(int32_t const id) const
std::vector<int32_t> const &getVectorMap() const
size_t size() const
size_t numTransients() const
size_t numNonTransients() const
int32_t *data()
int32_t const *data() const
int32_t domainStart() const
int32_t domainEnd() const
size_t numUntranslatedStrings() const
void setNumUntranslatedStrings(const size_t num_untranslated_strings)
int32_t *storageData()
int32_t &operator[](int32_t const id)
int32_t operator[](int32_t const id) const

Private Members

size_t const offset_
std::vector<int32_t> vector_map_
int64_t num_untranslated_strings_ = {-1}


std::ostream &operator<<(std::ostream &os, IdMap const &id_map)