diff --git a/src/access/pg_tde_tdemap.c b/src/access/pg_tde_tdemap.c index 2254c0d4..024f4425 100644 --- a/src/access/pg_tde_tdemap.c +++ b/src/access/pg_tde_tdemap.c @@ -32,6 +32,7 @@ #include #include +#include #include #include "pg_tde_defines.h" @@ -79,22 +80,34 @@ typedef struct TDEMapFilePath char keydata_path[MAXPGPATH]; } TDEMapFilePath; -/* Relation key cache. - * - * TODO: For now it is just a linked list. Data can only be added w/o any - * ability to remove or change it. Also consider usage of more efficient data - * struct (hash map) in the shared memory(?) - currently allocated in the - * TopMemoryContext of the process. - */ -typedef struct RelKey + +typedef struct RelKeyCacheRec { Oid rel_id; RelKeyData key; - struct RelKey *next; -} RelKey; +} RelKeyCacheRec; + +/* + * Relation keys cache. + * + * This is a slice backed by memory `*data`. Initially, we allocate one memory + * page (usually 4Kb). We reallocate it by adding another page when we run out + * of space. This memory is locked in the RAM so it won't be paged to the swap + * (we don't want decrypted keys on disk). We do allocations in mem pages as + * these are the units `mlock()` operations are performed in. + * + * Currently, the cache can only grow (no eviction). The data is located in + * TopMemoryContext hence being wiped when the process exits, as well as memory + * is being unlocked by OS. + */ +typedef struct RelKeyCache +{ + RelKeyCacheRec *data; /* must be a multiple of a memory page (usually 4Kb) */ + int len; /* num of RelKeyCacheRecs currenty in cache */ + int cap; /* max amount of RelKeyCacheRec data can fit */ +} RelKeyCache; -/* Head of the key cache (linked list) */ -RelKey *tde_rel_key_map = NULL; +RelKeyCache *tde_rel_key_cache = NULL; static int pg_tde_open_file_basic(char *tde_filename, int fileFlags, bool ignore_missing); static int pg_tde_file_header_write(char *tde_filename, int fd, TDEPrincipalKeyInfo *principal_key_info, off_t *bytes_written); @@ -115,6 +128,8 @@ static RelKeyData* pg_tde_read_one_keydata(int keydata_fd, int32 key_index, TDEP static int keyrotation_init_file(TDEPrincipalKeyInfo *new_principal_key_info, char *rotated_filename, char *filename, bool *is_new_file, off_t *curr_pos); static void finalize_key_rotation(char *m_path_old, char *k_path_old, char *m_path_new, char *k_path_new); +static RelKeyData *pg_tde_get_key_from_cache(Oid rel_id); + /* * Generate an encrypted key for the relation and store it in the keymap file. */ @@ -179,46 +194,116 @@ pg_tde_create_key_map_entry(const RelFileLocator *newrlocator) RelKeyData * GetRelationKey(RelFileLocator rel) { - RelKey *curr; RelKeyData *key; Oid rel_id = rel.relNumber; - for (curr = tde_rel_key_map; curr != NULL; curr = curr->next) + key = pg_tde_get_key_from_cache(rel_id); + if (key != NULL) { - if (curr->rel_id == rel_id) - { - return &curr->key; - } + return key; } key = pg_tde_get_key_from_file(&rel); if (key != NULL) { - RelKeyData* cached_key = pg_tde_put_key_into_map(rel.relNumber, key); + RelKeyData* cached_key = pg_tde_put_key_into_cache(rel.relNumber, key); pfree(key); return cached_key; } - return key; /* returning NULL key */ + return NULL; +} + +static RelKeyData * +pg_tde_get_key_from_cache(Oid rel_id) +{ + RelKeyCacheRec *rec; + + if (tde_rel_key_cache == NULL) + return NULL; + + for (int i = 0; i < tde_rel_key_cache->len; i++) + { + rec = tde_rel_key_cache->data+i; + if (rec != NULL && rec->rel_id == rel_id) + { + return &rec->key; + } + } + + return NULL; } +/* Add key to cache. See comments on `RelKeyCache`. + * + * TODO: add tests. + */ RelKeyData * -pg_tde_put_key_into_map(Oid rel_id, RelKeyData *key) +pg_tde_put_key_into_cache(Oid rel_id, RelKeyData *key) { - RelKey *new = (RelKey *) MemoryContextAlloc(TopMemoryContext, sizeof(RelKey)); - new->rel_id = rel_id; - memcpy(&new->key, key, sizeof(RelKeyData)); - new->next = NULL; + static long pageSize = 0; + RelKeyCacheRec *rec; + MemoryContext oldCtx; - if (tde_rel_key_map == NULL) - tde_rel_key_map = new; - else + if (pageSize == 0) + { + #ifndef _SC_PAGESIZE + pageSize = getpagesize(); + #else + pageSize = sysconf(_SC_PAGESIZE); + #endif + } + + if (tde_rel_key_cache == NULL) + { + oldCtx = MemoryContextSwitchTo(TopMemoryContext); + tde_rel_key_cache = palloc(sizeof(RelKeyCache)); + + tde_rel_key_cache->data = palloc_aligned(pageSize, pageSize, MCXT_ALLOC_ZERO); + MemoryContextSwitchTo(oldCtx); + + if (mlock(tde_rel_key_cache->data, pageSize) == -1) + elog(ERROR, "could not mlock internal key initial cache page: %m"); + + tde_rel_key_cache->len = 0; + tde_rel_key_cache->cap = pageSize / sizeof(RelKeyCacheRec); + } + + /* Add another mem page if there is no more room left for another key. We + * allocate `current_memory_size` + 1 page and copy data there. + */ + if (tde_rel_key_cache->len+1 > + (tde_rel_key_cache->cap * sizeof(RelKeyCacheRec)) / sizeof(RelKeyCacheRec)) { - new->next = tde_rel_key_map; - tde_rel_key_map = new; + size_t size; + size_t old_size; + RelKeyCacheRec *chachePage; + + size = TYPEALIGN(pageSize, (tde_rel_key_cache->cap+1) * sizeof(RelKeyCacheRec)); + old_size = TYPEALIGN(pageSize, (tde_rel_key_cache->cap) * sizeof(RelKeyCacheRec)); + + oldCtx = MemoryContextSwitchTo(TopMemoryContext); + chachePage = palloc_aligned(pageSize, size, MCXT_ALLOC_ZERO); + MemoryContextSwitchTo(oldCtx); + + memcpy(chachePage, tde_rel_key_cache->data, old_size); + pfree(tde_rel_key_cache->data); + tde_rel_key_cache->data = chachePage; + + if (mlock(tde_rel_key_cache->data, pageSize) == -1) + elog(ERROR, "could not mlock internal key cache page: %m"); + + tde_rel_key_cache->cap = size / sizeof(RelKeyCacheRec); } - return &new->key; + + rec = tde_rel_key_cache->data + tde_rel_key_cache->len; + + rec->rel_id = rel_id; + memcpy(&rec->key, key, sizeof(RelKeyCacheRec)); + tde_rel_key_cache->len++; + + return &rec->key; } const char * @@ -246,7 +331,7 @@ tde_create_rel_key(Oid rel_id, InternalKey *key, TDEPrincipalKeyInfo *principal_ rel_key_data.internal_key.ctx = NULL; /* Add to the decrypted key to cache */ - return pg_tde_put_key_into_map(rel_id, &rel_key_data); + return pg_tde_put_key_into_cache(rel_id, &rel_key_data); } /* * Encrypts a given key and returns the encrypted one. diff --git a/src/catalog/tde_global_space.c b/src/catalog/tde_global_space.c index 88e395da..f279c19c 100644 --- a/src/catalog/tde_global_space.c +++ b/src/catalog/tde_global_space.c @@ -67,7 +67,7 @@ TDEInitGlobalKeys(void) * local ot the backend. * (see https://github.com/Percona-Lab/pg_tde/pull/214#discussion_r1648998317) */ - pg_tde_put_key_into_map(XLOG_TDE_OID, ikey); + pg_tde_put_key_into_cache(XLOG_TDE_OID, ikey); } } diff --git a/src/include/access/pg_tde_tdemap.h b/src/include/access/pg_tde_tdemap.h index 0a6b632d..4b298a71 100644 --- a/src/include/access/pg_tde_tdemap.h +++ b/src/include/access/pg_tde_tdemap.h @@ -56,6 +56,6 @@ extern void pg_tde_set_db_file_paths(const RelFileLocator *rlocator, char *map_p const char * tde_sprint_key(InternalKey *k); -extern RelKeyData *pg_tde_put_key_into_map(Oid rel_id, RelKeyData *key); +extern RelKeyData *pg_tde_put_key_into_cache(Oid rel_id, RelKeyData *key); #endif /*PG_TDE_MAP_H*/