Skip to content

Commit

Permalink
Lock internal keys in RAM
Browse files Browse the repository at this point in the history
This commit locks the internal key cache data in RAM to prevent it
from being paged out to swap.

As memory locking is performed in units of whole pages, this commit
also redesigns the cache so all records are compactly placed in pages.
Before, it was a linked list with nodes scattered at random addresses, so
locking each key could "waste" a locked page: the number of lockable pages
is limited (although it is fairly large on modern systems) and we could not
be sure that the next record would land on the same page.
Also, placing the records sequentially makes iterating over them
CPU-cache friendly, in contrast to chasing the random pointers of a linked
list.
  • Loading branch information
dAdAbird committed Sep 13, 2024
1 parent 37630d6 commit b62b165
Show file tree
Hide file tree
Showing 3 changed files with 119 additions and 34 deletions.
149 changes: 117 additions & 32 deletions src/access/pg_tde_tdemap.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@

#include <openssl/rand.h>
#include <openssl/err.h>
#include <sys/mman.h>
#include <unistd.h>

#include "pg_tde_defines.h"
Expand Down Expand Up @@ -79,22 +80,34 @@ typedef struct TDEMapFilePath
char keydata_path[MAXPGPATH];
} TDEMapFilePath;

/*
 * One record of the relation key cache.
 *
 * rel_id is the identifier a key is looked up by (callers pass
 * rel.relNumber, or a fixed id such as XLOG_TDE_OID); key is the cached copy
 * of the key data. Records are stored by value, back to back, in
 * RelKeyCache.data - there are no per-record pointers.
 */
typedef struct RelKeyCacheRec
{
	Oid			rel_id;
	RelKeyData	key;
} RelKeyCacheRec;

/*
 * Relation keys cache.
 *
 * This is a slice backed by the memory `*data`. Initially, we allocate one
 * memory page (usually 4 kB). When we run out of space, we allocate a larger
 * buffer (one more page), copy the records over and free the old buffer.
 * This memory is locked in RAM so it won't be paged out to swap (we don't
 * want decrypted keys on disk). We allocate in units of memory pages because
 * these are the units `mlock()` operates on.
 *
 * Currently, the cache can only grow (no eviction). The data lives in
 * TopMemoryContext, so it goes away when the process exits, at which point
 * the OS also releases the memory locks.
 *
 * NOTE: lookups and inserts hand out pointers into `data`. Because the old
 * buffer is freed when the cache grows, such a pointer does not remain valid
 * across a later insert that triggers growth.
 */
typedef struct RelKeyCache
{
RelKeyCacheRec *data; /* record buffer; its size must be a multiple of the memory page size (usually 4 kB) */
int len; /* number of RelKeyCacheRecs currently in the cache */
int cap; /* max number of RelKeyCacheRecs that `data` can hold */
} RelKeyCache;

/* Process-local relation key cache; stays NULL until the first key is cached */
RelKeyCache *tde_rel_key_cache = NULL;

static int pg_tde_open_file_basic(char *tde_filename, int fileFlags, bool ignore_missing);
static int pg_tde_file_header_write(char *tde_filename, int fd, TDEPrincipalKeyInfo *principal_key_info, off_t *bytes_written);
Expand All @@ -115,6 +128,8 @@ static RelKeyData* pg_tde_read_one_keydata(int keydata_fd, int32 key_index, TDEP
static int keyrotation_init_file(TDEPrincipalKeyInfo *new_principal_key_info, char *rotated_filename, char *filename, bool *is_new_file, off_t *curr_pos);
static void finalize_key_rotation(char *m_path_old, char *k_path_old, char *m_path_new, char *k_path_new);

static RelKeyData *pg_tde_get_key_from_cache(Oid rel_id);

/*
* Generate an encrypted key for the relation and store it in the keymap file.
*/
Expand Down Expand Up @@ -179,46 +194,116 @@ pg_tde_create_key_map_entry(const RelFileLocator *newrlocator)
RelKeyData *
GetRelationKey(RelFileLocator rel)
{
	RelKeyData *key;
	RelKeyData *cached_key;

	/* Fast path: the key is already in the process-local cache. */
	key = pg_tde_get_key_from_cache(rel.relNumber);
	if (key != NULL)
		return key;

	/* Cache miss: try the key files; NULL here is passed on to the caller. */
	key = pg_tde_get_key_from_file(&rel);
	if (key == NULL)
		return NULL;

	/*
	 * The cache stores its own copy of the key, so the copy obtained from the
	 * file is released and the cached one is what we hand out.
	 */
	cached_key = pg_tde_put_key_into_cache(rel.relNumber, key);
	pfree(key);

	return cached_key;
}

/*
 * Look up a key in the process-local relation key cache.
 *
 * Walks the cached records in insertion order and returns a pointer to the
 * key stored in the first record whose rel_id matches. Returns NULL when the
 * cache has not been created yet or no record matches.
 */
static RelKeyData *
pg_tde_get_key_from_cache(Oid rel_id)
{
	RelKeyCache *cache = tde_rel_key_cache;
	int			idx = 0;

	if (cache == NULL)
		return NULL;

	while (idx < cache->len)
	{
		RelKeyCacheRec *entry = &cache->data[idx];

		if (entry != NULL && entry->rel_id == rel_id)
			return &entry->key;

		idx++;
	}

	return NULL;
}

/*
 * Add a key to the process-local relation key cache.
 *
 * The key data is copied into the next free record of the cache buffer and a
 * pointer to that cached copy is returned; the caller keeps ownership of
 * `key`. No check for an existing record with the same rel_id is made.
 *
 * The buffer is allocated in TopMemoryContext in whole memory pages, aligned
 * to the page size, and mlock()-ed so the keys are not paged out to swap.
 * When the buffer is full, a buffer one page larger is allocated and locked,
 * the records are copied over and the old buffer is freed. Raises ERROR if
 * mlock() fails; in that case the cache is left exactly as it was.
 *
 * NOTE: because growing frees the old buffer, pointers previously returned
 * by this function (or by a cache lookup) do not stay valid across a call
 * that grows the cache.
 * NOTE(review): the old buffer is not wiped before being freed, so it still
 * holds key material.
 *
 * TODO: add tests.
 */
RelKeyData *
pg_tde_put_key_into_cache(Oid rel_id, RelKeyData *key)
{
	static long pageSize = 0;
	RelKeyCacheRec *rec;
	MemoryContext oldCtx;

	if (pageSize == 0)
	{
#ifndef _SC_PAGESIZE
		pageSize = getpagesize();
#else
		pageSize = sysconf(_SC_PAGESIZE);
#endif
	}

	if (tde_rel_key_cache == NULL)
	{
		RelKeyCache *cache;
		RelKeyCacheRec *page;

		oldCtx = MemoryContextSwitchTo(TopMemoryContext);
		cache = palloc(sizeof(RelKeyCache));
		page = palloc_aligned(pageSize, pageSize, MCXT_ALLOC_ZERO);
		MemoryContextSwitchTo(oldCtx);

		if (mlock(page, pageSize) == -1)
		{
			/* pfree() may clobber errno, which %m needs */
			int			save_errno = errno;

			pfree(page);
			pfree(cache);
			errno = save_errno;
			elog(ERROR, "could not mlock internal key initial cache page: %m");
		}

		cache->data = page;
		cache->len = 0;
		cache->cap = pageSize / sizeof(RelKeyCacheRec);

		/* Publish only a fully initialized cache. */
		tde_rel_key_cache = cache;
	}

	/*
	 * Add another mem page if there is no room left for one more key: allocate
	 * the current size + 1 page and move the records there.
	 */
	if (tde_rel_key_cache->len >= tde_rel_key_cache->cap)
	{
		size_t		new_size;
		RelKeyCacheRec *new_data;

		new_size = TYPEALIGN(pageSize,
							 (tde_rel_key_cache->cap + 1) * sizeof(RelKeyCacheRec));

		/* palloc_aligned() takes the size first, then the alignment. */
		oldCtx = MemoryContextSwitchTo(TopMemoryContext);
		new_data = palloc_aligned(new_size, pageSize, MCXT_ALLOC_ZERO);
		MemoryContextSwitchTo(oldCtx);

		/* Lock the whole new buffer before any key is copied into it. */
		if (mlock(new_data, new_size) == -1)
		{
			int			save_errno = errno;

			pfree(new_data);
			errno = save_errno;
			elog(ERROR, "could not mlock internal key cache page: %m");
		}

		memcpy(new_data, tde_rel_key_cache->data,
			   tde_rel_key_cache->len * sizeof(RelKeyCacheRec));
		pfree(tde_rel_key_cache->data);

		tde_rel_key_cache->data = new_data;
		tde_rel_key_cache->cap = new_size / sizeof(RelKeyCacheRec);
	}

	rec = tde_rel_key_cache->data + tde_rel_key_cache->len;

	rec->rel_id = rel_id;
	/* Copy exactly one RelKeyData; the record itself is larger than that. */
	memcpy(&rec->key, key, sizeof(RelKeyData));
	tde_rel_key_cache->len++;

	return &rec->key;
}

const char *
Expand Down Expand Up @@ -246,7 +331,7 @@ tde_create_rel_key(Oid rel_id, InternalKey *key, TDEPrincipalKeyInfo *principal_
rel_key_data.internal_key.ctx = NULL;

/* Add to the decrypted key to cache */
return pg_tde_put_key_into_map(rel_id, &rel_key_data);
return pg_tde_put_key_into_cache(rel_id, &rel_key_data);
}
/*
* Encrypts a given key and returns the encrypted one.
Expand Down
2 changes: 1 addition & 1 deletion src/catalog/tde_global_space.c
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ TDEInitGlobalKeys(void)
* local ot the backend.
* (see https://github.com/Percona-Lab/pg_tde/pull/214#discussion_r1648998317)
*/
pg_tde_put_key_into_map(XLOG_TDE_OID, ikey);
pg_tde_put_key_into_cache(XLOG_TDE_OID, ikey);
}
}

Expand Down
2 changes: 1 addition & 1 deletion src/include/access/pg_tde_tdemap.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,6 @@ extern void pg_tde_set_db_file_paths(const RelFileLocator *rlocator, char *map_p

const char * tde_sprint_key(InternalKey *k);

extern RelKeyData *pg_tde_put_key_into_map(Oid rel_id, RelKeyData *key);
extern RelKeyData *pg_tde_put_key_into_cache(Oid rel_id, RelKeyData *key);

#endif /*PG_TDE_MAP_H*/

0 comments on commit b62b165

Please sign in to comment.