Skip to content

Commit

Permalink
add index mapping buckets for faster search
Browse files Browse the repository at this point in the history
  • Loading branch information
Cryptkeeper committed Jul 5, 2024
1 parent 713ded5 commit a4db515
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 77 deletions.
67 changes: 41 additions & 26 deletions include/index.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,36 +11,51 @@ struct inode_s {
struct inode_s* next; /* next node in linked list */
};

/// @brief Searches the linked node list for an entry with a matching `fp` value.
/// @param idx The head node of the linked list
/// @param fp The search value (filepath)
/// @return If a match is found, its allocation pointer is returned, otherwise NULL.
struct inode_s* indexfind(struct inode_s* idx, const char* fp);

/// @brief Iterates and writes the linked node list to a file stream for future
/// deserializing using `indexread`.
/// @param idx The head node of the linked list
#define INDEXBUCKETS 16

/// @brief Index map that spreads file nodes across `INDEXBUCKETS` linked lists.
struct index_s {
struct inode_s* buckets[INDEXBUCKETS]; /* head node of each bucket's linked list */
size_t size; /* number of nodes inserted across all buckets */
};

/// @brief Searches the index for a node with a matching filepath.
/// @param idx The index to search
/// @param fp The search value (filepath) to compare
/// @return If a match is found, its pointer is returned, otherwise NULL.
struct inode_s* indexfind(struct index_s* idx, const char* fp);

/// @brief Flattens the index map into a temporary array of nodes sorted by
/// filepath, writes one line per node to the file stream, then frees the
/// array. The index itself is left unchanged.
/// @param idx The index to flatten
/// @param s The file stream to write to
/// @return If successful, 0 is returned. Otherwise, -1 is returned and `errno`
/// is set.
int indexwrite(struct inode_s* idx, FILE* s);
int indexwrite(struct index_s* idx, FILE* s);

/// @brief Reads a file stream and deserializes the contents into a linked node list.
/// Invalid lines are skipped without warning.
/// @param idx Return value of the head node of the linked list
/// @brief Reads a file stream and deserializes the contents into a map of
/// individual file nodes.
/// @param idx The index to populate
/// @param s The file stream to read from
/// @return If successful, 0 is returned. Otherwise, -1 is returned and `errno`
/// is set.
int indexread(struct inode_s** idx, FILE* s);

/// @brief Dynamically allocates a new node and prepends it to the linked list.
/// @param idx The previous head node of the linked list
/// @param tail The new node to copy and prepend
/// @return The new head node of the linked list, or NULL if an error occurred.
struct inode_s* indexprepend(struct inode_s* idx, struct inode_s tail);

/// @brief Iterates and frees the memory allocated by the linked node list.
/// @param idx The head node of the linked list
void indexfree_r(struct inode_s* idx);

#endif
int indexread(struct index_s* idx, FILE* s);

/// @brief Copies the node and inserts it into the index mapping.
/// @param idx The index to insert into
/// @param node The new node to copy and insert
/// @return The pointer to the new node in the index map, otherwise NULL.
struct inode_s* indexput(struct index_s* idx, struct inode_s node);

/// @brief Frees all nodes in the index map.
/// @param idx The index to free
void indexfree(struct index_s* idx);

/// @brief Flattens the index map into an array of node pointers. The array is
/// in bucket order, not sorted. It is dynamically allocated and must be freed
/// by the caller; the nodes it points to remain owned by the index. Array size
/// is determined by the `size` field in the index struct.
/// @param idx The index to flatten
/// @return If successful, a pointer to an array of size `idx->size` is
/// returned. Otherwise, NULL.
struct inode_s** indexlist(const struct index_s* idx);

#endif// EASYLIB_INDEX_H
82 changes: 63 additions & 19 deletions src/index.c
Original file line number Diff line number Diff line change
@@ -1,66 +1,110 @@
#include "index.h"

#include <assert.h>
#include <errno.h>
#include <inttypes.h>
#include <stdlib.h>
#include <string.h>

#define INDEXMAXFP 512

struct inode_s* indexfind(struct inode_s* idx, const char* fp) {
struct inode_s* head = idx;
/// @brief Hashes a filepath into a bucket position within the index map.
/// Uses the `h * 31 + c` string hash, computed in unsigned arithmetic so that
/// long paths wrap around instead of overflowing a signed int (which is
/// undefined behavior and could previously yield a negative bucket index).
/// @param fp The NUL-terminated filepath to hash
/// @return A bucket index in the range [0, INDEXBUCKETS).
static int indexhash(const char* fp) {
  unsigned int h = 0;
  for (const unsigned char* p = (const unsigned char*) fp; *p != '\0'; p++) {
    h = (h << 5) - h + *p; /* h * 31 + *p, wrapping modulo UINT_MAX + 1 */
  }
  return (int) (h % INDEXBUCKETS);
}

/// @brief Looks up the node whose filepath equals `fp`.
/// Only the bucket selected by hashing `fp` is walked.
/// @param idx The index to search
/// @param fp The filepath to match (compared with `strcmp`)
/// @return The matching node, or NULL if the bucket holds no such filepath.
struct inode_s* indexfind(struct index_s* idx, const char* fp) {
  struct inode_s* node;
  for (node = idx->buckets[indexhash(fp)]; node != NULL; node = node->next) {
    if (strcmp(node->fp, fp) == 0) break;
  }
  return node; /* NULL when the bucket was exhausted without a match */
}

int indexwrite(struct inode_s* idx, FILE* s) {
/// @brief `qsort` comparator that orders node pointers by filepath.
/// @param a Pointer to an array element of type `struct inode_s*`
/// @param b Pointer to an array element of type `struct inode_s*`
/// @return The `strcmp` result of the two nodes' filepaths.
static int indexnodecmp(const void* a, const void* b) {
  /* elements are node pointers; keep the const qualifier qsort hands us */
  const struct inode_s* const* pa = a;
  const struct inode_s* const* pb = b;
  return strcmp((*pa)->fp, (*pb)->fp);
}

int indexwrite(struct index_s* idx, FILE* s) {
struct inode_s** fl;
if ((fl = indexlist(idx)) == NULL) return -1;
qsort(fl, idx->size, sizeof(struct inode_s*), indexnodecmp);

char lbuf[INDEXMAXFP]; /* line output format buffer */

struct inode_s* head;
for (head = idx; head != NULL; head = head->next) {
int err = 0;
for (size_t i = 0; i < idx->size; i++) {
struct inode_s* node = fl[i];
const int n = snprintf(lbuf, sizeof(lbuf), "%s,%" PRIu64 ",%" PRIu64 "\n",
head->fp, head->st.lmod, head->st.fsze);
if (fwrite(lbuf, n, 1, s) != 1) return -1;
node->fp, node->st.lmod, node->st.fsze);
if (fwrite(lbuf, n, 1, s) != 1) {
err = -1;
break;
}
}

return 0;
free(fl);
return err;
}

int indexread(struct inode_s** idx, FILE* s) {
char fp[INDEXMAXFP] = {0}; /* fscanf filepath string buffer */
int indexread(struct index_s* idx, FILE* s) {
char fp[INDEXMAXFP] = {0}; /* fscanf filepath string buffer */
struct inode_s b = {fp, {0}, NULL}; /* fscanf node buffer */

while (fscanf(s, "%[^,],%" PRIu64 ",%" PRIu64 "\n", b.fp, &b.st.lmod,
&b.st.fsze) == 3) {
// duplicate the string onto the heap
if ((b.fp = strdup(b.fp)) == NULL) return -1;
struct inode_s* r;
if ((r = indexprepend(*idx, b)) == NULL) {
free(*idx), *idx = NULL;
return -1;
}
*idx = r;
if (indexput(idx, b) == NULL) return -1;
b.fp = fp;
}

return 0;
}

struct inode_s* indexprepend(struct inode_s* idx, const struct inode_s tail) {
static struct inode_s* indexprepend(struct inode_s* idx,
const struct inode_s tail) {
struct inode_s* node;
if ((node = malloc(sizeof(tail))) == NULL) return NULL;
memcpy(node, &tail, sizeof(tail));
node->next = idx;
return node;
}

void indexfree_r(struct inode_s* idx) {
/// @brief Copies `node` into a newly allocated node and inserts it at the
/// front of the bucket selected by hashing `node.fp`. Existing entries with
/// the same filepath are not checked for. The stored copy shares `node.fp`,
/// which `indexfree` later releases with `free`.
/// @param idx The index to insert into
/// @param node The node value to copy and insert
/// @return The newly inserted node, or NULL if allocation failed (the index
/// is left unchanged in that case).
struct inode_s* indexput(struct index_s* idx, const struct inode_s node) {
  struct inode_s** const slot = &idx->buckets[indexhash(node.fp)];
  struct inode_s* const added = indexprepend(*slot, node);
  if (added != NULL) {
    *slot = added;
    idx->size++;
  }
  return added;
}

/// @brief Frees every node of a bucket list along with its filepath string.
/// @param idx The head node of the list; NULL frees nothing
static void indexfree_r(struct inode_s* idx) {
  struct inode_s* node = idx;
  while (node != NULL) {
    struct inode_s* const next = node->next; /* read before the node is freed */
    free(node->fp);
    free(node);
    node = next;
  }
}

/// @brief Frees all nodes in the index map and resets it to the empty state,
/// so the index can be reused and a repeated call is harmless.
/// @param idx The index to free
void indexfree(struct index_s* idx) {
  for (int i = 0; i < INDEXBUCKETS; i++) {
    indexfree_r(idx->buckets[i]);
    idx->buckets[i] = NULL; /* no dangling head left behind to double free */
  }
  idx->size = 0;
}

/// @brief Collects a pointer to every node of the index into one array, in
/// bucket order (the array is not sorted). The array must be freed by the
/// caller; the nodes it points to stay owned by the index.
/// @param idx The index to flatten
/// @return An array holding `idx->size` node pointers, or NULL if allocation
/// failed. An empty index yields a valid (freeable) array rather than NULL.
struct inode_s** indexlist(const struct index_s* idx) {
  /* calloc(0, ...) may legally return NULL; request at least one slot so that
   * NULL only ever signals an allocation failure */
  const size_t cap = idx->size > 0 ? idx->size : 1;
  struct inode_s** fl;
  if ((fl = calloc(cap, sizeof(*fl))) == NULL) return NULL;
  size_t ni = 0;
  for (int i = 0; i < INDEXBUCKETS; i++) {
    for (struct inode_s* head = idx->buckets[i]; head != NULL;
         head = head->next) {
      assert(ni < idx->size); /* `size` must match the stored node count */
      fl[ni++] = head;
    }
  }
  assert(ni == idx->size);
  return fl;
}
59 changes: 28 additions & 31 deletions src/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -36,12 +36,12 @@ static struct lcmdset_s** cmdsets;

// Releases the global `cmdsets` by handing it to `lcmdfree_r`.
static void freecmdsets(void) { lcmdfree_r(cmdsets); }

static struct inode_s* lastmap; /* stored index from previous run (if any) */
static struct inode_s* thismap; /* live checked index from this run */
static struct index_s lastmap; /* stored index from previous run (if any) */
static struct index_s thismap; /* live checked index from this run */

// Releases the nodes held by both index maps (`lastmap` and `thismap`).
// NOTE(review): this diff view shows removed and added lines together; the
// `indexfree_r(...)` calls are the pre-commit form and the `indexfree(&...)`
// calls are their replacement.
static void freeindexmaps(void) {
indexfree_r(lastmap);
indexfree_r(thismap);
indexfree(&lastmap);
indexfree(&thismap);
}

static void freeglobals(void) {
Expand Down Expand Up @@ -138,7 +138,7 @@ static int parseinitargs(const int argc, char** const argv) {
}

static int loadlastmap(const char* fp) {
assert(lastmap == NULL);
assert(lastmap.size == 0);
FILE* s = fopen(fp, "r");
if (s == NULL) return -1;
const int err = indexread(&lastmap, s);
Expand All @@ -149,8 +149,7 @@ static int loadlastmap(const char* fp) {
// Writes `thismap` to the index file at `fp`, opened in "w" mode.
// Returns 0 on success. Returns -1 if the file cannot be opened, the index
// cannot be written, or closing (and thereby flushing) the stream fails;
// `errno` is left describing the first failure so the caller can report it.
static int savethismap(const char* fp) {
  FILE* s = fopen(fp, "w");
  if (s == NULL) return -1;
  const int err = indexwrite(&thismap, s);
  const int werrno = errno; /* fclose may overwrite errno */
  // buffered output is flushed by fclose, so a failure here is a write failure
  if (fclose(s) != 0 && err == 0) return -1;
  if (err != 0) errno = werrno;
  return err;
}
Expand All @@ -169,16 +168,12 @@ static int fsprocfile_pre(const char* fp) {
if (fsstat(fp, &finfo.st)) return -1;

// attempt to match file in previous index
struct inode_s* prev = indexfind(lastmap, fp);
struct inode_s* prev = indexfind(&lastmap, fp);

// lookup from previous iteration or insert new record and lookup
struct inode_s* curr = indexfind(thismap, fp);
struct inode_s* curr = indexfind(&thismap, fp);
if (curr == NULL) {
struct inode_s* r;
if ((r = indexprepend(thismap, finfo)) == NULL) return -1;
thismap = r;
curr = indexfind(thismap, fp);
assert(curr != NULL); /* should+must exist in the list */
if ((curr = indexput(&thismap, finfo)) == NULL) return -1;
}

if (prev != NULL) {
Expand Down Expand Up @@ -220,7 +215,7 @@ static int fsprocfile_post(const char* fp) {
return 0;
}

struct inode_s* curr = indexfind(thismap, fp);
struct inode_s* curr = indexfind(&thismap, fp);
if (curr != NULL) {
// check if the file was modified during the command execution
struct fsstat_s mod = {0};
Expand All @@ -242,11 +237,7 @@ static int fsprocfile_post(const char* fp) {
if ((finfo.fp = strdup(fp)) == NULL) return -1;
if (fsstat(fp, &finfo.st)) return -1;

struct inode_s* r;
if ((r = indexprepend(thismap, finfo)) == NULL) return -1;
thismap = r;
curr = indexfind(thismap, fp);
assert(curr != NULL); /* should+must exist in the list */
if ((curr = indexput(&thismap, finfo)) == NULL) return -1;

log_info("[+] %s", curr->fp);

Expand All @@ -268,24 +259,28 @@ static int waitforwork(void) {
}

// Reports files that appear in the previous index (`lastmap`) but not in the
// current one (`thismap`): each is logged with a "[-]" prefix and, unless
// `initargs.skipproc` is set, a deletion-trigger request is queued via
// `tpqueue`. Returns the result of `waitforwork()`, or -1 if the node list
// cannot be allocated.
// NOTE(review): this diff view shows removed and added lines together; the
// `head`/`next:` linked-list walk is the pre-commit form and the `lastlist`
// array walk is its replacement.
static int checkremoved(void) {
if (lastmap == NULL) return 0;// no previous map entries to check
if (lastmap.size == 0) return 0;// no previous map entries to check

struct inode_s* head = lastmap;
while (head != NULL) {
if (indexfind(thismap, head->fp) == NULL) { /* file no longer exists */
log_info("[-] %s", head->fp);
// flatten the previous index so every bucket can be walked in one loop
struct inode_s** lastlist;
if ((lastlist = indexlist(&lastmap)) == NULL) return -1;

if (initargs.skipproc) goto next;
for (size_t i = 0; i < lastmap.size; i++) {
struct inode_s* prev = lastlist[i];
if (indexfind(&thismap, prev->fp) == NULL) { /* file no longer exists */
log_info("[-] %s", prev->fp);

if (initargs.skipproc) continue;
const int flags = LCTRIG_DEL | (initargs.verbose ? LCTOPT_VERBOSE : 0);
const struct tpreq_s req = {cmdsets, head, flags};
const struct tpreq_s req = {cmdsets, prev, flags};
int err;
if ((err = tpqueue(&req)))
log_error("error queuing deletion command for `%s`: %d", head->fp, err);
// a queueing failure is logged but does not stop the scan
if ((err = tpqueue(&req))) {
log_error("error queuing deletion command for `%s`: %d", prev->fp, err);
}
}
next:
head = head->next;
}

// only the pointer array is freed; the nodes stay owned by `lastmap`
free(lastlist);

return waitforwork();
}

Expand Down Expand Up @@ -330,6 +325,8 @@ static int cmpchanges(void) {
(err = execstage(fsprocfile_post)))
return err;

log_info("compared %zu files", thismap.size);

if (savethismap(initargs.indexfile)) {
log_error("cannot write index `%s`: %s", initargs.indexfile,
strerror(errno));
Expand Down
2 changes: 1 addition & 1 deletion src/tp.c
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
#include "log.h"

/* NOTE(review): this diff view shows both declarations of `busy`; the
 * `volatile`-qualified line is the post-commit form. `_Atomic` is what makes
 * cross-thread access well-defined, while `volatile` adds no synchronization
 * of its own -- confirm whether the extra qualifier is actually needed. */
struct tpool_s {
_Atomic bool busy;
volatile _Atomic bool busy;
pthread_t tid;
struct tpreq_s req;
};
Expand Down

0 comments on commit a4db515

Please sign in to comment.