From 5347eda20458d66d154273e8c3f76e7f309bcb83 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 18 Mar 2024 10:02:38 -0700 Subject: [PATCH 1/3] Fix creation of uninitialized custom_tag_name in deserialize Co-authored-by: Marshall --- src/scanner.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/scanner.c b/src/scanner.c index e1a3f3d..99ab054 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -166,7 +166,7 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { if (tag_count > 0) { unsigned iter = 0; for (iter = 0; iter < serialized_tag_count; iter++) { - Tag tag = scanner->tags.data[iter]; + Tag tag = new_tag(); tag.type = (TagType)buffer[size++]; if (tag.type == CUSTOM) { uint16_t name_length = (uint8_t)buffer[size++]; From 433848041c0e3fbf3970d0a0ae1caa6ac625c9c7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 18 Mar 2024 10:55:56 -0700 Subject: [PATCH 2/3] Use array header for strings and arrays Co-authored-by: Marshall --- src/scanner.c | 210 +++++++++-------------------- src/tag.h | 70 +++++----- src/tree_sitter/alloc.h | 54 ++++++++ src/tree_sitter/array.h | 290 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 443 insertions(+), 181 deletions(-) create mode 100644 src/tree_sitter/alloc.h create mode 100644 src/tree_sitter/array.h diff --git a/src/scanner.c b/src/scanner.c index 99ab054..52815c9 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -1,6 +1,6 @@ -#include "tag.h" - #include +#include "tree_sitter/array.h" +#include "tag.h" enum TokenType { START_TAG_NAME, @@ -18,105 +18,17 @@ enum TokenType { }; typedef struct { - uint32_t len; - uint32_t cap; - Tag *data; -} tags_vec; - -typedef struct { - tags_vec tags; + Array(Tag) tags; } Scanner; #define MAX(a, b) ((a) > (b) ? (a) : (b)) -#define VEC_RESIZE(vec, _cap) \ - if ((_cap) > (vec).cap && (_cap) > 0) { \ - void *tmp = realloc((vec).data, (_cap) * sizeof((vec).data[0])); \ - assert(tmp != NULL); \ - (vec).data = tmp; \ - (vec).cap = (_cap); \ - } - -#define VEC_GROW(vec, _cap) \ - if ((vec).cap < (_cap)) { \ - VEC_RESIZE((vec), (_cap)); \ - } - -#define VEC_PUSH(vec, el) \ - if ((vec).cap == (vec).len) { \ - VEC_RESIZE((vec), MAX(16, (vec).len * 2)); \ - } \ - (vec).data[(vec).len++] = (el); - -#define VEC_POP(vec) \ - { \ - if (VEC_BACK(vec).type == CUSTOM) { \ - tag_free(&VEC_BACK(vec)); \ - } \ - (vec).len--; \ - } - -#define VEC_BACK(vec) ((vec).data[(vec).len - 1]) - -#define VEC_FREE(vec) \ - { \ - if ((vec).data != NULL) \ - free((vec).data); \ - (vec).data = NULL; \ - } - -#define VEC_CLEAR(vec) \ - { \ - for (int i = 0; i < (vec).len; i++) { \ - tag_free(&(vec).data[i]); \ - } \ - (vec).len = 0; \ - } - -#define STRING_RESIZE(vec, _cap) \ - void *tmp = realloc((vec).data, ((_cap) + 1) * sizeof((vec).data[0])); \ - assert(tmp != NULL); \ - (vec).data = tmp; \ - memset((vec).data + (vec).len, 0, (((_cap) + 1) - (vec).len) * sizeof((vec).data[0])); \ - (vec).cap = (_cap); - -#define STRING_GROW(vec, _cap) \ - if ((vec).cap < (_cap)) { \ - STRING_RESIZE((vec), (_cap)); \ - } - -#define STRING_PUSH(vec, el) \ - if ((vec).cap == (vec).len) { \ - STRING_RESIZE((vec), MAX(16, (vec).len * 2)); \ - } \ - (vec).data[(vec).len++] = (el); - -#define STRING_INIT(vec) \ - { \ - (vec).data = calloc(1, sizeof(char) * 17); \ - (vec).len = 0; \ - (vec).cap = 16; \ - } - -#define STRING_FREE(vec) \ - { \ - if ((vec).data != NULL) \ - free((vec).data); \ - (vec).data = NULL; \ - } - -#define STRING_CLEAR(vec) \ - { \ - (vec).len = 0; \ - memset((vec).data, 0, (vec).cap * sizeof(char)); \ - } - static inline void advance(TSLexer *lexer) { lexer->advance(lexer, false); } static inline void skip(TSLexer *lexer) { lexer->advance(lexer, true); } static unsigned serialize(Scanner *scanner, char *buffer) { - uint16_t tag_count = scanner->tags.len > UINT16_MAX ? UINT16_MAX : scanner->tags.len; + uint16_t tag_count = scanner->tags.size > UINT16_MAX ? UINT16_MAX : scanner->tags.size; uint16_t serialized_tag_count = 0; unsigned size = sizeof(tag_count); @@ -124,9 +36,9 @@ static unsigned serialize(Scanner *scanner, char *buffer) { size += sizeof(tag_count); for (; serialized_tag_count < tag_count; serialized_tag_count++) { - Tag tag = scanner->tags.data[serialized_tag_count]; + Tag tag = scanner->tags.contents[serialized_tag_count]; if (tag.type == CUSTOM) { - unsigned name_length = tag.custom_tag_name.len; + unsigned name_length = tag.custom_tag_name.size; if (name_length > UINT8_MAX) { name_length = UINT8_MAX; } @@ -135,7 +47,7 @@ static unsigned serialize(Scanner *scanner, char *buffer) { } buffer[size++] = (char)tag.type; buffer[size++] = (char)name_length; - strncpy(&buffer[size], tag.custom_tag_name.data, name_length); + strncpy(&buffer[size], tag.custom_tag_name.contents, name_length); size += name_length; } else { if (size + 1 >= TREE_SITTER_SERIALIZATION_BUFFER_SIZE) { @@ -150,7 +62,11 @@ static unsigned serialize(Scanner *scanner, char *buffer) { } static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { - VEC_CLEAR(scanner->tags); + for (unsigned i = 0; i < scanner->tags.size; i++) { + tag_free(&scanner->tags.contents[i]); + } + array_clear(&scanner->tags); + if (length > 0) { unsigned size = 0; uint16_t tag_count = 0; @@ -162,37 +78,34 @@ static void deserialize(Scanner *scanner, const char *buffer, unsigned length) { memcpy(&tag_count, &buffer[size], sizeof(tag_count)); size += sizeof(tag_count); - VEC_RESIZE(scanner->tags, tag_count); + array_reserve(&scanner->tags, tag_count); if (tag_count > 0) { unsigned iter = 0; for (iter = 0; iter < serialized_tag_count; iter++) { - Tag tag = new_tag(); + Tag tag = tag_new(); tag.type = (TagType)buffer[size++]; if (tag.type == CUSTOM) { uint16_t name_length = (uint8_t)buffer[size++]; - tag.custom_tag_name.len = name_length; - tag.custom_tag_name.cap = name_length; - tag.custom_tag_name.data = (char *)calloc(1, sizeof(char) * (name_length + 1)); - strncpy(tag.custom_tag_name.data, &buffer[size], name_length); + array_reserve(&tag.custom_tag_name, name_length); + tag.custom_tag_name.size = name_length; + memcpy(tag.custom_tag_name.contents, &buffer[size], name_length); size += name_length; } - VEC_PUSH(scanner->tags, tag); + array_push(&scanner->tags, tag); } // add zero tags if we didn't read enough, this is because the // buffer had no more room but we held more tags. for (; iter < tag_count; iter++) { - Tag tag = new_tag(); - VEC_PUSH(scanner->tags, tag); + array_push(&scanner->tags, tag_new()); } } } } static String scan_tag_name(TSLexer *lexer) { - String tag_name; - STRING_INIT(tag_name); + String tag_name = array_new(); while (iswalnum(lexer->lookahead) || lexer->lookahead == '-' || lexer->lookahead == ':') { - STRING_PUSH(tag_name, towupper(lexer->lookahead)); + array_push(&tag_name, towupper(lexer->lookahead)); advance(lexer); } return tag_name; @@ -230,13 +143,13 @@ static bool scan_comment(TSLexer *lexer) { } static bool scan_raw_text(Scanner *scanner, TSLexer *lexer) { - if (scanner->tags.len == 0) { + if (scanner->tags.size == 0) { return false; } lexer->mark_end(lexer); - const char *end_delimiter = VEC_BACK(scanner->tags).type == SCRIPT ? "tags)->type == SCRIPT ? "lookahead) { @@ -258,70 +171,73 @@ static bool scan_raw_text(Scanner *scanner, TSLexer *lexer) { } static bool scan_implicit_end_tag(Scanner *scanner, TSLexer *lexer) { - Tag *parent = scanner->tags.len == 0 ? NULL : &VEC_BACK(scanner->tags); + Tag *parent = scanner->tags.size == 0 ? NULL : array_back(&scanner->tags); bool is_closing_tag = false; if (lexer->lookahead == '/') { is_closing_tag = true; advance(lexer); } else { - if (parent && is_void(parent)) { - VEC_POP(scanner->tags); + if (parent && tag_is_void(parent)) { + array_pop(&scanner->tags); lexer->result_symbol = IMPLICIT_END_TAG; return true; } } String tag_name = scan_tag_name(lexer); - if (tag_name.len == 0 && !lexer->eof(lexer)) { - STRING_FREE(tag_name); + if (tag_name.size == 0 && !lexer->eof(lexer)) { + array_delete(&tag_name); return false; } - Tag next_tag = for_name(tag_name.data); + Tag next_tag = tag_for_name(tag_name); if (is_closing_tag) { // The tag correctly closes the topmost element on the stack - if (scanner->tags.len > 0 && tagcmp(&VEC_BACK(scanner->tags), &next_tag)) { - STRING_FREE(tag_name); + if (scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &next_tag)) { tag_free(&next_tag); return false; } // Otherwise, dig deeper and queue implicit end tags (to be nice in // the case of malformed HTML) - for (unsigned i = scanner->tags.len; i > 0; i--) { - if (scanner->tags.data[i - 1].type == next_tag.type) { - VEC_POP(scanner->tags); + for (unsigned i = scanner->tags.size; i > 0; i--) { + if (scanner->tags.contents[i - 1].type == next_tag.type) { + Tag popped_tag = array_pop(&scanner->tags); + tag_free(&popped_tag); lexer->result_symbol = IMPLICIT_END_TAG; - STRING_FREE(tag_name); tag_free(&next_tag); return true; } } - } else if (parent && - (!can_contain(parent, &next_tag) || - (parent->type == HTML || parent->type == HEAD || parent->type == BODY) && lexer->eof(lexer))) { - VEC_POP(scanner->tags); + } else if ( + parent && + ( + !tag_can_contain(parent, &next_tag) || + (parent->type == HTML || parent->type == HEAD || parent->type == BODY) && lexer->eof(lexer) + ) + ) { + Tag popped_tag = array_pop(&scanner->tags); + tag_free(&popped_tag); lexer->result_symbol = IMPLICIT_END_TAG; - STRING_FREE(tag_name); tag_free(&next_tag); return true; } - STRING_FREE(tag_name); tag_free(&next_tag); return false; } static bool scan_start_tag_name(Scanner *scanner, TSLexer *lexer) { String tag_name = scan_tag_name(lexer); - if (tag_name.len == 0) { - STRING_FREE(tag_name); + if (tag_name.size == 0) { + array_delete(&tag_name); return false; } - Tag tag = for_name(tag_name.data); - VEC_PUSH(scanner->tags, tag); + + Tag tag = tag_for_name(tag_name); + array_push(&scanner->tags, tag); switch (tag.type) { case SCRIPT: lexer->result_symbol = SCRIPT_START_TAG_NAME; @@ -333,25 +249,27 @@ static bool scan_start_tag_name(Scanner *scanner, TSLexer *lexer) { lexer->result_symbol = START_TAG_NAME; break; } - STRING_FREE(tag_name); return true; } static bool scan_end_tag_name(Scanner *scanner, TSLexer *lexer) { String tag_name = scan_tag_name(lexer); - if (tag_name.len == 0) { - STRING_FREE(tag_name); + + if (tag_name.size == 0) { + array_delete(&tag_name); return false; } - Tag tag = for_name(tag_name.data); - if (scanner->tags.len > 0 && tagcmp(&VEC_BACK(scanner->tags), &tag)) { - VEC_POP(scanner->tags); + + Tag tag = tag_for_name(tag_name); + if (scanner->tags.size > 0 && tag_eq(array_back(&scanner->tags), &tag)) { + Tag popped_tag = array_pop(&scanner->tags); + tag_free(&popped_tag); lexer->result_symbol = END_TAG_NAME; } else { lexer->result_symbol = ERRONEOUS_END_TAG_NAME; } + tag_free(&tag); - STRING_FREE(tag_name); return true; } @@ -359,8 +277,9 @@ static bool scan_self_closing_tag_delimiter(Scanner *scanner, TSLexer *lexer) { advance(lexer); if (lexer->lookahead == '>') { advance(lexer); - if (scanner->tags.len > 0) { - VEC_POP(scanner->tags); + if (scanner->tags.size > 0) { + Tag popped_tag = array_pop(&scanner->tags); + tag_free(&popped_tag); lexer->result_symbol = SELF_CLOSING_TAG_DELIMITER; } return true; @@ -369,9 +288,6 @@ static bool scan_self_closing_tag_delimiter(Scanner *scanner, TSLexer *lexer) { } static bool scan(Scanner *scanner, TSLexer *lexer, const bool *valid_symbols) { - if (scanner->tags.len > 0) { - Tag *parent = &VEC_BACK(scanner->tags); - } if (valid_symbols[RAW_TEXT] && !valid_symbols[START_TAG_NAME] && !valid_symbols[END_TAG_NAME]) { return scan_raw_text(scanner, lexer); } @@ -439,9 +355,9 @@ void tree_sitter_html_external_scanner_deserialize(void *payload, const char *bu void tree_sitter_html_external_scanner_destroy(void *payload) { Scanner *scanner = (Scanner *)payload; - for (unsigned i = 0; i < scanner->tags.len; i++) { - STRING_FREE(scanner->tags.data[i].custom_tag_name); + for (unsigned i = 0; i < scanner->tags.size; i++) { + tag_free(&scanner->tags.contents[i]); } - VEC_FREE(scanner->tags); + array_delete(&scanner->tags); free(scanner); } diff --git a/src/tag.h b/src/tag.h index 8c34f8f..2e046bd 100644 --- a/src/tag.h +++ b/src/tag.h @@ -1,5 +1,5 @@ #include "tree_sitter/parser.h" - +#include "tree_sitter/array.h" #include #include @@ -137,11 +137,7 @@ typedef enum { END_, } TagType; -typedef struct { - uint32_t len; - uint32_t cap; - char *data; -} String; +typedef Array(char) String; typedef struct { char tag_name[16]; @@ -289,59 +285,65 @@ static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = { NAV, OL, P, PRE, SECTION, }; -static TagType get_tag_from_string(const char *tag_name) { +static TagType tag_type_for_name(const String *tag_name) { for (int i = 0; i < 126; i++) { - if (strcmp(TAG_TYPES_BY_TAG_NAME[i].tag_name, tag_name) == 0) { + const char *name = TAG_TYPES_BY_TAG_NAME[i].tag_name; + if ( + strlen(name) == tag_name->size && + strncmp(tag_name->contents, TAG_TYPES_BY_TAG_NAME[i].tag_name, tag_name->size) == 0 + ) { return TAG_TYPES_BY_TAG_NAME[i].tag_value; } } return CUSTOM; } -static inline Tag new_tag() { +static inline Tag tag_new() { Tag tag; tag.type = END_; - tag.custom_tag_name.data = NULL; - tag.custom_tag_name.len = 0; - tag.custom_tag_name.cap = 0; + tag.custom_tag_name = (String) array_new(); return tag; } -static Tag make_tag(TagType type, const char *name) { - Tag tag = new_tag(); - tag.type = type; - if (type == CUSTOM) { - tag.custom_tag_name.len = (uint32_t)strlen(name); - tag.custom_tag_name.data = - (char *)calloc(1, sizeof(char) * (tag.custom_tag_name.len + 1)); - strncpy(tag.custom_tag_name.data, name, tag.custom_tag_name.len); +static inline Tag tag_for_name(String name) { + Tag tag = tag_new(); + tag.type = tag_type_for_name(&name); + if (tag.type == CUSTOM) { + tag.custom_tag_name = name; + } else { + array_delete(&name); } return tag; } static inline void tag_free(Tag *tag) { if (tag->type == CUSTOM) { - free(tag->custom_tag_name.data); + array_delete(&tag->custom_tag_name); } - tag->custom_tag_name.data = NULL; -} - -static inline bool is_void(const Tag *tag) { - return tag->type < END_OF_VOID_TAGS; } -static inline Tag for_name(const char *name) { - return make_tag(get_tag_from_string(name), name); +static inline bool tag_is_void(const Tag *self) { + return self->type < END_OF_VOID_TAGS; } -static inline bool tagcmp(const Tag *_tag1, const Tag *_tag2) { - return _tag1->type == _tag2->type && - (_tag1->type == CUSTOM ? strcmp(_tag1->custom_tag_name.data, - _tag2->custom_tag_name.data) == 0 - : true); +static inline bool tag_eq(const Tag *self, const Tag *other) { + if (self->type != other->type) return false; + if (self->type == CUSTOM) { + if (self->custom_tag_name.size != other->custom_tag_name.size) { + return false; + } + if (memcmp( + self->custom_tag_name.contents, + other->custom_tag_name.contents, + self->custom_tag_name.size + ) != 0) { + return false; + } + } + return true; } -static bool can_contain(Tag *self, const Tag *other) { +static bool tag_can_contain(Tag *self, const Tag *other) { TagType child = other->type; switch (self->type) { diff --git a/src/tree_sitter/alloc.h b/src/tree_sitter/alloc.h new file mode 100644 index 0000000..1f4466d --- /dev/null +++ b/src/tree_sitter/alloc.h @@ -0,0 +1,54 @@ +#ifndef TREE_SITTER_ALLOC_H_ +#define TREE_SITTER_ALLOC_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +// Allow clients to override allocation functions +#ifdef TREE_SITTER_REUSE_ALLOCATOR + +extern void *(*ts_current_malloc)(size_t); +extern void *(*ts_current_calloc)(size_t, size_t); +extern void *(*ts_current_realloc)(void *, size_t); +extern void (*ts_current_free)(void *); + +#ifndef ts_malloc +#define ts_malloc ts_current_malloc +#endif +#ifndef ts_calloc +#define ts_calloc ts_current_calloc +#endif +#ifndef ts_realloc +#define ts_realloc ts_current_realloc +#endif +#ifndef ts_free +#define ts_free ts_current_free +#endif + +#else + +#ifndef ts_malloc +#define ts_malloc malloc +#endif +#ifndef ts_calloc +#define ts_calloc calloc +#endif +#ifndef ts_realloc +#define ts_realloc realloc +#endif +#ifndef ts_free +#define ts_free free +#endif + +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ALLOC_H_ diff --git a/src/tree_sitter/array.h b/src/tree_sitter/array.h new file mode 100644 index 0000000..15a3b23 --- /dev/null +++ b/src/tree_sitter/array.h @@ -0,0 +1,290 @@ +#ifndef TREE_SITTER_ARRAY_H_ +#define TREE_SITTER_ARRAY_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "./alloc.h" + +#include +#include +#include +#include +#include + +#ifdef _MSC_VER +#pragma warning(disable : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif + +#define Array(T) \ + struct { \ + T *contents; \ + uint32_t size; \ + uint32_t capacity; \ + } + +/// Initialize an array. +#define array_init(self) \ + ((self)->size = 0, (self)->capacity = 0, (self)->contents = NULL) + +/// Create an empty array. +#define array_new() \ + { NULL, 0, 0 } + +/// Get a pointer to the element at a given `index` in the array. +#define array_get(self, _index) \ + (assert((uint32_t)(_index) < (self)->size), &(self)->contents[_index]) + +/// Get a pointer to the first element in the array. +#define array_front(self) array_get(self, 0) + +/// Get a pointer to the last element in the array. +#define array_back(self) array_get(self, (self)->size - 1) + +/// Clear the array, setting its size to zero. Note that this does not free any +/// memory allocated for the array's contents. +#define array_clear(self) ((self)->size = 0) + +/// Reserve `new_capacity` elements of space in the array. If `new_capacity` is +/// less than the array's current capacity, this function has no effect. +#define array_reserve(self, new_capacity) \ + _array__reserve((Array *)(self), array_elem_size(self), new_capacity) + +/// Free any memory allocated for this array. Note that this does not free any +/// memory allocated for the array's contents. +#define array_delete(self) _array__delete((Array *)(self)) + +/// Push a new `element` onto the end of the array. +#define array_push(self, element) \ + (_array__grow((Array *)(self), 1, array_elem_size(self)), \ + (self)->contents[(self)->size++] = (element)) + +/// Increase the array's size by `count` elements. +/// New elements are zero-initialized. +#define array_grow_by(self, count) \ + do { \ + if ((count) == 0) break; \ + _array__grow((Array *)(self), count, array_elem_size(self)); \ + memset((self)->contents + (self)->size, 0, (count) * array_elem_size(self)); \ + (self)->size += (count); \ + } while (0) + +/// Append all elements from one array to the end of another. +#define array_push_all(self, other) \ + array_extend((self), (other)->size, (other)->contents) + +/// Append `count` elements to the end of the array, reading their values from the +/// `contents` pointer. +#define array_extend(self, count, contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), (self)->size, \ + 0, count, contents \ + ) + +/// Remove `old_count` elements from the array starting at the given `index`. At +/// the same index, insert `new_count` new elements, reading their values from the +/// `new_contents` pointer. +#define array_splice(self, _index, old_count, new_count, new_contents) \ + _array__splice( \ + (Array *)(self), array_elem_size(self), _index, \ + old_count, new_count, new_contents \ + ) + +/// Insert one `element` into the array at the given `index`. +#define array_insert(self, _index, element) \ + _array__splice((Array *)(self), array_elem_size(self), _index, 0, 1, &(element)) + +/// Remove one element from the array at the given `index`. +#define array_erase(self, _index) \ + _array__erase((Array *)(self), array_elem_size(self), _index) + +/// Pop the last element off the array, returning the element by value. +#define array_pop(self) ((self)->contents[--(self)->size]) + +/// Assign the contents of one array to another, reallocating if necessary. +#define array_assign(self, other) \ + _array__assign((Array *)(self), (const Array *)(other), array_elem_size(self)) + +/// Swap one array with another +#define array_swap(self, other) \ + _array__swap((Array *)(self), (Array *)(other)) + +/// Get the size of the array contents +#define array_elem_size(self) (sizeof *(self)->contents) + +/// Search a sorted array for a given `needle` value, using the given `compare` +/// callback to determine the order. +/// +/// If an existing element is found to be equal to `needle`, then the `index` +/// out-parameter is set to the existing value's index, and the `exists` +/// out-parameter is set to true. Otherwise, `index` is set to an index where +/// `needle` should be inserted in order to preserve the sorting, and `exists` +/// is set to false. +#define array_search_sorted_with(self, compare, needle, _index, _exists) \ + _array__search_sorted(self, 0, compare, , needle, _index, _exists) + +/// Search a sorted array for a given `needle` value, using integer comparisons +/// of a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_with`. +#define array_search_sorted_by(self, field, needle, _index, _exists) \ + _array__search_sorted(self, 0, _compare_int, field, needle, _index, _exists) + +/// Insert a given `value` into a sorted array, using the given `compare` +/// callback to determine the order. +#define array_insert_sorted_with(self, compare, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_with(self, compare, &(value), &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +/// Insert a given `value` into a sorted array, using integer comparisons of +/// a given struct field (specified with a leading dot) to determine the order. +/// +/// See also `array_search_sorted_by`. +#define array_insert_sorted_by(self, field, value) \ + do { \ + unsigned _index, _exists; \ + array_search_sorted_by(self, field, (value) field, &_index, &_exists); \ + if (!_exists) array_insert(self, _index, value); \ + } while (0) + +// Private + +typedef Array(void) Array; + +/// This is not what you're looking for, see `array_delete`. +static inline void _array__delete(Array *self) { + if (self->contents) { + ts_free(self->contents); + self->contents = NULL; + self->size = 0; + self->capacity = 0; + } +} + +/// This is not what you're looking for, see `array_erase`. +static inline void _array__erase(Array *self, size_t element_size, + uint32_t index) { + assert(index < self->size); + char *contents = (char *)self->contents; + memmove(contents + index * element_size, contents + (index + 1) * element_size, + (self->size - index - 1) * element_size); + self->size--; +} + +/// This is not what you're looking for, see `array_reserve`. +static inline void _array__reserve(Array *self, size_t element_size, uint32_t new_capacity) { + if (new_capacity > self->capacity) { + if (self->contents) { + self->contents = ts_realloc(self->contents, new_capacity * element_size); + } else { + self->contents = ts_malloc(new_capacity * element_size); + } + self->capacity = new_capacity; + } +} + +/// This is not what you're looking for, see `array_assign`. +static inline void _array__assign(Array *self, const Array *other, size_t element_size) { + _array__reserve(self, element_size, other->size); + self->size = other->size; + memcpy(self->contents, other->contents, self->size * element_size); +} + +/// This is not what you're looking for, see `array_swap`. +static inline void _array__swap(Array *self, Array *other) { + Array swap = *other; + *other = *self; + *self = swap; +} + +/// This is not what you're looking for, see `array_push` or `array_grow_by`. +static inline void _array__grow(Array *self, uint32_t count, size_t element_size) { + uint32_t new_size = self->size + count; + if (new_size > self->capacity) { + uint32_t new_capacity = self->capacity * 2; + if (new_capacity < 8) new_capacity = 8; + if (new_capacity < new_size) new_capacity = new_size; + _array__reserve(self, element_size, new_capacity); + } +} + +/// This is not what you're looking for, see `array_splice`. +static inline void _array__splice(Array *self, size_t element_size, + uint32_t index, uint32_t old_count, + uint32_t new_count, const void *elements) { + uint32_t new_size = self->size + new_count - old_count; + uint32_t old_end = index + old_count; + uint32_t new_end = index + new_count; + assert(old_end <= self->size); + + _array__reserve(self, element_size, new_size); + + char *contents = (char *)self->contents; + if (self->size > old_end) { + memmove( + contents + new_end * element_size, + contents + old_end * element_size, + (self->size - old_end) * element_size + ); + } + if (new_count > 0) { + if (elements) { + memcpy( + (contents + index * element_size), + elements, + new_count * element_size + ); + } else { + memset( + (contents + index * element_size), + 0, + new_count * element_size + ); + } + } + self->size += new_count - old_count; +} + +/// A binary search routine, based on Rust's `std::slice::binary_search_by`. +/// This is not what you're looking for, see `array_search_sorted_with` or `array_search_sorted_by`. +#define _array__search_sorted(self, start, compare, suffix, needle, _index, _exists) \ + do { \ + *(_index) = start; \ + *(_exists) = false; \ + uint32_t size = (self)->size - *(_index); \ + if (size == 0) break; \ + int comparison; \ + while (size > 1) { \ + uint32_t half_size = size / 2; \ + uint32_t mid_index = *(_index) + half_size; \ + comparison = compare(&((self)->contents[mid_index] suffix), (needle)); \ + if (comparison <= 0) *(_index) = mid_index; \ + size -= half_size; \ + } \ + comparison = compare(&((self)->contents[*(_index)] suffix), (needle)); \ + if (comparison == 0) *(_exists) = true; \ + else if (comparison < 0) *(_index) += 1; \ + } while (0) + +/// Helper macro for the `_sorted_by` routines below. This takes the left (existing) +/// parameter by reference in order to work with the generic sorting function above. +#define _compare_int(a, b) ((int)*(a) - (int)(b)) + +#ifdef _MSC_VER +#pragma warning(default : 4101) +#elif defined(__GNUC__) || defined(__clang__) +#pragma GCC diagnostic pop +#endif + +#ifdef __cplusplus +} +#endif + +#endif // TREE_SITTER_ARRAY_H_ From 5d081f2fff2e0386794ab9ed4f4199f5a810b7e2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 18 Mar 2024 11:30:23 -0700 Subject: [PATCH 3/3] Use memcmp instead of strncmp because the length is handled explicitly Co-authored-by: Marshall --- src/tag.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/tag.h b/src/tag.h index 2e046bd..6884fbd 100644 --- a/src/tag.h +++ b/src/tag.h @@ -141,15 +141,15 @@ typedef Array(char) String; typedef struct { char tag_name[16]; - TagType tag_value; -} TagMap; + TagType tag_type; +} TagMapEntry; typedef struct { TagType type; String custom_tag_name; } Tag; -static const TagMap TAG_TYPES_BY_TAG_NAME[126] = { +static const TagMapEntry TAG_TYPES_BY_TAG_NAME[126] = { {"AREA", AREA }, {"BASE", BASE }, {"BASEFONT", BASEFONT }, @@ -287,12 +287,12 @@ static const TagType TAG_TYPES_NOT_ALLOWED_IN_PARAGRAPHS[] = { static TagType tag_type_for_name(const String *tag_name) { for (int i = 0; i < 126; i++) { - const char *name = TAG_TYPES_BY_TAG_NAME[i].tag_name; + const TagMapEntry *entry = &TAG_TYPES_BY_TAG_NAME[i]; if ( - strlen(name) == tag_name->size && - strncmp(tag_name->contents, TAG_TYPES_BY_TAG_NAME[i].tag_name, tag_name->size) == 0 + strlen(entry->tag_name) == tag_name->size && + memcmp(tag_name->contents, entry->tag_name, tag_name->size) == 0 ) { - return TAG_TYPES_BY_TAG_NAME[i].tag_value; + return entry->tag_type; } } return CUSTOM;