From ffb18f2475643aa7e1ab65747377a527a21321c4 Mon Sep 17 00:00:00 2001 From: TAKAI Kousuke <62541129+t-a-k@users.noreply.github.com> Date: Tue, 7 Jan 2025 00:09:12 +0900 Subject: [PATCH 1/3] utf8.c: Change the type of some length variables from SSize_t to Size_t in Perl_utf8_to_uv_msgs_helper_() Changed the type of `curlen`, `expectlen`, and `avail_len` from `SSize_t` to `Size_t` to silence compiler warnings about comparisons between same-sized signed and unsigned values on 32-bit builds. These variables hold byte counts, which cannot be negative, so it should be safe to make them unsigned. --- utf8.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/utf8.c b/utf8.c index 71a4b89bee57..f679e806d2ac 100644 --- a/utf8.c +++ b/utf8.c @@ -1634,12 +1634,12 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * than a single character */ const U8 * send = e; - SSize_t curlen = send - s0; + Size_t curlen = send - s0; U32 possible_problems; /* A bit is set here for each potential problem found as we go along */ UV uv = 0; - SSize_t expectlen; /* How long should this sequence be? */ - SSize_t avail_len; /* When input is too short, gives what that is */ + Size_t expectlen; /* How long should this sequence be? */ + Size_t avail_len; /* When input is too short, gives what that is */ dTHX; @@ -1892,7 +1892,7 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * full length with occurrences of the smallest continuation * byte. For surrogates we could just look at the bytes, but * this single algorithm works for both those and supers. 
*/ - for (unsigned i = curlen; i < expectlen; i++) { + for (Size_t i = curlen; i < expectlen; i++) { uv = UTF8_ACCUMULATE(uv, UTF8_MIN_CONTINUATION_BYTE); } } From 129e8d876baa3550acd8ab252cd67265ed21c6dc Mon Sep 17 00:00:00 2001 From: TAKAI Kousuke <62541129+t-a-k@users.noreply.github.com> Date: Wed, 8 Jan 2025 01:20:10 +0900 Subject: [PATCH 2/3] utf8.c: Fixed formatting specifier for croak() in Perl_utf8_to_uv_msgs_helper_ This call of croak() tried to print the U32 variable `this_problem` with the %d format specifier, which triggered a compiler warning on 32-bit builds where U32 is typedef'ed to unsigned long. --- utf8.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/utf8.c b/utf8.c index f679e806d2ac..f446f3c927db 100644 --- a/utf8.c +++ b/utf8.c @@ -2096,7 +2096,8 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, switch (this_problem) { default: Perl_croak(aTHX_ "panic: Unexpected case value in " - " utf8n_to_uvchr_msgs() %d", this_problem); + " utf8n_to_uvchr_msgs() %" U32uf, + this_problem); /* NOTREACHED */ break; From dad11fc6c65685ad6b7c204679ccd428a7488311 Mon Sep 17 00:00:00 2001 From: TAKAI Kousuke <62541129+t-a-k@users.noreply.github.com> Date: Sat, 11 Jan 2025 01:17:31 +0900 Subject: [PATCH 3/3] utf8.c: Postpone pointer subtraction until it turns out to be safe In Perl_utf8_to_uv_msgs_helper_(), "curlen = send - s0;" used to be done earlier in this function, but this subtraction might underflow because "send >= s0" (that is, "e >= s0") does not necessarily hold. Thanks to @mauke and @tonycoz for pointing this out. 
--- utf8.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/utf8.c b/utf8.c index f446f3c927db..a7b48994ac07 100644 --- a/utf8.c +++ b/utf8.c @@ -1634,7 +1634,6 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * than a single character */ const U8 * send = e; - Size_t curlen = send - s0; U32 possible_problems; /* A bit is set here for each potential problem found as we go along */ UV uv = 0; @@ -1723,11 +1722,13 @@ Perl_utf8_to_uv_msgs_helper_(const U8 * const s0, * allowed one, we could allow in something that shouldn't have been. */ - if (UNLIKELY(curlen <= 0)) { + Size_t curlen; + if (UNLIKELY(s0 >= send)) { possible_problems |= UTF8_GOT_EMPTY; curlen = 0; goto ready_to_handle_errors; } + curlen = send - s0; /* We now know we can examine the first byte of the input */ expectlen = UTF8SKIP(s0);