From 4b5e1f5c3694a6f2d64ad73a58603ee2818101a1 Mon Sep 17 00:00:00 2001 From: Patrick Dowling Date: Tue, 15 Mar 2022 09:33:59 +0100 Subject: [PATCH 1/2] Special-case ## pasting to string/character constants (issue #168) This enables use of macros to add literals/operator "". --- run-tests.py | 2 +- simplecpp.cpp | 112 +++++++++++++++++++++++++++++++------------------- test.cpp | 100 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 171 insertions(+), 43 deletions(-) diff --git a/run-tests.py b/run-tests.py index 0211ed04..3dc4a9b7 100644 --- a/run-tests.py +++ b/run-tests.py @@ -38,7 +38,7 @@ def cleanup(out): 'has_attribute.cpp', 'header_lookup1.c', # missing include 'line-directive-output.c', - 'macro_paste_hashhash.c', + # 'macro_paste_hashhash.c', 'microsoft-ext.c', 'normalize-3.c', # gcc has different output \uAC00 vs \U0000AC00 on cygwin/linux 'pr63831-1.c', # __has_attribute => works differently on cygwin/linux diff --git a/simplecpp.cpp b/simplecpp.cpp index 467e94eb..ed190d73 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -58,6 +58,15 @@ static bool isOct(const std::string &s) return s.size()>1 && (s[0]=='0') && (s[1] >= '0') && (s[1] < '8'); } +static bool isStringConstant(const std::string &s) +{ + return s.size() > 1 && (s[0]=='\"') && (*s.rbegin()=='\"'); +} + +static bool isCharConstant(const std::string &s) +{ + return s.size() == 3 && (s[0]=='\'') && (s[2]=='\''); +} static const simplecpp::TokenString DEFINE("define"); static const simplecpp::TokenString UNDEF("undef"); @@ -1922,7 +1931,8 @@ namespace simplecpp { throw invalidHashHash(tok->location, name()); bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>"; - if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual) + bool canBeConcatenatedStringOrChar = isStringConstant(A->str()) || isCharConstant(A->str()); + if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual && !canBeConcatenatedStringOrChar) throw invalidHashHash(tok->location, name()); Token *B = tok->next->next; @@ -1933,55 +1943,73 @@ namespace simplecpp { (!canBeConcatenatedWithEqual && B->op == '=')) throw invalidHashHash(tok->location, name()); - std::string strAB; - - const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U]; + // Superficial check; more in-depth would in theory be possible _after_ expandArg + if (canBeConcatenatedStringOrChar && (B->number || !B->name)) + throw invalidHashHash(tok->location, name()); TokenList tokensB(files); - if (expandArg(&tokensB, B, parametertokens)) { - if (tokensB.empty()) - strAB = A->str(); - else if (varargs && A->op == ',') { - strAB = ","; + const Token *nextTok = B->next; + + if (canBeConcatenatedStringOrChar) { + // It seems clearer to handle this case separately even though the code is similar-ish, but we don't want to merge here. + // TODO The question is whether the ## or varargs may still apply, and how to provoke? + if (expandArg(&tokensB, B, parametertokens)) { + for (Token *b = tokensB.front(); b; b = b->next) + b->location = loc; } else { - strAB = A->str() + tokensB.cfront()->str(); - tokensB.deleteToken(tokensB.front()); + tokensB.push_back(new Token(*B)); + tokensB.back()->location = loc; } - } else { - strAB = A->str() + B->str(); - } - - const Token *nextTok = B->next; - if (varargs && tokensB.empty() && tok->previous->str() == ",") - output->deleteToken(A); - else if (strAB != "," && macros.find(strAB) == macros.end()) { - A->setstr(strAB); - for (Token *b = tokensB.front(); b; b = b->next) - b->location = loc; output->takeTokens(tokensB); - } else if (nextTok->op == '#' && nextTok->next->op == '#') { - TokenList output2(files); - output2.push_back(new Token(strAB, tok->location)); - nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens); - output->deleteToken(A); - output->takeTokens(output2); } else { - output->deleteToken(A); - TokenList tokens(files); - tokens.push_back(new Token(strAB, tok->location)); - // for function like macros, push the (...) - if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') { - const MacroMap::const_iterator it = macros.find(strAB); - if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) { - const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens); - if (tok2) - nextTok = tok2->next; + std::string strAB; + + const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U]; + + if (expandArg(&tokensB, B, parametertokens)) { + if (tokensB.empty()) + strAB = A->str(); + else if (varargs && A->op == ',') { + strAB = ","; + } else { + strAB = A->str() + tokensB.cfront()->str(); + tokensB.deleteToken(tokensB.front()); + } + } else { + strAB = A->str() + B->str(); + } + + if (varargs && tokensB.empty() && tok->previous->str() == ",") + output->deleteToken(A); + else if (strAB != "," && macros.find(strAB) == macros.end()) { + A->setstr(strAB); + for (Token *b = tokensB.front(); b; b = b->next) + b->location = loc; + output->takeTokens(tokensB); + } else if (nextTok->op == '#' && nextTok->next->op == '#') { + TokenList output2(files); + output2.push_back(new Token(strAB, tok->location)); + nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens); + output->deleteToken(A); + output->takeTokens(output2); + } else { + output->deleteToken(A); + TokenList tokens(files); + tokens.push_back(new Token(strAB, tok->location)); + // for function like macros, push the (...) + if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') { + const MacroMap::const_iterator it = macros.find(strAB); + if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) { + const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens); + if (tok2) + nextTok = tok2->next; + } } + expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens); + for (Token *b = tokensB.front(); b; b = b->next) + b->location = loc; + output->takeTokens(tokensB); } - expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens); - for (Token *b = tokensB.front(); b; b = b->next) - b->location = loc; - output->takeTokens(tokensB); } return nextTok; diff --git a/test.cpp b/test.cpp index 9e12d48e..4cc5a482 100644 --- a/test.cpp +++ b/test.cpp @@ -1071,6 +1071,87 @@ static void hashhash13() ASSERT_EQUALS("\n& ab", preprocess(code2)); } +static void hashhash14() +{ + const char code[] = + "#define UL(x) x##_ul\n" + "\"ABC\"_ul;\n" + "UL(\"ABC\");"; + + ASSERT_EQUALS("\n\"ABC\" _ul ;\n\"ABC\" _ul ;", preprocess(code)); +} + +static void hashhash15() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "#define STR(x) CONCAT(x,s)\n" + "STR(\"ABC\");"; + + ASSERT_EQUALS("\n\n\"ABC\" s ;", preprocess(code)); +} + +static void hashhash16() +{ + const char code[] = + "#define CH(x) x##_c\n" + "CH('a');"; + + ASSERT_EQUALS("\n'a' _c ;", preprocess(code)); +} + +static void hashhash17() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "CONCAT(\"ABC\",);"; + + ASSERT_EQUALS("\n\"ABC\" ;", preprocess(code)); +} + +static void hashhash18() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "CONCAT(\"ABC\", 'c');"; + + // This works, but maybe shouldn't since the result isn't useful. + ASSERT_EQUALS("\n\"ABC\" 'c' ;", preprocess(code)); +} + +static void hashhash19() +{ + const char code[] = + "#define CONCAT(a,b) a##b\n" + "#define LIT _literal\n" + "CONCAT(\"string\", LIT);"; + + // TODO is this correct? clang fails because that's not really a valid thing but gcc seems to accept it + // see https://gist.github.com/patrickdowling/877a25294f069bf059f3b07f9b5b7039 + + ASSERT_EQUALS("\n\n\"string\" LIT ;", preprocess(code)); +} + +static void hashhash20() +{ + const char code[] = + "#define CONCAT(a,b,c) a##b##c\n" + "#define PASTER(a,b,c) CONCAT(a,b,c)\n" + "PASTER(\"123\",_i,ul);"; + + ASSERT_EQUALS("\n\n\"123\" _iul ;", preprocess(code)); +} + +static void hashhash21() +{ + const char code[] = + "#define PASTE(a,b) a##b\n" + "PASTE(123,_i);\n" + "1234_i;\n"; + + ASSERT_EQUALS("\n123_i ;\n1234_i ;", preprocess(code)); +} + static void hashhash_invalid_1() { const char code[] = "#define f(a) (##x)\nf(1)"; @@ -1087,6 +1168,16 @@ static void hashhash_invalid_2() ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when expanding 'f'.\n", toString(outputList)); } +static void hashhash_invalid_3() +{ + const char code[] = + "#define BAD(x) x##12345\nBAD(\"ABC\")"; + + simplecpp::OutputList outputList; + preprocess(code, simplecpp::DUI(), &outputList); + ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'BAD', Invalid ## usage when expanding 'BAD'.\n", toString(outputList)); +} + static void has_include_1() { const char code[] = "#ifdef __has_include\n" @@ -2306,8 +2397,17 @@ int main(int argc, char **argv) TEST_CASE(hashhash11); // #60: #define x # # # TEST_CASE(hashhash12); TEST_CASE(hashhash13); + TEST_CASE(hashhash14); + TEST_CASE(hashhash15); + TEST_CASE(hashhash16); + TEST_CASE(hashhash17); + TEST_CASE(hashhash18); + TEST_CASE(hashhash19); + TEST_CASE(hashhash20); + TEST_CASE(hashhash21); TEST_CASE(hashhash_invalid_1); TEST_CASE(hashhash_invalid_2); + TEST_CASE(hashhash_invalid_3); // c++17 __has_include TEST_CASE(has_include_1); From d4c14a50f6fa21d08496158a79dfaeeb7d439d4a Mon Sep 17 00:00:00 2001 From: Patrick Dowling Date: Thu, 17 Mar 2022 19:25:16 +0100 Subject: [PATCH 2/2] Relax restrictions for char literals when pasting with ##. Additional/renamed tests. --- simplecpp.cpp | 10 +++++---- test.cpp | 56 ++++++++++++++++++++++++++++++++++----------------- 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/simplecpp.cpp b/simplecpp.cpp index ed190d73..ce3b156c 100755 --- a/simplecpp.cpp +++ b/simplecpp.cpp @@ -58,14 +58,16 @@ static bool isOct(const std::string &s) return s.size()>1 && (s[0]=='0') && (s[1] >= '0') && (s[1] < '8'); } -static bool isStringConstant(const std::string &s) +static bool isStringLiteral(const std::string &s) { return s.size() > 1 && (s[0]=='\"') && (*s.rbegin()=='\"'); } -static bool isCharConstant(const std::string &s) +static bool isCharLiteral(const std::string &s) { - return s.size() == 3 && (s[0]=='\'') && (s[2]=='\''); + // char literal patterns can include 'a', '\t', '\000', '\xff', 'abcd', and maybe '' + // This only checks for the surrounding '' but doesn't parse the content. + return s.size() > 1 && (s[0]=='\'') && (*s.rbegin()=='\''); } static const simplecpp::TokenString DEFINE("define"); @@ -1931,7 +1933,7 @@ namespace simplecpp { throw invalidHashHash(tok->location, name()); bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>"; - bool canBeConcatenatedStringOrChar = isStringConstant(A->str()) || isCharConstant(A->str()); + bool canBeConcatenatedStringOrChar = isStringLiteral(A->str()) || isCharLiteral(A->str()); if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual && !canBeConcatenatedStringOrChar) throw invalidHashHash(tok->location, name()); diff --git a/test.cpp b/test.cpp index 4cc5a482..02de49b9 100644 --- a/test.cpp +++ b/test.cpp @@ -1071,7 +1071,7 @@ static void hashhash13() ASSERT_EQUALS("\n& ab", preprocess(code2)); } -static void hashhash14() +static void hashhash_string_literal() { const char code[] = "#define UL(x) x##_ul\n" @@ -1081,7 +1081,7 @@ static void hashhash14() ASSERT_EQUALS("\n\"ABC\" _ul ;\n\"ABC\" _ul ;", preprocess(code)); } -static void hashhash15() +static void hashhash_string_wrapped() { const char code[] = "#define CONCAT(a,b) a##b\n" @@ -1091,16 +1091,34 @@ static void hashhash15() ASSERT_EQUALS("\n\n\"ABC\" s ;", preprocess(code)); } -static void hashhash16() +static void hashhash_char_literal() { const char code[] = - "#define CH(x) x##_c\n" + "#define CH(x) x##_ch\n" "CH('a');"; - ASSERT_EQUALS("\n'a' _c ;", preprocess(code)); + ASSERT_EQUALS("\n'a' _ch ;", preprocess(code)); } -static void hashhash17() +static void hashhash_multichar_literal() +{ + const char code[] = + "#define CH(x) x##_ch\n" + "CH('abcd');"; + + ASSERT_EQUALS("\n'abcd' _ch ;", preprocess(code)); +} + +static void hashhash_char_escaped() +{ + const char code[] = + "#define CH(x) x##_ch\n" + "CH('\\'');"; + + ASSERT_EQUALS("\n'\\'' _ch ;", preprocess(code)); +} + +static void hashhash_string_nothing() { const char code[] = "#define CONCAT(a,b) a##b\n" @@ -1109,7 +1127,7 @@ static void hashhash17() ASSERT_EQUALS("\n\"ABC\" ;", preprocess(code)); } -static void hashhash18() +static void hashhash_string_char() { const char code[] = "#define CONCAT(a,b) a##b\n" @@ -1119,7 +1137,7 @@ static void hashhash18() ASSERT_EQUALS("\n\"ABC\" 'c' ;", preprocess(code)); } -static void hashhash19() +static void hashhash_string_name() { const char code[] = "#define CONCAT(a,b) a##b\n" @@ -1132,7 +1150,7 @@ static void hashhash19() ASSERT_EQUALS("\n\n\"string\" LIT ;", preprocess(code)); } -static void hashhash20() +static void hashhashhash_int_literal() { const char code[] = "#define CONCAT(a,b,c) a##b##c\n" @@ -1142,7 +1160,7 @@ static void hashhash20() ASSERT_EQUALS("\n\n\"123\" _iul ;", preprocess(code)); } -static void hashhash21() +static void hashhash_int_literal() { const char code[] = "#define PASTE(a,b) a##b\n" @@ -2397,14 +2415,16 @@ int main(int argc, char **argv) TEST_CASE(hashhash11); // #60: #define x # # # TEST_CASE(hashhash12); TEST_CASE(hashhash13); - TEST_CASE(hashhash14); - TEST_CASE(hashhash15); - TEST_CASE(hashhash16); - TEST_CASE(hashhash17); - TEST_CASE(hashhash18); - TEST_CASE(hashhash19); - TEST_CASE(hashhash20); - TEST_CASE(hashhash21); + TEST_CASE(hashhash_string_literal); + TEST_CASE(hashhash_string_wrapped); + TEST_CASE(hashhash_char_literal); + TEST_CASE(hashhash_multichar_literal); + TEST_CASE(hashhash_char_escaped); + TEST_CASE(hashhash_string_nothing); + TEST_CASE(hashhash_string_char); + TEST_CASE(hashhash_string_name); + TEST_CASE(hashhashhash_int_literal); + TEST_CASE(hashhash_int_literal); TEST_CASE(hashhash_invalid_1); TEST_CASE(hashhash_invalid_2); TEST_CASE(hashhash_invalid_3);