Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Special-case ## pasting to string/character constants (issue #168) #255

Merged
merged 2 commits into from
Mar 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion run-tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def cleanup(out):
'has_attribute.cpp',
'header_lookup1.c', # missing include <stddef.h>
'line-directive-output.c',
'macro_paste_hashhash.c',
# 'macro_paste_hashhash.c',
danmar marked this conversation as resolved.
Show resolved Hide resolved
'microsoft-ext.c',
'normalize-3.c', # gcc has different output \uAC00 vs \U0000AC00 on cygwin/linux
'pr63831-1.c', # __has_attribute => works differently on cygwin/linux
Expand Down
114 changes: 72 additions & 42 deletions simplecpp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ static bool isOct(const std::string &s)
return s.size()>1 && (s[0]=='0') && (s[1] >= '0') && (s[1] < '8');
}

/**
 * Check whether a token string looks like a string literal.
 *
 * Only the surrounding double quotes are inspected; the content between
 * them is not parsed. At least two characters are required so that a lone
 * '"' is not mistaken for a complete literal.
 *
 * @param s token text to inspect
 * @return true if s starts and ends with a double quote and has size > 1
 */
static bool isStringLiteral(const std::string &s)
{
    return s.size() > 1 && (s[0]=='\"') && (*s.rbegin()=='\"');
}

/**
 * Check whether a token string looks like a character literal.
 *
 * Character literals come in many shapes ('a', '\t', '\000', '\xff',
 * 'abcd', possibly even ''). This helper does not parse the content; it
 * only verifies that the text is enclosed in single quotes.
 *
 * @param s token text to inspect
 * @return true if s has at least two characters and both the first and
 *         the last one are a single quote
 */
static bool isCharLiteral(const std::string &s)
{
    // A lone quote (or an empty string) cannot be a complete literal.
    if (s.size() < 2)
        return false;

    const char first = s[0];
    const char last = s[s.size() - 1];
    return first == '\'' && last == '\'';
}

static const simplecpp::TokenString DEFINE("define");
static const simplecpp::TokenString UNDEF("undef");
Expand Down Expand Up @@ -1922,7 +1933,8 @@ namespace simplecpp {
throw invalidHashHash(tok->location, name());

bool canBeConcatenatedWithEqual = A->isOneOf("+-*/%&|^") || A->str() == "<<" || A->str() == ">>";
if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual)
bool canBeConcatenatedStringOrChar = isStringLiteral(A->str()) || isCharLiteral(A->str());
if (!A->name && !A->number && A->op != ',' && !A->str().empty() && !canBeConcatenatedWithEqual && !canBeConcatenatedStringOrChar)
throw invalidHashHash(tok->location, name());

Token *B = tok->next->next;
Expand All @@ -1933,55 +1945,73 @@ namespace simplecpp {
(!canBeConcatenatedWithEqual && B->op == '='))
throw invalidHashHash(tok->location, name());

std::string strAB;

const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U];
// Superficial check; more in-depth would in theory be possible _after_ expandArg
if (canBeConcatenatedStringOrChar && (B->number || !B->name))
throw invalidHashHash(tok->location, name());

TokenList tokensB(files);
if (expandArg(&tokensB, B, parametertokens)) {
if (tokensB.empty())
strAB = A->str();
else if (varargs && A->op == ',') {
strAB = ",";
const Token *nextTok = B->next;

if (canBeConcatenatedStringOrChar) {
// It seems clearer to handle this case separately even though the code is similar-ish, but we don't want to merge here.
// TODO The question is whether the ## or varargs may still apply, and how to provoke?
if (expandArg(&tokensB, B, parametertokens)) {
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
} else {
strAB = A->str() + tokensB.cfront()->str();
tokensB.deleteToken(tokensB.front());
tokensB.push_back(new Token(*B));
tokensB.back()->location = loc;
}
} else {
strAB = A->str() + B->str();
}

const Token *nextTok = B->next;
if (varargs && tokensB.empty() && tok->previous->str() == ",")
output->deleteToken(A);
else if (strAB != "," && macros.find(strAB) == macros.end()) {
A->setstr(strAB);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
} else if (nextTok->op == '#' && nextTok->next->op == '#') {
TokenList output2(files);
output2.push_back(new Token(strAB, tok->location));
nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens);
output->deleteToken(A);
output->takeTokens(output2);
} else {
output->deleteToken(A);
TokenList tokens(files);
tokens.push_back(new Token(strAB, tok->location));
// for function like macros, push the (...)
if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') {
const MacroMap::const_iterator it = macros.find(strAB);
if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) {
const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens);
if (tok2)
nextTok = tok2->next;
std::string strAB;

const bool varargs = variadic && args.size() >= 1U && B->str() == args[args.size()-1U];

if (expandArg(&tokensB, B, parametertokens)) {
if (tokensB.empty())
strAB = A->str();
else if (varargs && A->op == ',') {
strAB = ",";
} else {
strAB = A->str() + tokensB.cfront()->str();
tokensB.deleteToken(tokensB.front());
}
} else {
strAB = A->str() + B->str();
}

if (varargs && tokensB.empty() && tok->previous->str() == ",")
output->deleteToken(A);
else if (strAB != "," && macros.find(strAB) == macros.end()) {
A->setstr(strAB);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
} else if (nextTok->op == '#' && nextTok->next->op == '#') {
TokenList output2(files);
output2.push_back(new Token(strAB, tok->location));
nextTok = expandHashHash(&output2, loc, nextTok, macros, expandedmacros, parametertokens);
output->deleteToken(A);
output->takeTokens(output2);
} else {
output->deleteToken(A);
TokenList tokens(files);
tokens.push_back(new Token(strAB, tok->location));
// for function like macros, push the (...)
if (tokensB.empty() && sameline(B,B->next) && B->next->op=='(') {
const MacroMap::const_iterator it = macros.find(strAB);
if (it != macros.end() && expandedmacros.find(strAB) == expandedmacros.end() && it->second.functionLike()) {
const Token *tok2 = appendTokens(&tokens, loc, B->next, macros, expandedmacros, parametertokens);
if (tok2)
nextTok = tok2->next;
}
}
expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
}
expandToken(output, loc, tokens.cfront(), macros, expandedmacros, parametertokens);
for (Token *b = tokensB.front(); b; b = b->next)
b->location = loc;
output->takeTokens(tokensB);
}

return nextTok;
Expand Down
120 changes: 120 additions & 0 deletions test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1071,6 +1071,105 @@ static void hashhash13()
ASSERT_EQUALS("\n& ab", preprocess(code2));
}

// Pasting an identifier suffix onto a string literal argument: the macro
// expansion is expected to produce the same output as the directly written
// "ABC"_ul form, i.e. the literal followed by a separate _ul token.
static void hashhash_string_literal()
{
    const char input[] =
        "#define UL(x) x##_ul\n"
        "\"ABC\"_ul;\n"
        "UL(\"ABC\");";
    const char expected[] = "\n\"ABC\" _ul ;\n\"ABC\" _ul ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// The ## paste happens inside CONCAT, which is itself invoked through the
// wrapper macro STR; the string literal argument must survive both levels.
static void hashhash_string_wrapped()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "#define STR(x) CONCAT(x,s)\n"
        "STR(\"ABC\");";
    const char expected[] = "\n\n\"ABC\" s ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting an identifier suffix onto a single-character literal.
static void hashhash_char_literal()
{
    const char input[] =
        "#define CH(x) x##_ch\n"
        "CH('a');";
    const char expected[] = "\n'a' _ch ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting an identifier suffix onto a multi-character literal ('abcd').
static void hashhash_multichar_literal()
{
    const char input[] =
        "#define CH(x) x##_ch\n"
        "CH('abcd');";
    const char expected[] = "\n'abcd' _ch ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting an identifier suffix onto a character literal whose content is an
// escaped single quote ('\'').
static void hashhash_char_escaped()
{
    const char input[] =
        "#define CH(x) x##_ch\n"
        "CH('\\'');";
    const char expected[] = "\n'\\'' _ch ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting a string literal with an empty second argument: only the literal
// is expected in the output.
static void hashhash_string_nothing()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "CONCAT(\"ABC\",);";
    const char expected[] = "\n\"ABC\" ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting a string literal with a character literal.
static void hashhash_string_char()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "CONCAT(\"ABC\", 'c');";

    // This is accepted today, although arguably it should not be: the
    // resulting token sequence is not useful.
    const char expected[] = "\n\"ABC\" 'c' ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting a string literal with a name that is itself a macro (LIT).
static void hashhash_string_name()
{
    const char input[] =
        "#define CONCAT(a,b) a##b\n"
        "#define LIT _literal\n"
        "CONCAT(\"string\", LIT);";

    // TODO: is this the correct result? clang rejects the input because it is
    // not really a valid construct, whereas gcc seems to accept it.
    // See https://gist.github.com/patrickdowling/877a25294f069bf059f3b07f9b5b7039
    const char expected[] = "\n\n\"string\" LIT ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Two chained ## operators (a##b##c) with a string literal as the first
// operand, reached through the wrapper macro PASTER: the two identifier
// parts are expected to merge into one token following the literal.
static void hashhashhash_int_literal()
{
    const char input[] =
        "#define CONCAT(a,b,c) a##b##c\n"
        "#define PASTER(a,b,c) CONCAT(a,b,c)\n"
        "PASTER(\"123\",_i,ul);";
    const char expected[] = "\n\n\"123\" _iul ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

// Pasting an identifier suffix onto an integer literal: the result is a
// single merged token, just like the directly written 1234_i form.
static void hashhash_int_literal()
{
    const char input[] =
        "#define PASTE(a,b) a##b\n"
        "PASTE(123,_i);\n"
        "1234_i;\n";
    const char expected[] = "\n123_i ;\n1234_i ;";

    ASSERT_EQUALS(expected, preprocess(input));
}

static void hashhash_invalid_1()
{
const char code[] = "#define f(a) (##x)\nf(1)";
Expand All @@ -1087,6 +1186,16 @@ static void hashhash_invalid_2()
ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'f', Invalid ## usage when expanding 'f'.\n", toString(outputList));
}

static void hashhash_invalid_3()
{
const char code[] =
"#define BAD(x) x##12345\nBAD(\"ABC\")";

simplecpp::OutputList outputList;
preprocess(code, simplecpp::DUI(), &outputList);
ASSERT_EQUALS("file0,1,syntax_error,failed to expand 'BAD', Invalid ## usage when expanding 'BAD'.\n", toString(outputList));
}

static void has_include_1()
{
const char code[] = "#ifdef __has_include\n"
Expand Down Expand Up @@ -2306,8 +2415,19 @@ int main(int argc, char **argv)
TEST_CASE(hashhash11); // #60: #define x # # #
TEST_CASE(hashhash12);
TEST_CASE(hashhash13);
TEST_CASE(hashhash_string_literal);
TEST_CASE(hashhash_string_wrapped);
TEST_CASE(hashhash_char_literal);
TEST_CASE(hashhash_multichar_literal);
TEST_CASE(hashhash_char_escaped);
TEST_CASE(hashhash_string_nothing);
TEST_CASE(hashhash_string_char);
TEST_CASE(hashhash_string_name);
TEST_CASE(hashhashhash_int_literal);
TEST_CASE(hashhash_int_literal);
TEST_CASE(hashhash_invalid_1);
TEST_CASE(hashhash_invalid_2);
TEST_CASE(hashhash_invalid_3);

// c++17 __has_include
TEST_CASE(has_include_1);
Expand Down