diff --git a/chapi-parser-toml/build.gradle.kts b/chapi-parser-toml/build.gradle.kts
new file mode 100644
index 00000000..1069985a
--- /dev/null
+++ b/chapi-parser-toml/build.gradle.kts
@@ -0,0 +1,58 @@
+plugins {
+    java
+    id("antlr") // generates the TOML lexer/parser from the grammars under src/main/antlr
+    kotlin("jvm")
+    kotlin("plugin.serialization") version "1.6.10"
+
+    `jacoco-conventions`
+}
+
+repositories {
+    mavenCentral()
+    mavenLocal()
+}
+
+dependencies {
+    antlr("org.antlr:antlr4:4.13.1") // ANTLR tool used by generateGrammarSource
+    // project deps
+    implementation(project(":chapi-domain"))
+    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.3.2")
+
+    implementation(kotlin("stdlib-jdk8"))
+    implementation(kotlin("reflect"))
+    // Kotlin test library.
+    testImplementation(kotlin("test"))
+
+    // JUnit 5
+    testImplementation("org.junit.jupiter:junit-jupiter-api:5.6.0")
+    testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:5.6.0")
+    testRuntimeOnly("org.junit.platform:junit-platform-console:1.6.0")
+
+    implementation("org.antlr:antlr4:4.13.1")
+    implementation("org.antlr:antlr4-runtime:4.13.1")
+}
+
+sourceSets.main {
+    java.srcDirs("${project.buildDir}/generated-src") // compile the ANTLR-generated sources with this module
+}
+
+tasks.generateGrammarSource { // generated classes go into package chapi.parser.toml, visitor classes included
+    maxHeapSize = "64m"
+    arguments = arguments + listOf("-package", "chapi.parser.toml") + listOf("-visitor", "-long-messages")
+    outputDirectory = file("${project.buildDir}/generated-src/chapi/parser/toml")
+}
+
+tasks.withType<AntlrTask> {
+    // no additional AntlrTask configuration
+}
+
+tasks.named("compileKotlin") {
+    dependsOn(tasks.withType<AntlrTask>()) // the Kotlin sources reference the generated TomlLexer/TomlParser
+}
+
+tasks.withType<Test> {
+    useJUnitPlatform()
+    testLogging {
+        events("passed", "skipped", "failed")
+    }
+}
diff --git a/chapi-parser-toml/src/main/antlr/TomlLexer.g4 b/chapi-parser-toml/src/main/antlr/TomlLexer.g4
new file mode 100644
index 00000000..e4e088a7
--- /dev/null
+++ b/chapi-parser-toml/src/main/antlr/TomlLexer.g4
@@ -0,0 +1,149 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. 
The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+*/
+
+// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
+// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
+
+lexer grammar TomlLexer; // default mode: keys, table brackets and '='; values are lexed in the modes declared further down
+
+WS : [ \t]+ -> skip; // spaces and tabs are discarded
+NL : ('\r'? '\n')+; // a run of consecutive line breaks becomes a single NL token
+COMMENT : '#' (~[\n])*; // '#' and everything up to, but not including, the next '\n'
+L_BRACKET : '[';
+DOUBLE_L_BRACKET : '[[';
+R_BRACKET : ']';
+DOUBLE_R_BRACKET : ']]';
+EQUALS : '=' -> pushMode(SIMPLE_VALUE_MODE); // a value follows: switch to value mode, remembering the current mode
+DOT : '.';
+COMMA : ',';
+
+fragment DIGIT : [0-9];
+fragment ALPHA : [A-Za-z];
+
+// strings: escape sequences, then the single-line basic (double-quoted) and literal (single-quoted) forms
+fragment ESC : '\\' (["\\/bfnrt] | UNICODE | EX_UNICODE);
+fragment UNICODE : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT; // \u followed by exactly four hex digits
+fragment EX_UNICODE: // \U followed by exactly eight hex digits
+    'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
+;
+BASIC_STRING : '"' (ESC | ~["\\\n])*? '"';
+LITERAL_STRING : '\'' (~['\n])*? '\'';
+
+// keys: bare keys are one or more letters, digits, '-' or '_'
+UNQUOTED_KEY: (ALPHA | DIGIT | '-' | '_')+;
+
+mode SIMPLE_VALUE_MODE; // entered after '='; every scalar value rule in this mode ends with popMode
+
+VALUE_WS: WS -> skip;
+
+L_BRACE : '{' -> mode(INLINE_TABLE_MODE); // mode(...) replaces the current mode instead of pushing another one
+ARRAY_START : L_BRACKET -> type(L_BRACKET), mode(ARRAY_MODE); // emitted with the shared L_BRACKET token type
+
+// booleans
+BOOLEAN: ('true' | 'false') -> popMode;
+
+// strings: value-position variants of the single-line forms, plus the triple-quoted multi-line forms
+fragment ML_ESC : '\\' '\r'? '\n' | ESC;
+VALUE_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING), popMode;
+ML_BASIC_STRING : '"""' (ML_ESC | ~["\\])*? '"""' -> popMode;
+VALUE_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING), popMode;
+ML_LITERAL_STRING : '\'\'\'' (.)*? '\'\'\'' -> popMode;
+
+// floating point numbers: a decimal integer followed by an exponent, or by a fraction with an optional exponent
+fragment EXP : ('e' | 'E') [+-]? ZERO_PREFIXABLE_INT;
+fragment ZERO_PREFIXABLE_INT : DIGIT (DIGIT | '_' DIGIT)*; // underscores are only accepted between digits
+fragment FRAC : '.' ZERO_PREFIXABLE_INT;
+FLOAT : DEC_INT ( EXP | FRAC EXP?) -> popMode;
+INF : [+-]? 'inf' -> popMode;
+NAN : [+-]? 'nan' -> popMode;
+
+// integers: decimal (optionally signed), hexadecimal, octal and binary
+fragment HEX_DIGIT : [A-Fa-f] | DIGIT;
+fragment DIGIT_1_9 : [1-9];
+fragment DIGIT_0_7 : [0-7];
+fragment DIGIT_0_1 : [0-1];
+DEC_INT : [+-]? (DIGIT | (DIGIT_1_9 (DIGIT | '_' DIGIT)+)) -> popMode; // multi-digit numbers must not start with 0
+HEX_INT : '0x' HEX_DIGIT (HEX_DIGIT | '_' HEX_DIGIT)* -> popMode;
+OCT_INT : '0o' DIGIT_0_7 (DIGIT_0_7 | '_' DIGIT_0_7)* -> popMode;
+BIN_INT : '0b' DIGIT_0_1 (DIGIT_0_1 | '_' DIGIT_0_1)* -> popMode;
+
+// dates and times: the fragments only fix digit counts and separators, they do not range-check the values
+fragment YEAR : DIGIT DIGIT DIGIT DIGIT;
+fragment MONTH : DIGIT DIGIT;
+fragment DAY : DIGIT DIGIT;
+fragment DELIM : 'T' | 't' | ' ';
+fragment HOUR : DIGIT DIGIT;
+fragment MINUTE : DIGIT DIGIT;
+fragment SECOND : DIGIT DIGIT;
+fragment SECFRAC : '.' DIGIT+;
+fragment NUMOFFSET : ('+' | '-') HOUR ':' MINUTE;
+fragment OFFSET : 'Z' | NUMOFFSET;
+fragment PARTIAL_TIME : HOUR ':' MINUTE ':' SECOND SECFRAC?;
+fragment FULL_DATE : YEAR '-' MONTH '-' DAY;
+fragment FULL_TIME : PARTIAL_TIME OFFSET;
+OFFSET_DATE_TIME : FULL_DATE DELIM FULL_TIME -> popMode;
+LOCAL_DATE_TIME : FULL_DATE DELIM PARTIAL_TIME -> popMode;
+LOCAL_DATE : FULL_DATE -> popMode;
+LOCAL_TIME : PARTIAL_TIME -> popMode;
+
+mode INLINE_TABLE_MODE; // between '{' and '}': keys, dots, commas and '=' are re-typed to the default-mode token types
+
+INLINE_TABLE_WS : WS -> skip;
+INLINE_TABLE_KEY_DOT : DOT -> type(DOT);
+INLINE_TABLE_COMMA : COMMA -> type(COMMA);
+R_BRACE : '}' -> popMode; // leaves the inline table and returns to the mode on top of the mode stack
+
+INLINE_TABLE_KEY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
+INLINE_TABLE_KEY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
+INLINE_TABLE_KEY_UNQUOTED : UNQUOTED_KEY -> type(UNQUOTED_KEY);
+
+INLINE_TABLE_EQUALS: EQUALS -> type(EQUALS), pushMode(SIMPLE_VALUE_MODE); // pushed so the value's popMode comes back to this mode
+
+mode ARRAY_MODE; // between '[' and ']': each rule re-types its token to the matching default/value-mode token type
+
+ARRAY_WS : WS -> skip;
+ARRAY_NL : NL -> type(NL); // line breaks and comments inside arrays are kept as tokens, not skipped
+ARRAY_COMMENT : COMMENT -> type(COMMENT);
+ARRAY_COMMA : COMMA -> type(COMMA);
+
+ARRAY_INLINE_TABLE_START : L_BRACE -> type(L_BRACE), pushMode(INLINE_TABLE_MODE); // pushed so '}' returns to the array
+NESTED_ARRAY_START : L_BRACKET -> type(L_BRACKET), pushMode(ARRAY_MODE); // pushed so the inner ']' returns to the outer array
+ARRAY_END : R_BRACKET -> type(R_BRACKET), popMode;
+
+ARRAY_BOOLEAN: BOOLEAN -> type(BOOLEAN);
+
+ARRAY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
+ARRAY_ML_BASIC_STRING : ML_BASIC_STRING -> type(ML_BASIC_STRING);
+ARRAY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
+ARRAY_ML_LITERAL_STRING : ML_LITERAL_STRING -> type(ML_LITERAL_STRING);
+
+ARRAY_FLOAT : FLOAT -> type(FLOAT);
+ARRAY_INF : INF -> type(INF);
+ARRAY_NAN : NAN -> type(NAN);
+
+ARRAY_DEC_INT : DEC_INT -> type(DEC_INT);
+ARRAY_HEX_INT : HEX_INT -> type(HEX_INT);
+ARRAY_OCT_INT : OCT_INT -> type(OCT_INT);
+ARRAY_BIN_INT : BIN_INT -> type(BIN_INT);
+
+ARRAY_OFFSET_DATE_TIME : OFFSET_DATE_TIME -> type(OFFSET_DATE_TIME);
+ARRAY_LOCAL_DATE_TIME : LOCAL_DATE_TIME -> type(LOCAL_DATE_TIME);
+ARRAY_LOCAL_DATE : LOCAL_DATE -> type(LOCAL_DATE);
+ARRAY_LOCAL_TIME : LOCAL_TIME -> type(LOCAL_TIME);
\ No newline at end of file
diff --git a/chapi-parser-toml/src/main/antlr/TomlParser.g4 b/chapi-parser-toml/src/main/antlr/TomlParser.g4
new file mode 100644
index 00000000..c54b3da0
--- /dev/null
+++ b/chapi-parser-toml/src/main/antlr/TomlParser.g4
@@ -0,0 +1,151 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied. See the License for the
+    specific language governing permissions and limitations
+    under the License.
+*/
+
+// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
+
+parser grammar TomlParser; // parser over the token types declared by TomlLexer (see tokenVocab)
+
+options {
+    tokenVocab = TomlLexer;
+}
+
+document // entry rule invoked by TomlAnalyser: expressions separated by NL tokens, then EOF
+    : expression (NL expression)* EOF
+    ;
+
+expression // one logical line: a key/value pair, a table header, or nothing, each with an optional comment
+    : key_value comment
+    | table comment
+    | comment
+    ;
+
+comment // zero or one COMMENT token, so this rule can match the empty input
+    : COMMENT?
+    ;
+
+key_value
+    : key EQUALS value
+    ;
+
+key
+    : simple_key
+    | dotted_key
+    ;
+
+simple_key
+    : quoted_key
+    | unquoted_key
+    ;
+
+unquoted_key
+    : UNQUOTED_KEY
+    ;
+
+quoted_key
+    : BASIC_STRING
+    | LITERAL_STRING
+    ;
+
+dotted_key // two or more simple keys joined by DOT
+    : simple_key (DOT simple_key)+
+    ;
+
+value
+    : string
+    | integer
+    | floating_point
+    | bool_
+    | date_time
+    | array_
+    | inline_table
+    ;
+
+string
+    : BASIC_STRING
+    | ML_BASIC_STRING
+    | LITERAL_STRING
+    | ML_LITERAL_STRING
+    ;
+
+integer
+    : DEC_INT
+    | HEX_INT
+    | OCT_INT
+    | BIN_INT
+    ;
+
+floating_point
+    : FLOAT
+    | INF
+    | NAN
+    ;
+
+bool_
+    : BOOLEAN
+    ;
+
+date_time
+    : OFFSET_DATE_TIME
+    | LOCAL_DATE_TIME
+    | LOCAL_DATE
+    | LOCAL_TIME
+    ;
+
+array_ // bracketed list of values; may be empty and may contain comments and line breaks
+    : L_BRACKET array_values? comment_or_nl R_BRACKET
+    ;
+
+array_values // right-recursive value list; the final value may be followed by a trailing COMMA
+    : (comment_or_nl value nl_or_comment COMMA array_values comment_or_nl)
+    | comment_or_nl value nl_or_comment COMMA?
+    ;
+
+comment_or_nl // any number of line breaks, each optionally preceded by a comment
+    : (COMMENT? NL)*
+    ;
+
+nl_or_comment // any number of line breaks, each optionally followed by a comment
+    : (NL COMMENT?)*
+    ;
+
+table
+    : standard_table
+    | array_table
+    ;
+
+standard_table // [key]
+    : L_BRACKET key R_BRACKET
+    ;
+
+inline_table // { key = value, ... }
+    : L_BRACE inline_table_keyvals R_BRACE
+    ;
+
+inline_table_keyvals // optional, so an empty inline table is accepted
+    : inline_table_keyvals_non_empty?
+    ;
+
+inline_table_keyvals_non_empty // right-recursive list; a COMMA must be followed by another key/value pair
+    : key EQUALS value (COMMA inline_table_keyvals_non_empty)?
+    ;
+
+array_table // [[key]]
+    : DOUBLE_L_BRACKET key DOUBLE_R_BRACKET
+    ;
\ No newline at end of file
diff --git a/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlAnalyser.kt b/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlAnalyser.kt
new file mode 100644
index 00000000..9e2862bb
--- /dev/null
+++ b/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlAnalyser.kt
@@ -0,0 +1,22 @@
+package chapi.parser.toml
+
+import chapi.domain.core.CodeContainer
+import chapi.parser.Analyser
+import org.antlr.v4.runtime.CharStreams
+import org.antlr.v4.runtime.CommonTokenStream
+import org.antlr.v4.runtime.tree.ParseTreeWalker
+class TomlAnalyser : Analyser { // Analyser implementation backed by the generated TomlLexer/TomlParser
+    override fun analysis(code: String, filePath: String): CodeContainer { // code: TOML source text; filePath is handed to the listener
+        val context = this.parse(code).document() // parse from the grammar's entry rule
+        val listener = TomlListener(filePath = filePath)
+        ParseTreeWalker().walk(listener, context)
+        return listener.getNodeInfo() // whatever the listener produced during the walk
+    }
+
+    private fun parse(str: String): TomlParser = // string -> char stream -> lexer -> token stream -> parser
+        CharStreams.fromString(str)
+            .let(::TomlLexer)
+            .let(::CommonTokenStream)
+            .let(::TomlParser)
+
+}
diff --git a/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlListener.kt b/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlListener.kt
new file mode 100644
index 00000000..d335ff19
--- /dev/null
+++ b/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlListener.kt
@@ -0,0 +1,10 @@
+package chapi.parser.toml
+
+import chapi.domain.core.CodeContainer
+
+class TomlListener(val filePath: String) : TomlParserBaseListener() { // no parse-tree callbacks are overridden yet
+    fun getNodeInfo(): CodeContainer { // result accessor called by TomlAnalyser after the tree walk
+        return CodeContainer() // always a default-constructed container; filePath is not used here
+    }
+
+}
diff --git a/settings.gradle.kts b/settings.gradle.kts index 020889da..ca423766 100644 --- a/settings.gradle.kts +++ b/settings.gradle.kts @@ -3,20 +3,23 @@ rootProject.name = "chapi" include( ":chapi-domain", ":chapi-helper", -// tier 1 languages + // tier 1 languages ":chapi-ast-java", ":chapi-ast-typescript", -// tier 2 languages + // tier 2 languages ":chapi-ast-kotlin", ":chapi-ast-go", ":chapi-ast-python", 
":chapi-ast-scala", -// tier 3 languages + // tier 3 languages ":chapi-ast-rust", ":chapi-ast-csharp", ":chapi-ast-c", ":chapi-ast-cpp", + + // others + ":chapi-parser-toml", )