chore: init for toml languages

phodal · Jan 4, 2024 · ea0ec1e · ea0ec1e
1 parent 6ad9674
commit ea0ec1e
Show file tree

Hide file tree

Showing 6 changed files with 396 additions and 3 deletions.
diff --git a/chapi-parser-toml/build.gradle.kts b/chapi-parser-toml/build.gradle.kts
@@ -0,0 +1,58 @@
+plugins { // Gradle plugins applied to the TOML parser module
+    java
+    id("antlr") // contributes the generateGrammarSource task configured further down
+    kotlin("jvm")
+    kotlin("plugin.serialization") version "1.6.10"
+
+    `jacoco-conventions` // NOTE(review): presumably a convention plugin defined elsewhere in this build - confirm
+}
+
+repositories { // Gradle searches repositories in the order they are declared
+    mavenCentral()
+    mavenLocal() // falls back to the local Maven repository
+}
+
+dependencies {
+    // ANTLR tool: used by generateGrammarSource to turn the .g4 grammars into Java sources.
+    antlr("org.antlr:antlr4:4.13.1")
+    // The generated lexer/parser and TomlAnalyser only use org.antlr.v4.runtime, so the
+    // runtime artifact is enough here; the tool itself is already declared via `antlr` above.
+    implementation("org.antlr:antlr4-runtime:4.13.1")
+
+    // project deps
+    implementation(project(":chapi-domain"))
+    implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.3.2")
+
+    implementation(kotlin("stdlib-jdk8"))
+    // Kotlin reflection.
+    implementation(kotlin("reflect"))
+
+    testImplementation(kotlin("test"))
+
+    // JUnit 5
+    testImplementation("org.junit.jupiter:junit-jupiter-api:5.6.0")
+    testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:5.6.0")
+    testRuntimeOnly("org.junit.platform:junit-platform-console:1.6.0")
+}
+
+// Compile the ANTLR output (written below generated-src) together with the main sources.
+sourceSets {
+    main {
+        java.srcDir("${project.buildDir}/generated-src")
+    }
+}
+
+// ANTLR code generation for the TOML grammars.
+tasks.generateGrammarSource {
+    val packageOptions = listOf("-package", "chapi.parser.toml")
+    val toolOptions = listOf("-visitor", "-long-messages")
+
+    maxHeapSize = "64m"
+    // Extend the arguments that are already set instead of replacing them.
+    arguments = arguments + packageOptions + toolOptions
+    // The output path mirrors the package passed via -package.
+    outputDirectory = file("${project.buildDir}/generated-src/chapi/parser/toml")
+}
+
+// The Kotlin sources use the generated TomlLexer/TomlParser classes, so every ANTLR
+// generation task has to run before Kotlin compilation. (The former empty
+// tasks.withType<AntlrTask> { } block configured nothing and was dropped.)
+tasks.named("compileKotlin") {
+    dependsOn(tasks.withType<AntlrTask>())
+}
+
+// Run every Test task on the JUnit Platform and log passed, skipped and failed tests.
+tasks.withType<Test> {
+    testLogging.events("passed", "skipped", "failed")
+    useJUnitPlatform()
+}
diff --git a/chapi-parser-toml/src/main/antlr/TomlLexer.g4 b/chapi-parser-toml/src/main/antlr/TomlLexer.g4
@@ -0,0 +1,149 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
+// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true
+
+lexer grammar TomlLexer; // rules before the first mode declaration form the default mode: keys, table headers, comments
+
+WS               : [ \t]+ -> skip;
+NL               : ('\r'? '\n')+; // a run of consecutive line breaks becomes a single NL token
+COMMENT          : '#' (~[\n])*;
+L_BRACKET        : '[';
+DOUBLE_L_BRACKET : '[[';
+R_BRACKET        : ']';
+DOUBLE_R_BRACKET : ']]';
+EQUALS           : '=' -> pushMode(SIMPLE_VALUE_MODE); // what follows the equals sign is lexed by the value mode
+DOT              : '.';
+COMMA            : ',';
+
+fragment DIGIT : [0-9];
+fragment ALPHA : [A-Za-z];
+
+// strings: double-quoted BASIC_STRING accepts ESC sequences, single-quoted LITERAL_STRING has none; neither may contain a line feed
+fragment ESC     : '\\' (["\\/bfnrt] | UNICODE | EX_UNICODE);
+fragment UNICODE : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
+fragment EX_UNICODE:
+    'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
+;
+BASIC_STRING   : '"' (ESC | ~["\\\n])*? '"';
+LITERAL_STRING : '\'' (~['\n])*? '\'';
+
+// keys: an unquoted (bare) key is one or more ASCII letters, digits, hyphens or underscores
+UNQUOTED_KEY: (ALPHA | DIGIT | '-' | '_')+;
+
+mode SIMPLE_VALUE_MODE; // pushed by EQUALS; every scalar value token below ends with popMode
+
+VALUE_WS: WS -> skip;
+
+L_BRACE     : '{'       -> mode(INLINE_TABLE_MODE); // mode(...) rather than pushMode: this mode is replaced, not stacked
+ARRAY_START : L_BRACKET -> type(L_BRACKET), mode(ARRAY_MODE);
+
+// booleans
+BOOLEAN: ('true' | 'false') -> popMode;
+
+// strings: single-line forms reuse the default-mode rules and token types; the ML_ forms are delimited by triple quotes
+fragment ML_ESC      : '\\' '\r'? '\n' | ESC;
+VALUE_BASIC_STRING   : BASIC_STRING                    -> type(BASIC_STRING), popMode;
+ML_BASIC_STRING      : '"""' (ML_ESC | ~["\\])*? '"""' -> popMode;
+VALUE_LITERAL_STRING : LITERAL_STRING                  -> type(LITERAL_STRING), popMode;
+ML_LITERAL_STRING    : '\'\'\'' (.)*? '\'\'\''         -> popMode;
+
+// floating point numbers: a DEC_INT followed by an exponent, or by a fraction with an optional exponent; plus optionally signed inf and nan
+fragment EXP                 : ('e' | 'E') [+-]? ZERO_PREFIXABLE_INT;
+fragment ZERO_PREFIXABLE_INT : DIGIT (DIGIT | '_' DIGIT)*;
+fragment FRAC                : '.' ZERO_PREFIXABLE_INT;
+FLOAT                        : DEC_INT ( EXP | FRAC EXP?) -> popMode;
+INF                          : [+-]? 'inf'                -> popMode;
+NAN                          : [+-]? 'nan'                -> popMode;
+
+// integers: optionally signed decimal (multi-digit values start with 1-9), 0x hex, 0o octal, 0b binary; an underscore may separate digits
+fragment HEX_DIGIT : [A-Fa-f] | DIGIT;
+fragment DIGIT_1_9 : [1-9];
+fragment DIGIT_0_7 : [0-7];
+fragment DIGIT_0_1 : [0-1];
+DEC_INT            : [+-]? (DIGIT | (DIGIT_1_9 (DIGIT | '_' DIGIT)+)) -> popMode;
+HEX_INT            : '0x' HEX_DIGIT (HEX_DIGIT | '_' HEX_DIGIT)*      -> popMode;
+OCT_INT            : '0o' DIGIT_0_7 (DIGIT_0_7 | '_' DIGIT_0_7)*      -> popMode;
+BIN_INT            : '0b' DIGIT_0_1 (DIGIT_0_1 | '_' DIGIT_0_1)*      -> popMode;
+
+// dates: the four tokens at the end (offset date-time, local date-time, local date, local time) are assembled from the fragments below
+fragment YEAR         : DIGIT DIGIT DIGIT DIGIT;
+fragment MONTH        : DIGIT DIGIT;
+fragment DAY          : DIGIT DIGIT;
+fragment DELIM        : 'T' | 't' | ' '; // separator between the date part and the time part
+fragment HOUR         : DIGIT DIGIT;
+fragment MINUTE       : DIGIT DIGIT;
+fragment SECOND       : DIGIT DIGIT;
+fragment SECFRAC      : '.' DIGIT+;
+fragment NUMOFFSET    : ('+' | '-') HOUR ':' MINUTE;
+fragment OFFSET       : 'Z' | NUMOFFSET;
+fragment PARTIAL_TIME : HOUR ':' MINUTE ':' SECOND SECFRAC?;
+fragment FULL_DATE    : YEAR '-' MONTH '-' DAY;
+fragment FULL_TIME    : PARTIAL_TIME OFFSET;
+OFFSET_DATE_TIME      : FULL_DATE DELIM FULL_TIME    -> popMode;
+LOCAL_DATE_TIME       : FULL_DATE DELIM PARTIAL_TIME -> popMode;
+LOCAL_DATE            : FULL_DATE                    -> popMode;
+LOCAL_TIME            : PARTIAL_TIME                 -> popMode;
+
+mode INLINE_TABLE_MODE; // active between braces; key, dot, comma and equals tokens are retyped to the default-mode token types
+
+INLINE_TABLE_WS      : WS    -> skip;
+INLINE_TABLE_KEY_DOT : DOT   -> type(DOT);
+INLINE_TABLE_COMMA   : COMMA -> type(COMMA);
+R_BRACE              : '}'   -> popMode; // the closing brace leaves this mode
+
+INLINE_TABLE_KEY_BASIC_STRING   : BASIC_STRING   -> type(BASIC_STRING);
+INLINE_TABLE_KEY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
+INLINE_TABLE_KEY_UNQUOTED       : UNQUOTED_KEY   -> type(UNQUOTED_KEY);
+
+INLINE_TABLE_EQUALS: EQUALS -> type(EQUALS), pushMode(SIMPLE_VALUE_MODE); // same hand-over to value lexing as the default-mode EQUALS
+
+mode ARRAY_MODE; // active between brackets; value rules here declare only a type(...) command, ARRAY_END is the rule that pops
+
+ARRAY_WS      : WS      -> skip;
+ARRAY_NL      : NL      -> type(NL);
+ARRAY_COMMENT : COMMENT -> type(COMMENT);
+ARRAY_COMMA   : COMMA   -> type(COMMA);
+
+ARRAY_INLINE_TABLE_START : L_BRACE   -> type(L_BRACE), pushMode(INLINE_TABLE_MODE);
+NESTED_ARRAY_START       : L_BRACKET -> type(L_BRACKET), pushMode(ARRAY_MODE); // a nested array stacks another ARRAY_MODE
+ARRAY_END                : R_BRACKET -> type(R_BRACKET), popMode;
+
+ARRAY_BOOLEAN: BOOLEAN -> type(BOOLEAN);
+
+ARRAY_BASIC_STRING      : BASIC_STRING      -> type(BASIC_STRING);
+ARRAY_ML_BASIC_STRING   : ML_BASIC_STRING   -> type(ML_BASIC_STRING);
+ARRAY_LITERAL_STRING    : LITERAL_STRING    -> type(LITERAL_STRING);
+ARRAY_ML_LITERAL_STRING : ML_LITERAL_STRING -> type(ML_LITERAL_STRING);
+
+ARRAY_FLOAT : FLOAT -> type(FLOAT);
+ARRAY_INF   : INF   -> type(INF);
+ARRAY_NAN   : NAN   -> type(NAN);
+
+ARRAY_DEC_INT : DEC_INT -> type(DEC_INT);
+ARRAY_HEX_INT : HEX_INT -> type(HEX_INT);
+ARRAY_OCT_INT : OCT_INT -> type(OCT_INT);
+ARRAY_BIN_INT : BIN_INT -> type(BIN_INT);
+
+ARRAY_OFFSET_DATE_TIME : OFFSET_DATE_TIME -> type(OFFSET_DATE_TIME);
+ARRAY_LOCAL_DATE_TIME  : LOCAL_DATE_TIME  -> type(LOCAL_DATE_TIME);
+ARRAY_LOCAL_DATE       : LOCAL_DATE       -> type(LOCAL_DATE);
+ARRAY_LOCAL_TIME       : LOCAL_TIME       -> type(LOCAL_TIME);
diff --git a/chapi-parser-toml/src/main/antlr/TomlParser.g4 b/chapi-parser-toml/src/main/antlr/TomlParser.g4
@@ -0,0 +1,151 @@
+/*
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+*/
+
+// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
+// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging
+
+parser grammar TomlParser; // parser rules for TOML documents; token types come from TomlLexer
+
+options {
+    tokenVocab = TomlLexer;
+}
+
+document // entry rule: NL-separated expressions up to EOF
+    : expression (NL expression)* EOF
+    ;
+
+expression // one logical line: a key/value pair or a table header, each followed by an optional comment, or just an optional comment
+    : key_value comment
+    | table comment
+    | comment
+    ;
+
+comment // optional, so this rule can match nothing at all
+    : COMMENT?
+    ;
+
+key_value // key, equals sign, value
+    : key EQUALS value
+    ;
+
+key // either a single key or a dotted path of keys
+    : simple_key
+    | dotted_key
+    ;
+
+simple_key // one key segment, quoted or bare
+    : quoted_key
+    | unquoted_key
+    ;
+
+unquoted_key
+    : UNQUOTED_KEY
+    ;
+
+quoted_key // only the single-line string tokens are accepted as keys
+    : BASIC_STRING
+    | LITERAL_STRING
+    ;
+
+dotted_key // two or more key segments joined by DOT
+    : simple_key (DOT simple_key)+
+    ;
+
+value // any value that may appear after an equals sign or inside an array
+    : string
+    | integer
+    | floating_point
+    | bool_
+    | date_time
+    | array_
+    | inline_table
+    ;
+
+string // single-line and multi-line, basic and literal strings
+    : BASIC_STRING
+    | ML_BASIC_STRING
+    | LITERAL_STRING
+    | ML_LITERAL_STRING
+    ;
+
+integer // decimal, hexadecimal, octal or binary
+    : DEC_INT
+    | HEX_INT
+    | OCT_INT
+    | BIN_INT
+    ;
+
+floating_point // regular float, infinity or not-a-number
+    : FLOAT
+    | INF
+    | NAN
+    ;
+
+bool_
+    : BOOLEAN
+    ;
+
+date_time // the four date/time token kinds produced by the lexer
+    : OFFSET_DATE_TIME
+    | LOCAL_DATE_TIME
+    | LOCAL_DATE
+    | LOCAL_TIME
+    ;
+
+array_ // bracketed list; the values are optional, and comments or line breaks may precede the closing bracket
+    : L_BRACKET array_values? comment_or_nl R_BRACKET
+    ;
+
+array_values // right-recursive comma-separated values; the final value may be followed by a trailing comma
+    : (comment_or_nl value nl_or_comment COMMA array_values comment_or_nl)
+    | comment_or_nl value nl_or_comment COMMA?
+    ;
+
+comment_or_nl // zero or more line breaks, each optionally preceded by a comment
+    : (COMMENT? NL)*
+    ;
+
+nl_or_comment // zero or more line breaks, each optionally followed by a comment
+    : (NL COMMENT?)*
+    ;
+
+table // a table header, standard or array-of-tables
+    : standard_table
+    | array_table
+    ;
+
+standard_table // header of the form [key]
+    : L_BRACKET key R_BRACKET
+    ;
+
+inline_table // brace-delimited table written as a value
+    : L_BRACE inline_table_keyvals R_BRACE
+    ;
+
+inline_table_keyvals // possibly empty list of key/value pairs
+    : inline_table_keyvals_non_empty?
+    ;
+
+inline_table_keyvals_non_empty // comma-separated pairs; a COMMA must be followed by another pair, so no trailing comma
+    : key EQUALS value (COMMA inline_table_keyvals_non_empty)?
+    ;
+
+array_table // header of the form [[key]]
+    : DOUBLE_L_BRACKET key DOUBLE_R_BRACKET
+    ;
diff --git a/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlAnalyser.kt b/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlAnalyser.kt
@@ -0,0 +1,22 @@
+package chapi.parser.toml
+
+import chapi.domain.core.CodeContainer
+import chapi.parser.Analyser
+import org.antlr.v4.runtime.CharStreams
+import org.antlr.v4.runtime.CommonTokenStream
+import org.antlr.v4.runtime.tree.ParseTreeWalker
+/**
+ * Entry point for analysing TOML source text.
+ *
+ * Parses the text with the ANTLR-generated [TomlLexer] and [TomlParser], walks the
+ * resulting tree with a [TomlListener] and returns whatever that listener reports.
+ */
+class TomlAnalyser : Analyser {
+    /**
+     * Parses [code] as a TOML document and returns the listener's result.
+     *
+     * @param code TOML source text to parse.
+     * @param filePath handed to the [TomlListener] created for this run.
+     * @return the [CodeContainer] produced by [TomlListener.getNodeInfo].
+     */
+    override fun analysis(code: String, filePath: String): CodeContainer {
+        val documentTree = createParser(code).document()
+        val treeListener = TomlListener(filePath = filePath)
+        ParseTreeWalker().walk(treeListener, documentTree)
+        return treeListener.getNodeInfo()
+    }
+
+    /** Builds the char stream -> lexer -> token stream -> parser chain over [source]. */
+    private fun createParser(source: String): TomlParser {
+        val lexer = TomlLexer(CharStreams.fromString(source))
+        val tokens = CommonTokenStream(lexer)
+        return TomlParser(tokens)
+    }
+}
diff --git a/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlListener.kt b/chapi-parser-toml/src/main/kotlin/chapi/parser/toml/TomlListener.kt
@@ -0,0 +1,10 @@
+package chapi.parser.toml
+
+import chapi.domain.core.CodeContainer
+
+/**
+ * Parse-tree listener for TOML documents.
+ *
+ * Currently a stub: no listener callback is overridden and [getNodeInfo] returns a
+ * default-constructed [CodeContainer].
+ *
+ * @property filePath path supplied by the caller; stored but not read by this class yet.
+ */
+class TomlListener(val filePath: String) : TomlParserBaseListener() {
+    /** Returns a new, default-constructed [CodeContainer]. */
+    fun getNodeInfo(): CodeContainer = CodeContainer()
+}