Skip to content

Commit

Permalink
chore: init for toml languages
Browse files Browse the repository at this point in the history
  • Loading branch information
phodal committed Jan 4, 2024
1 parent 6ad9674 commit ea0ec1e
Show file tree
Hide file tree
Showing 6 changed files with 396 additions and 3 deletions.
58 changes: 58 additions & 0 deletions chapi-parser-toml/build.gradle.kts
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
// Gradle plugins applied to this module.
plugins {
java
// Provides the generateGrammarSource task and the `antlr` dependency configuration used below.
id("antlr")
kotlin("jvm")
kotlin("plugin.serialization") version "1.6.10"

// NOTE(review): presumably a precompiled convention plugin supplied by the build itself — confirm where it is defined.
`jacoco-conventions`
}

// Repositories used to resolve this module's dependencies.
repositories {
mavenCentral()
// NOTE(review): mavenLocal() makes resolution depend on the local Maven cache of the machine running the build — confirm it is required.
mavenLocal()
}

// Dependencies of the TOML parser module.
dependencies {
// ANTLR tool, placed on the `antlr` configuration contributed by the antlr plugin.
antlr("org.antlr:antlr4:4.13.1")
// Project dependencies.
implementation(project(":chapi-domain"))
implementation("org.jetbrains.kotlinx:kotlinx-serialization-json:1.3.2")

// Kotlin standard library and reflection.
implementation(kotlin("stdlib-jdk8"))
implementation(kotlin("reflect"))
// Kotlin test library.
testImplementation(kotlin("test"))

// JUnit 5
testImplementation("org.junit.jupiter:junit-jupiter-api:5.6.0")
testRuntimeOnly("org.junit.jupiter:junit-jupiter-engine:5.6.0")
testRuntimeOnly("org.junit.platform:junit-platform-console:1.6.0")

// ANTLR tool and runtime on the implementation classpath.
// NOTE(review): the full antlr4 tool artifact is declared here in addition to antlr4-runtime — confirm the tool is really needed outside grammar generation.
implementation("org.antlr:antlr4:4.13.1")
implementation("org.antlr:antlr4-runtime:4.13.1")
}

// Adds <buildDir>/generated-src as a Java source root of the main source set; this is the
// tree the grammar generation task writes into.
// layout.buildDirectory is used instead of the deprecated Project.buildDir accessor; it
// resolves to the same directory.
sourceSets.main {
    java.srcDirs(layout.buildDirectory.dir("generated-src"))
}

// Configures ANTLR code generation for the grammars of this module.
tasks.generateGrammarSource {
    maxHeapSize = "64m"
    // -package puts the generated classes in chapi.parser.toml; -visitor additionally
    // generates visitor classes; -long-messages makes ANTLR print detailed error messages.
    arguments = arguments + listOf("-package", "chapi.parser.toml", "-visitor", "-long-messages")
    // The output path mirrors the package name so the generated .java files sit in the
    // directory matching their package below the generated-src source root.
    // layout.buildDirectory replaces the deprecated Project.buildDir accessor (same location).
    outputDirectory = layout.buildDirectory.dir("generated-src/chapi/parser/toml").get().asFile
}

// Kotlin compilation must run after every ANTLR task so that the generated sources exist
// before the compiler looks for them.
// (The former empty `tasks.withType<AntlrTask> { }` block configured nothing and was removed.)
tasks.named("compileKotlin") {
    dependsOn(tasks.withType<AntlrTask>())
}

// Runs all Test tasks on the JUnit Platform and logs passed, skipped and failed test events.
tasks.withType<Test> {
useJUnitPlatform()
testLogging {
events("passed", "skipped", "failed")
}
}
149 changes: 149 additions & 0 deletions chapi-parser-toml/src/main/antlr/TomlLexer.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/

// $antlr-format alignTrailingComments true, columnLimit 150, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine true, allowShortBlocksOnASingleLine true, minEmptyLines 0, alignSemicolons ownLine
// $antlr-format alignColons trailing, singleLineOverrulesHangingColon true, alignLexerCommands true, alignLabels true, alignTrailers true

// Lexer for TOML. The rules up to the first `mode` declaration form the default mode, which
// tokenizes keys, brackets, comments and line breaks. Values are tokenized in separate modes.
lexer grammar TomlLexer;

// Spaces and tabs are discarded.
WS : [ \t]+ -> skip;
// A run of consecutive line breaks (LF or CRLF) is emitted as a single NL token.
NL : ('\r'? '\n')+;
// A comment runs from '#' up to, but not including, the next '\n'.
// NOTE(review): '\r' is not excluded, so with CRLF line endings the comment text would include the '\r'.
COMMENT : '#' (~[\n])*;
L_BRACKET : '[';
DOUBLE_L_BRACKET : '[[';
R_BRACKET : ']';
DOUBLE_R_BRACKET : ']]';
// '=' pushes SIMPLE_VALUE_MODE so that the text following it is lexed as a value.
EQUALS : '=' -> pushMode(SIMPLE_VALUE_MODE);
DOT : '.';
COMMA : ',';

fragment DIGIT : [0-9];
fragment ALPHA : [A-Za-z];

// Strings.
// ESC is a backslash followed by one of " \ / b f n r t, or by a 4-digit (\u) or 8-digit (\U) hex escape.
fragment ESC : '\\' (["\\/bfnrt] | UNICODE | EX_UNICODE);
fragment UNICODE : 'u' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;
fragment EX_UNICODE:
'U' HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
;
// Double-quoted single-line string: escape sequences or any character except '"', '\' and '\n'.
BASIC_STRING : '"' (ESC | ~["\\\n])*? '"';
// Single-quoted single-line string: any character except "'" and '\n'; no escape sequences.
LITERAL_STRING : '\'' (~['\n])*? '\'';
// Bare keys: one or more ASCII letters, digits, '-' or '_'.
UNQUOTED_KEY: (ALPHA | DIGIT | '-' | '_')+;
// Value mode, pushed by EQUALS. Every scalar value token below carries popMode, so the lexer
// leaves this mode as soon as one value has been matched.
mode SIMPLE_VALUE_MODE;
VALUE_WS: WS -> skip;
// '{' and '[' use mode(...) rather than pushMode(...): the current mode is replaced instead of
// stacked, so the popMode on the closing '}' / ']' pops the entry that EQUALS pushed.
L_BRACE : '{' -> mode(INLINE_TABLE_MODE);
ARRAY_START : L_BRACKET -> type(L_BRACKET), mode(ARRAY_MODE);
// booleans
BOOLEAN: ('true' | 'false') -> popMode;
// strings
// ML_ESC additionally allows a backslash directly followed by a line break inside multi-line basic strings.
fragment ML_ESC : '\\' '\r'? '\n' | ESC;
VALUE_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING), popMode;
// NOTE(review): the body excludes '"' entirely, so a lone double quote inside a """ string is not matched by this rule.
ML_BASIC_STRING : '"""' (ML_ESC | ~["\\])*? '"""' -> popMode;
VALUE_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING), popMode;
// Multi-line literal string: any characters, matched non-greedily up to the next '''.
ML_LITERAL_STRING : '\'\'\'' (.)*? '\'\'\'' -> popMode;
// floating point numbers
fragment EXP : ('e' | 'E') [+-]? ZERO_PREFIXABLE_INT;
// Digits with optional single underscores between digits; leading zeros are allowed.
fragment ZERO_PREFIXABLE_INT : DIGIT (DIGIT | '_' DIGIT)*;
fragment FRAC : '.' ZERO_PREFIXABLE_INT;
// A decimal integer followed by an exponent, or by a fraction with an optional exponent.
FLOAT : DEC_INT ( EXP | FRAC EXP?) -> popMode;
INF : [+-]? 'inf' -> popMode;
NAN : [+-]? 'nan' -> popMode;
// integers
fragment HEX_DIGIT : [A-Fa-f] | DIGIT;
fragment DIGIT_1_9 : [1-9];
fragment DIGIT_0_7 : [0-7];
fragment DIGIT_0_1 : [0-1];
// Optional sign, then either a single digit or a multi-digit number that starts with 1-9.
DEC_INT : [+-]? (DIGIT | (DIGIT_1_9 (DIGIT | '_' DIGIT)+)) -> popMode;
// Prefixed integers (0x, 0o, 0b); no sign is accepted by these rules.
HEX_INT : '0x' HEX_DIGIT (HEX_DIGIT | '_' HEX_DIGIT)* -> popMode;
OCT_INT : '0o' DIGIT_0_7 (DIGIT_0_7 | '_' DIGIT_0_7)* -> popMode;
BIN_INT : '0b' DIGIT_0_1 (DIGIT_0_1 | '_' DIGIT_0_1)* -> popMode;
// dates
// These fragments only check the number of digits, not the numeric ranges of the fields.
fragment YEAR : DIGIT DIGIT DIGIT DIGIT;
fragment MONTH : DIGIT DIGIT;
fragment DAY : DIGIT DIGIT;
// Separator between date and time: 'T', 't' or a single space.
fragment DELIM : 'T' | 't' | ' ';
fragment HOUR : DIGIT DIGIT;
fragment MINUTE : DIGIT DIGIT;
fragment SECOND : DIGIT DIGIT;
fragment SECFRAC : '.' DIGIT+;
fragment NUMOFFSET : ('+' | '-') HOUR ':' MINUTE;
// NOTE(review): only an upper-case 'Z' is matched here; a lower-case 'z' offset is not.
fragment OFFSET : 'Z' | NUMOFFSET;
fragment PARTIAL_TIME : HOUR ':' MINUTE ':' SECOND SECFRAC?;
fragment FULL_DATE : YEAR '-' MONTH '-' DAY;
fragment FULL_TIME : PARTIAL_TIME OFFSET;
OFFSET_DATE_TIME : FULL_DATE DELIM FULL_TIME -> popMode;
LOCAL_DATE_TIME : FULL_DATE DELIM PARTIAL_TIME -> popMode;
LOCAL_DATE : FULL_DATE -> popMode;
LOCAL_TIME : PARTIAL_TIME -> popMode;
// Mode used between '{' and '}' of an inline table. Key, dot, comma and '=' tokens are
// re-emitted under their default-mode token types via type(...).
// This mode declares no rule for line breaks or comments.
mode INLINE_TABLE_MODE;
INLINE_TABLE_WS : WS -> skip;
INLINE_TABLE_KEY_DOT : DOT -> type(DOT);
INLINE_TABLE_COMMA : COMMA -> type(COMMA);
// '}' ends the inline table and pops back to the previous mode on the stack.
R_BRACE : '}' -> popMode;
INLINE_TABLE_KEY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
INLINE_TABLE_KEY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
INLINE_TABLE_KEY_UNQUOTED : UNQUOTED_KEY -> type(UNQUOTED_KEY);
// '=' inside an inline table pushes SIMPLE_VALUE_MODE to lex the value of the entry.
INLINE_TABLE_EQUALS: EQUALS -> type(EQUALS), pushMode(SIMPLE_VALUE_MODE);
// Mode used between '[' and ']' of an array value. Each rule re-emits a token defined in
// another mode under its original token type via type(...); in this mode only ARRAY_END
// declares popMode.
mode ARRAY_MODE;
ARRAY_WS : WS -> skip;
// Line breaks and comments are tokenized inside arrays.
ARRAY_NL : NL -> type(NL);
ARRAY_COMMENT : COMMENT -> type(COMMENT);
ARRAY_COMMA : COMMA -> type(COMMA);
// Nested inline tables and nested arrays use pushMode, so their closing '}' / ']' pops back into ARRAY_MODE.
ARRAY_INLINE_TABLE_START : L_BRACE -> type(L_BRACE), pushMode(INLINE_TABLE_MODE);
NESTED_ARRAY_START : L_BRACKET -> type(L_BRACKET), pushMode(ARRAY_MODE);
// ']' closes the current array and pops its mode.
ARRAY_END : R_BRACKET -> type(R_BRACKET), popMode;
// Scalar values.
ARRAY_BOOLEAN: BOOLEAN -> type(BOOLEAN);
ARRAY_BASIC_STRING : BASIC_STRING -> type(BASIC_STRING);
ARRAY_ML_BASIC_STRING : ML_BASIC_STRING -> type(ML_BASIC_STRING);
ARRAY_LITERAL_STRING : LITERAL_STRING -> type(LITERAL_STRING);
ARRAY_ML_LITERAL_STRING : ML_LITERAL_STRING -> type(ML_LITERAL_STRING);
ARRAY_FLOAT : FLOAT -> type(FLOAT);
ARRAY_INF : INF -> type(INF);
ARRAY_NAN : NAN -> type(NAN);
ARRAY_DEC_INT : DEC_INT -> type(DEC_INT);
ARRAY_HEX_INT : HEX_INT -> type(HEX_INT);
ARRAY_OCT_INT : OCT_INT -> type(OCT_INT);
ARRAY_BIN_INT : BIN_INT -> type(BIN_INT);
ARRAY_OFFSET_DATE_TIME : OFFSET_DATE_TIME -> type(OFFSET_DATE_TIME);
ARRAY_LOCAL_DATE_TIME : LOCAL_DATE_TIME -> type(LOCAL_DATE_TIME);
ARRAY_LOCAL_DATE : LOCAL_DATE -> type(LOCAL_DATE);
ARRAY_LOCAL_TIME : LOCAL_TIME -> type(LOCAL_TIME);
151 changes: 151 additions & 0 deletions chapi-parser-toml/src/main/antlr/TomlParser.g4
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
/*
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
*/

// $antlr-format alignTrailingComments true, columnLimit 150, minEmptyLines 1, maxEmptyLinesToKeep 1, reflowComments false, useTab false
// $antlr-format allowShortRulesOnASingleLine false, allowShortBlocksOnASingleLine true, alignSemicolons hanging, alignColons hanging

// Parser for TOML documents, built on the token types defined by TomlLexer.
parser grammar TomlParser;

options {
tokenVocab = TomlLexer;
}

// Entry rule: one or more expressions separated by NL tokens, terminated by EOF.
document
: expression (NL expression)* EOF
;

// One logical line: a key/value pair or a table header, each optionally followed by a
// comment, or a comment on its own. Because `comment` may match nothing, an expression
// can also be empty.
expression
: key_value comment
| table comment
| comment
;

// An optional COMMENT token.
comment
: COMMENT?
;

// A key, '=' and the value assigned to it.
key_value
: key EQUALS value
;

// A key is either a single key or two or more keys joined by dots.
key
: simple_key
| dotted_key
;

simple_key
: quoted_key
| unquoted_key
;

unquoted_key
: UNQUOTED_KEY
;

// Quoted keys reuse the single-line string tokens.
quoted_key
: BASIC_STRING
| LITERAL_STRING
;

// At least one DOT is required here, so a dotted key always has two or more parts.
dotted_key
: simple_key (DOT simple_key)+
;

// Any value that may appear on the right-hand side of '=' or inside an array.
value
: string
| integer
| floating_point
| bool_
| date_time
| array_
| inline_table
;

// Single-line and multi-line strings, in basic (double-quoted) and literal (single-quoted) form.
string
: BASIC_STRING
| ML_BASIC_STRING
| LITERAL_STRING
| ML_LITERAL_STRING
;

// Decimal, hexadecimal, octal and binary integers.
integer
: DEC_INT
| HEX_INT
| OCT_INT
| BIN_INT
;

// Floats, including the special inf and nan tokens.
floating_point
: FLOAT
| INF
| NAN
;

bool_
: BOOLEAN
;

// The four date/time token types produced by the lexer.
date_time
: OFFSET_DATE_TIME
| LOCAL_DATE_TIME
| LOCAL_DATE
| LOCAL_TIME
;

// An array: '[' optional values ']'. Comments and line breaks may precede the closing bracket.
array_
: L_BRACKET array_values? comment_or_nl R_BRACKET
;

// Right-recursive list of values separated by COMMA. The second alternative matches the
// last value, which may be followed by an optional trailing COMMA. Line breaks and comments
// are allowed before and after each value.
array_values
: (comment_or_nl value nl_or_comment COMMA array_values comment_or_nl)
| comment_or_nl value nl_or_comment COMMA?
;

// Zero or more line breaks, each optionally preceded by a comment.
comment_or_nl
: (COMMENT? NL)*
;

// Zero or more line breaks, each optionally followed by a comment.
nl_or_comment
: (NL COMMENT?)*
;

// A table header: either a standard table ([key]) or an array-of-tables header ([[key]]).
table
: standard_table
| array_table
;

standard_table
: L_BRACKET key R_BRACKET
;

// An inline table: '{' optional key/value entries '}'.
inline_table
: L_BRACE inline_table_keyvals R_BRACE
;

// May be empty, which allows `{}`.
inline_table_keyvals
: inline_table_keyvals_non_empty?
;

// Right-recursive list of `key = value` entries separated by COMMA; a COMMA must always be
// followed by another entry, so a trailing comma is not accepted.
inline_table_keyvals_non_empty
: key EQUALS value (COMMA inline_table_keyvals_non_empty)?
;

array_table
: DOUBLE_L_BRACKET key DOUBLE_R_BRACKET
;
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
package chapi.parser.toml

import chapi.domain.core.CodeContainer
import chapi.parser.Analyser
import org.antlr.v4.runtime.CharStreams
import org.antlr.v4.runtime.CommonTokenStream
import org.antlr.v4.runtime.tree.ParseTreeWalker
/**
 * [Analyser] implementation for TOML source text.
 *
 * The text is parsed with the ANTLR-generated [TomlLexer] and [TomlParser], and the resulting
 * parse tree is walked with a [TomlListener].
 */
class TomlAnalyser : Analyser {
    /**
     * Parses [code] starting at the `document` rule, walks the parse tree with a
     * [TomlListener] created for [filePath], and returns what the listener reports.
     *
     * @param code TOML source text to analyse.
     * @param filePath path handed to the listener.
     * @return the [CodeContainer] returned by [TomlListener.getNodeInfo].
     */
    override fun analysis(code: String, filePath: String): CodeContainer {
        val documentTree = parse(code).document()
        val collector = TomlListener(filePath = filePath)
        ParseTreeWalker().walk(collector, documentTree)
        return collector.getNodeInfo()
    }

    /** Builds the lexer -> token stream -> parser pipeline over [str]. */
    private fun parse(str: String): TomlParser {
        val lexer = TomlLexer(CharStreams.fromString(str))
        val tokens = CommonTokenStream(lexer)
        return TomlParser(tokens)
    }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
package chapi.parser.toml

import chapi.domain.core.CodeContainer

/**
 * Parse-tree listener for TOML documents.
 *
 * No listener callbacks are overridden here, so walking a tree with this listener collects
 * nothing; [filePath] is stored but not read by this class.
 */
class TomlListener(val filePath: String) : TomlParserBaseListener() {
    /** Returns a newly created [CodeContainer] built with its default constructor arguments. */
    fun getNodeInfo(): CodeContainer = CodeContainer()
}
Loading

0 comments on commit ea0ec1e

Please sign in to comment.