Skip to content

Commit

Permalink
dev: preprocessor
Browse files Browse the repository at this point in the history
  • Loading branch information
adrianschubek committed Jun 2, 2024
1 parent 845bcf2 commit 0c7f290
Show file tree
Hide file tree
Showing 12 changed files with 1,051 additions and 84 deletions.
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "utpp",
"version": "0.6.0",
"version": "1.0.0",
"description": "Universal Text Pre-Processor",
"main": "bin/index.js",
"bin": "bin/npx.js",
Expand Down
16 changes: 16 additions & 0 deletions src/analyzer.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import { ASTNode } from "./ast";
import { Config } from "./common";
import { Visitor } from "./visitor";

export function analyze(node: ASTNode, config: Config): ASTNode {
  // Analysis pass — currently a pass-through placeholder.
  // Planned responsibilities:
  //  - resolve imports: only \use{\file...}, \use{\url...} or \use{stdlib}
  //    count as imports; a full AST scan adds them to `config`
  //  - bare \url / \file nodes are NOT imports — they are on-demand,
  //    in-place reads used as-is and must never be evaluated here
  //  - node types of interest: use>raw, use>url, use>file
  //  - enforce allowed/disallowed features from `config`
  return node;
}

// Visitor implementation intended to carry out the analysis pass sketched in
// analyze() above. Skeleton only — no visit methods are implemented yet.
export class Analyzer implements Visitor<void> {

}
26 changes: 26 additions & 0 deletions src/ast.ts
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,32 @@ export class IfStatement extends ASTNode {
this.falseBranch = falseBranch;
}
}
export class MatchStatement extends ASTNode {
  /**
   * AST node for a \match statement.
   *
   * `value` is the matched expression; `cases` are evaluated first to last.
   * No dedicated default case is needed — \case{true}{...} serves as one.
   */
  constructor(
    public value: ASTNode,
    public cases: CaseStatement[],
    row: number,
    col: number
  ) {
    super(ASTNodeType._MATCH, row, col);
  }

  accept<T>(visitor: Visitor<T>): T {
    return visitor.visitMatchStatement(this);
  }
}
export class CaseStatement extends ASTNode {
  /** AST node for one \case{value}{body} arm of a \match statement. */
  constructor(
    public value: ASTNode,
    public body: ASTNode,
    row: number,
    col: number
  ) {
    super(ASTNodeType._CASE, row, col);
  }

  accept<T>(visitor: Visitor<T>): T {
    return visitor.visitCaseStatement(this);
  }
}

export class LoopStatement extends ASTNode {
accept<T>(visitor: Visitor<T>): T {
return visitor.visitLoopStatement(this);
Expand Down
86 changes: 76 additions & 10 deletions src/common.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import chalk from "chalk";
import { ASTNode, Params } from "./ast";
import { Params } from "./ast";

export enum TokenType {
T_RAW = "T_RAW",
Expand Down Expand Up @@ -29,7 +29,9 @@ export enum ASTNodeType {
_LOOP = "_LOOP",
_URL = "_URL",
_FILE = "_FILE",
_USE = "_USE" /* imports. must be top level. parse file then interpret */,
_USE = "_USE" /* imports. nein: must be top level. parse file then interpret */,
_MATCH = "_MATCH",
_CASE = "_CASE",
}

// built-in functions cannot be overridden
Expand All @@ -49,12 +51,72 @@ export enum BuiltInFunction {
FILE = "file",
USE = "use",
VAR = "var",
TRUE = "$true",
FALSE = "$false",
TRUE = "true",
FALSE = "false",
HALT = "halt" /* stop exec immediate */,
ASSERT = "assert" /* assert function */,
MATCH = "match",
CASE = "case",
}

// export interface Config {
// /* Tokens */
// prefix: string;
// argStart: string;
// argEnd: string;
// paramStart: string;
// paramAssign: string;
// paramSep: string;
// paramEnd: string;
// evalStart: string;
// evalEnd: string;
// /* Visitor */
// readUrls: boolean;
// readFiles: boolean;
// readEnv: boolean;
// eval: boolean;
// }

// All config values are stored as strings (they originate from the text
// being preprocessed); boolean-like keys hold "true"/"false".
export type Config = { [key in ConfigKey]: string };

// Known configuration keys, grouped by the pipeline stage that reads them.
export type ConfigKey =
/* Lexer */
| "prefix"
| "argStart"
| "argEnd"
| "paramStart"
| "paramAssign"
| "paramSep"
| "paramEnd"
| "evalStart"
| "evalEnd"
/* Visitor */
| "readUrls"
| "readFiles"
| "readEnv"
| "eval";
// | "allowBuiltinOverride";
// | string /* custom config key */;

// Defaults used when the input declares no meta-config.
// The token defaults give the LaTeX-like surface syntax: \fn[k=v,...]{arg}`code`.
export const DefaultConfig: Config = {
/* Tokens */
prefix: "\\",
argStart: "{",
argEnd: "}",
paramStart: "[",
paramAssign: "=",
paramSep: ",",
paramEnd: "]",
evalStart: "`",
evalEnd: "`",
/* Visitor */
readUrls: "true",
readFiles: "true",
readEnv: "true",
eval: "true",
// allowBuiltinOverride: "false",
};

export interface Token extends Indexable {
type: TokenType;
value: string;
Expand All @@ -67,10 +129,6 @@ export interface Indexable {
col: number;
}

export interface Visitor {
visit(node: ASTNode): void;
}

/**
* Check RAW for truthy values
*
Expand All @@ -80,13 +138,18 @@ export function truthy(value: string): boolean {
return /* val !== "false" && val !== "0" && */ val !== "$false";
}

export function info(msg: string, row?: number, col?: number): void {
  // Informational log line. TODO: can be silenced with the "-q" flag.
  const location = row !== undefined && col !== undefined ? `on line ${row}:${col}.` : "";
  console.log("ℹ️ " + chalk.cyanBright(` ${msg} ${location}`));
}

export function warn(msg: string, row?: number, col?: number): void {
// treat warning as errors config?
console.log("⚠️ " + chalk.yellow(` ${msg} ${row !== undefined && col !== undefined ? `on line ${row}:${col}.` : ""}\n`));
console.log("⚠️ " + chalk.yellow(` ${msg} ${row !== undefined && col !== undefined ? `on line ${row}:${col}.` : ""}`));
}

export function err(msg: string, row?: number, col?: number): never {
throw new Error("🔥 " + chalk.red(`${msg} ${row !== undefined && col !== undefined ? `on line ${row}:${col}.` : ""}\n`));
throw new Error("🔥 " + chalk.red(`${msg} ${row !== undefined && col !== undefined ? `on line ${row}:${col}.` : ""}`));
}

export function assertCount<T>(text: string, details: string, thisToken: Indexable, count: number, args?: T[]) {
Expand Down Expand Up @@ -119,6 +182,9 @@ export function assertFnArgRange<T>(thisToken: Indexable, fnName: string, min: n
// Asserts that the function \fnName received exactly `count` [k=v] parameters.
// Delegates to assertCount (defined above, outside this view) — presumably
// reports a positioned error via err() on mismatch; confirm against assertCount.
export function assertParamCount(thisToken: Indexable, fnName: string, count: number, params: Params | null) {
assertCount("parameters", `in function \\${fnName}`, thisToken, count, params?.kv ? Object.values(params.kv) : undefined);
}
// Asserts that the function \fnName received between `min` and `max` (inclusive)
// [k=v] parameters; the range-based counterpart of assertParamCount for
// variadic built-ins. Delegates to assertRange (defined outside this view).
export function assertParamRange(thisToken: Indexable, fnName: string, min: number, max: number, params: Params | null) {
assertRange("parameters", `in function \\${fnName}`, thisToken, min, max, params?.kv ? Object.values(params.kv) : undefined);
}

export function assertType<T>(details: string, thisToken: Indexable, expected: T, actual: T) {
if (actual !== expected) {
Expand Down
40 changes: 2 additions & 38 deletions src/lexer.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,9 @@
import { log } from "console";
import { TokenType, err } from "./common";
import type { Token } from "./common";
import type { Token, Config } from "./common";

const ROW_OFFSET = 0;
const INDEX_OFFSET = 0;
const PREFIX = "\\";
const ARG_START = "{";
const ARG_END = "}";
const PARAM_START = "[";
const PARAM_ASSIGN = "=";
const PARAM_SEP = ",";
const PARAM_END = "]";
const EVAL_START = "`";
const EVAL_END = "`";

export interface TokenizerConfig {
prefix: string;
argStart: string;
argEnd: string;
paramStart: string;
paramAssign: string;
paramSep: string;
paramEnd: string;
evalStart: string;
evalEnd: string;
}

export function tokenize(
input: string,
config: TokenizerConfig = {
prefix: PREFIX,
argStart: ARG_START,
argEnd: ARG_END,
paramStart: PARAM_START,
paramAssign: PARAM_ASSIGN,
paramSep: PARAM_SEP,
paramEnd: PARAM_END,
evalStart: EVAL_START,
evalEnd: EVAL_END,
}
): Token[] {
export function tokenize(input: string, config: Config): Token[] {
let row = 0 - ROW_OFFSET; // offset for inserted statements
let col = 0;
let index = 0;
Expand Down
65 changes: 43 additions & 22 deletions src/main.ts
Original file line number Diff line number Diff line change
@@ -1,45 +1,66 @@
import fs from "fs";
import { tokenize } from "./lexer";
import { log } from "console";
import { TokenType } from "./common";
import assert from "node:assert/strict";
import { parse } from "./parser";
import { Interpreter } from "./visitor";
import { Program } from "./ast";
import { interpret } from "./visitor";
import { preprocess } from "./preprocessor";
import chalk from "chalk";
import { analyze } from "./analyzer";

let original = "";
// original = fs.readFileSync(__dirname + "/../tests/loop.txt", "utf8");
original = fs.readFileSync(__dirname + "/../tests/t1.txt", "utf8");
// original = fs.readFileSync(__dirname + "/../tests/t1.txt", "utf8");
// original = fs.readFileSync(__dirname + "/../tests/t3.php", "utf8");
original = fs.readFileSync(__dirname + "/../tests/t4.txt", "utf8");

// 1. add default stuff
// original = "\\version{1}\n\\prefix{\\}\n\\config{files}{true}\n\\config{net}{true}\n\\config{env}{true}\n\\config{js}{true}\n" + original;
// \rawfile{path} imports the file, just copy/pastes the file content, no pipeline
// allow file imports: \file{name} just copy/pastes the file content
// \use{relativePathOrURL} imports the file, using this pipeline, and executes it
// pro: own \\\utppp[] block
// not easy. In the Interpreter, for \file{} just run the pipeline (except interpret): preprocess>tokenize>parse

// original += "\n\\halt";
// read cli: -q quiet? (print debug)

log("===== content: =====");
log(original);
const pj = require("../package.json");
console.log(chalk.yellowBright(chalk.bold(`🚀 utpp ${pj.version} `)));

// log("===== content: =====");
// log(original);

// 1. preprocessor (Meta Config)
log("===== preprocess: =====");
const [input, config] = preprocess(original);

console.log("with config: ", config);

// 2. lexer (Tokens)
log("===== tokenize: =====");
const tokenized = tokenize(original);
const tokenized = tokenize(input, config);
log(tokenized);

const reconstructedFromLexer = tokenized.map((t) => t.value).join("");
assert.strictEqual(original, reconstructedFromLexer);
log("lexer verified ✅");
// const reconstructedFromLexer = tokenized.map((t) => t.value).join("");
// assert.strictEqual(original, reconstructedFromLexer);
// log("lexer verified ✅"); // doesnt work when metaconfig used

// TODO: allow export/import (serialize) AST for faster executing

// 3. parser (AST)
log("===== parse: =====");
const ast = parse(tokenized) as Program;
const treeify = require("./utils/treeify");
treeify.asLines(ast, true, false, log);

// 4. interpreter (Execute)
const ast = parse(tokenized);
const treeify = require("./utils/treeify"); // debug
treeify.asLines(ast, true, false, log); // debug

// 4. AST Analyzer (imports, verify enabled/disabled features)
log("===== analyze: =====");
const program = analyze(ast, config);

// 5. interpreter (Execute on complete resolved AST)
log("===== interpret: =====");
const visitor = new Interpreter();
const generated = ast.accept(visitor);
const generated = interpret(program);
log(generated);
log("----------------------")
log("----------------------");

// ?. Validator/Rewriter (check enabled/disabled features)
// ?. Optimizer (dead code elimination, constant folding, etc.)

// maybe IR-Representation SSA form optimize...
14 changes: 7 additions & 7 deletions src/parser.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import { log } from "console";
import {
ASTNode,
Raw,
Expand All @@ -13,7 +14,7 @@ import {
FileStatement,
UseStatement,
} from "./ast";
import { BuiltInFunction, Token, TokenType, assertCount, assertRange, assertType, err } from "./common";
import { BuiltInFunction, Token, TokenType, assertCount, assertRange, err } from "./common";

// ne Build CST from tokens like \if{}{} --> Command(name="if",args=2...)
// ne Build AST from CST like Command(name="if",args=2...) --> IfStatement(condition=...,trueBranch=...,falseBranch=...)
Expand Down Expand Up @@ -116,25 +117,24 @@ export function parse(input: Token[], isRecursiveCall: boolean = false): ASTNode
}

function parseArguments(alwaysEvalNArgs?: number): ASTNode[] {
const as = consume(TokenType.T_ARG_START);
const args: ASTNode[] = [];

// FIXME may break if JS contains { }.
while (alwaysEvalNArgs && alwaysEvalNArgs > 0) {
while (alwaysEvalNArgs && alwaysEvalNArgs > 0 && hasMore() && option(TokenType.T_ARG_START)) {
const as = consume(TokenType.T_ARG_START);
alwaysEvalNArgs--;
let evalCode = "";
log("uhhhhhhhhhhhhhhhhhhhhhhhhhhh", alwaysEvalNArgs);
while (!option(TokenType.T_ARG_END)) evalCode += consumeAny().value;
consume(TokenType.T_ARG_END);
consume(TokenType.T_ARG_START);
args.push(new EvalStatement(evalCode, ...rowcol(as)));
}

while (hasMore()) {
while (hasMore() && option(TokenType.T_ARG_START)) {
consume(TokenType.T_ARG_START);
const arg = parse(input, true);
args.push(arg);
consume(TokenType.T_ARG_END);
if (!option(TokenType.T_ARG_START)) break;
consume(TokenType.T_ARG_START);
}

return args;
Expand Down
Loading

0 comments on commit 0c7f290

Please sign in to comment.