Skip to content

Commit

Permalink
feat: replace bfj with custom json utils
Browse files Browse the repository at this point in the history
  • Loading branch information
shioju committed Dec 31, 2024
1 parent e4bb339 commit dd6c3b9
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 169 deletions.
163 changes: 5 additions & 158 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@
"axe-core": "^4.10.2",
"axios": "^1.7.4",
"base64-stream": "^1.0.0",
"bfj": "^9.1.1",
"cheerio": "^1.0.0-rc.12",
"crawlee": "^3.11.1",
"ejs": "^3.1.9",
Expand Down
75 changes: 75 additions & 0 deletions src/json-utils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import { Readable } from 'stream';

/**
 * Recursively serialize a value (objects, arrays, Dates, primitives) into JSON chunks.
 * This generator yields small JSON string segments so the full document never has to
 * be materialized in memory at once.
 *
 * Semantics mirror `JSON.stringify(value, null, indent)`:
 * - `Date` instances serialize to their ISO-8601 string.
 * - `undefined`, functions, and symbols are omitted from objects and become `null`
 *   inside arrays (the original implementation yielded the literal text `undefined`,
 *   producing invalid JSON).
 * - Empty arrays/objects serialize compactly as `[]` / `{}`.
 *
 * @param obj    The value to serialize.
 * @param depth  Current nesting depth; controls indentation. Internal — callers pass 0.
 * @param indent The string repeated once per depth level.
 */
function* serializeObject<T>(
  obj: T,
  depth: number = 0,
  indent: string = ' ',
): Generator<string, void, undefined> {
  const currentIndent = indent.repeat(depth);
  const nextIndent = indent.repeat(depth + 1);

  // 1. Dates: JSON.stringify would call toJSON(); emit the quoted ISO string directly.
  if (obj instanceof Date) {
    yield JSON.stringify(obj.toISOString());
    return;
  }

  // 2. Arrays
  if (Array.isArray(obj)) {
    if (obj.length === 0) {
      yield '[]';
      return;
    }
    yield '[\n';
    for (let i = 0; i < obj.length; i++) {
      if (i > 0) {
        // separate elements by comma + newline
        yield ',\n';
      }
      yield nextIndent;
      const element = obj[i];
      if (
        element === undefined ||
        typeof element === 'function' ||
        typeof element === 'symbol'
      ) {
        // JSON.stringify represents unserializable array elements as null.
        yield 'null';
      } else {
        yield* serializeObject(element, depth + 1, indent);
      }
    }
    yield `\n${currentIndent}]`;
    return;
  }

  // 3. Plain objects
  if (obj !== null && typeof obj === 'object') {
    // Drop entries JSON cannot represent, matching JSON.stringify's omission rule.
    const entries = Object.entries(obj).filter(
      ([, value]) =>
        value !== undefined && typeof value !== 'function' && typeof value !== 'symbol',
    );
    if (entries.length === 0) {
      yield '{}';
      return;
    }
    yield '{\n';
    for (let i = 0; i < entries.length; i++) {
      const [key, value] = entries[i];
      if (i > 0) {
        // separate properties by comma + newline
        yield ',\n';
      }
      yield `${nextIndent}${JSON.stringify(key)}: `;
      yield* serializeObject(value, depth + 1, indent);
    }
    yield `\n${currentIndent}}`;
    return;
  }

  // 4. Primitives (string, number, boolean, null): JSON.stringify handles escaping.
  // JSON.stringify returns undefined for a top-level undefined/function/symbol;
  // emit 'null' instead so the consuming Readable never receives an undefined chunk
  // (which would be treated as end-of-stream or throw).
  const text = JSON.stringify(obj);
  yield text === undefined ? 'null' : text;
}

/**
 * Create a Readable stream that emits the JSON serialization of `obj` by
 * consuming the `serializeObject` generator lazily.
 *
 * Improvements over the naive one-chunk-per-read version:
 * - Pushes chunks until backpressure (`push` returns false) or exhaustion,
 *   instead of exactly one chunk per `read()` call.
 * - A throw from the generator (e.g. `JSON.stringify` on a BigInt, or a
 *   stack overflow on a circular structure) is surfaced as a stream
 *   'error' event via `destroy()` rather than escaping `read()` synchronously.
 *
 * @param obj    The value to serialize to JSON.
 * @param indent Indentation unit passed through to the serializer.
 * @returns A Readable emitting UTF-8 JSON text chunks.
 */
export function objectToReadableStream<T>(obj: T, indent: string = ' '): Readable {
  const generator = serializeObject(obj, 0, indent);

  return new Readable({
    read() {
      try {
        let result = generator.next();
        while (!result.done) {
          if (!this.push(result.value)) {
            return; // Backpressure: resume when read() is called again.
          }
          result = generator.next();
        }
        this.push(null); // Signal end of the stream.
      } catch (err: unknown) {
        // Convert serialization failures into a stream error.
        this.destroy(err instanceof Error ? err : new Error(String(err)));
      }
    },
  });
}
20 changes: 10 additions & 10 deletions src/mergeAxeResults.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,10 @@ import {
import { consoleLogger, silentLogger } from './logs.js';
import itemTypeDescription from './constants/itemTypeDescription.js';
import { oobeeAiHtmlETL, oobeeAiRules } from './constants/oobeeAi.js';
import bfj from 'bfj';
import zlib from 'zlib';
import { Base64Encode } from 'base64-stream';
import { pipeline } from 'stream/promises';
import { objectToReadableStream } from './json-utils.js';

export type ItemsInfo = {
html: string;
Expand Down Expand Up @@ -370,11 +370,11 @@ const writeHTML = async (
outputStream.end();
});

console.log('Content appended successfully.');
consoleLogger.info('Content appended successfully.');
await cleanupFiles();

outputStream.on('error', err => {
console.error('Error writing to output file:', err);
consoleLogger.error('Error writing to output file:', err);
});
};

Expand Down Expand Up @@ -451,17 +451,17 @@ function writeLargeJsonToFile(obj, filePath) {
}

async function writeObjectToGzipBase64File(obj: object, outputFilePath: string) {
console.log('Producing large gzipped base64 from object...');
const jsonReadable = await bfj.streamify(obj);
consoleLogger.info('Producing large gzipped base64 from object...');
const jsonReadable = objectToReadableStream(obj);
const gzipStream = zlib.createGzip({
level: 6,
});
const base64EncodeStream = new Base64Encode();
const fileWriteStream = fs.createWriteStream(outputFilePath, { encoding: 'utf8' });
await pipeline(jsonReadable, gzipStream, base64EncodeStream, fileWriteStream);
const scanDataFileStats = fs.statSync(outputFilePath);
const fileStats = fs.statSync(outputFilePath);
return {
fileSize: scanDataFileStats.size,
fileSize: fileStats.size,
};
}

Expand Down Expand Up @@ -602,15 +602,15 @@ const writeCompressedBase64 = async (
// scanData
const scanDataFilePath = path.join(storagePath, 'scanData.json.gz.b64');
const { fileSize: scanDataFileSize } = await writeObjectToGzipBase64File(rest, scanDataFilePath);
console.log(`File size of scanData.json.gz.b64: ${scanDataFileSize} bytes`);
consoleLogger.info(`File size of scanData.json.gz.b64: ${scanDataFileSize} bytes`);

// scanItems
const scanItemsFilePath = path.join(storagePath, 'scanItems.json.gz.b64');
const { fileSize: scanItemsFileSize } = await writeObjectToGzipBase64File(
items,
scanItemsFilePath,
);
console.log(`File size of scanItems.json.gz.b64: ${scanItemsFileSize} bytes`);
consoleLogger.info(`File size of scanItems.json.gz.b64: ${scanItemsFileSize} bytes`);

// scanItemsSummary
// the below mutates the original items object, since it is expensive to clone
Expand Down Expand Up @@ -643,7 +643,7 @@ const writeCompressedBase64 = async (
items,
scanItemsSummaryFilePath,
);
console.log(`File size of scanItemsSummary.json.gz.b64: ${scanItemsSummaryFileSize} bytes`);
consoleLogger.info(`File size of scanItemsSummary.json.gz.b64: ${scanItemsSummaryFileSize} bytes`);

return {
scanDataFilePath,
Expand Down

0 comments on commit dd6c3b9

Please sign in to comment.