From 1904e8ff0a082bde3243ecbc70fac579d907976b Mon Sep 17 00:00:00 2001 From: macbre Date: Fri, 24 Nov 2023 16:38:06 +0000 Subject: [PATCH 01/11] Add the initial code structure --- .gitignore | 5 ++++- phpunit.xml | 27 +++++++++++++++++++++++++++ src/JsonlParser.php | 29 +++++++++++++++++++++++++++++ tests/BaseTestCase.php | 20 ++++++++++++++++++++ tests/JsonParserTest.php | 14 ++++++++++++++ 5 files changed, 94 insertions(+), 1 deletion(-) create mode 100644 phpunit.xml create mode 100644 src/JsonlParser.php create mode 100644 tests/BaseTestCase.php create mode 100644 tests/JsonParserTest.php diff --git a/.gitignore b/.gitignore index 1ddcf91..f12be7f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,5 @@ -.idea/ +/.idea/ /vendor/ +/.php-cs-fixer.cache +/.phpunit.cache/ +/.phpunit.result.cache diff --git a/phpunit.xml b/phpunit.xml new file mode 100644 index 0000000..29c238c --- /dev/null +++ b/phpunit.xml @@ -0,0 +1,27 @@ + + + + + tests + + + + + + src + + + diff --git a/src/JsonlParser.php b/src/JsonlParser.php new file mode 100644 index 0000000..1cd638d --- /dev/null +++ b/src/JsonlParser.php @@ -0,0 +1,29 @@ +assertNull($parser->pop()); + $this->assertCount(0, $parser); + } +} From 6ce2e3447b7c40b25c87a60eae1223f43d0d8fca Mon Sep 17 00:00:00 2001 From: macbre Date: Sat, 25 Nov 2023 13:17:22 +0000 Subject: [PATCH 02/11] count() method implemented + tests --- src/JsonlParser.php | 26 +++++++++++++++++++++++++- tests/JsonParserTest.php | 24 ++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 1 deletion(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index 1cd638d..305fb37 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -17,13 +17,37 @@ public function push(array $item): void { } + + public function pushItems(\Iterator $items): void + { + foreach($items as $item) { + $this->push($item); + } + } public function pop(): ?array { return null; } + /** + * This method returns how many JSON-encoded lines are in the stream. 
+ * + * This can be heavy on large files, as this method rewinds and then reads the entire stream content. + * + * @return int + */ public function count(): int { - return 0; + $count = 0; + fseek($this->stream, 0); + + /** + * https://www.php.net/manual/en/function.stream-get-line.php + */ + while(($_line = stream_get_line($this->stream, 1024 * 1024, self::LINES_SEPARATOR)) !== false) { + $count++; + } + + return $count; } } diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index b1e2efb..470ad67 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -11,4 +11,28 @@ public function testOpensAnEmptyString(): void $this->assertNull($parser->pop()); $this->assertCount(0, $parser); } + + public function testOpensASingleLine(): void + { + $item = ['foo' => 'bar', 'ok' => true]; + + $stream = self::streamFromString(json_encode($item)); + $parser = new JsonlParser($stream); + $this->assertCount(1, $parser); +// $this->assertSame($item, $parser->pop()); +// $this->assertCount(0, $parser); + } + public function testOpensTwoLines(): void + { + $itemA = ['foo' => 'bar', 'ok' => true]; + $itemB = ['foo' => 'test', 'ok' => true]; + + $stream = self::streamFromString(json_encode($itemA) . JsonlParser::LINES_SEPARATOR . 
json_encode($itemB)); + $parser = new JsonlParser($stream); + $this->assertCount(2, $parser); +// $this->assertSame($itemB, $parser->pop()); +// $this->assertCount(1, $parser); +// $this->assertSame($itemA, $parser->pop()); +// $this->assertCount(0, $parser); + } } From 02a4e05d1d3c659f4a81ffcb7d063ce8b95a8df9 Mon Sep 17 00:00:00 2001 From: macbre Date: Sat, 25 Nov 2023 19:21:24 +0000 Subject: [PATCH 03/11] Initial, a bit dirty implementation of the pop() method --- src/JsonlParser.php | 46 +++++++++++++++++++++++++++++++++++++++- tests/JsonParserTest.php | 12 +++++------ 2 files changed, 51 insertions(+), 7 deletions(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index 305fb37..16c7a8e 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -24,9 +24,53 @@ public function pushItems(\Iterator $items): void $this->push($item); } } + + /** + * This method returns the last item from the file and removes it. + */ public function pop(): ?array { - return null; + /*** + * Rewind to the end of the file and try to find the last newline + * + * @see https://www.php.net/manual/en/function.fseek.php + */ + fseek($this->stream, 0, SEEK_END); + + // this stream is now empty + if (ftell($this->stream) === 0) { + return null; + } + + // start reading from the end of the stream in reverse order, byte by byte + fseek($this->stream, -1, SEEK_END); + $buffer = fread($this->stream, 1); + + while(ftell($this->stream) > 0) { + // move two bytes back (one already read and the one before it) + fseek($this->stream, -2, SEEK_CUR); + + $char = fread($this->stream, 1); + $buffer .= $char; + + if ($char === self::LINES_SEPARATOR) { + break; + } + + if (ftell($this->stream) === 0) { + break; + } + } + + $buffer = strrev($buffer); + +// var_dump(__METHOD__, $buffer); + + // truncate the stream and remove the trailing newline + $pos = ftell($this->stream); + ftruncate($this->stream, $pos < 1 ? 
0 : $pos-1); + + return json_decode($buffer, associative: true); } /** diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index 470ad67..83e1ff9 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -19,8 +19,8 @@ public function testOpensASingleLine(): void $stream = self::streamFromString(json_encode($item)); $parser = new JsonlParser($stream); $this->assertCount(1, $parser); -// $this->assertSame($item, $parser->pop()); -// $this->assertCount(0, $parser); + $this->assertSame($item, $parser->pop()); + $this->assertCount(0, $parser); } public function testOpensTwoLines(): void { @@ -30,9 +30,9 @@ public function testOpensTwoLines(): void $stream = self::streamFromString(json_encode($itemA) . JsonlParser::LINES_SEPARATOR . json_encode($itemB)); $parser = new JsonlParser($stream); $this->assertCount(2, $parser); -// $this->assertSame($itemB, $parser->pop()); -// $this->assertCount(1, $parser); -// $this->assertSame($itemA, $parser->pop()); -// $this->assertCount(0, $parser); + $this->assertSame($itemB, $parser->pop()); + $this->assertCount(1, $parser); + $this->assertSame($itemA, $parser->pop()); + $this->assertCount(0, $parser); } } From 085239345dfa0571be17c244e261f5c2489cd5a2 Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 10:25:26 +0000 Subject: [PATCH 04/11] Add the testOpensASingleLineWithTrailingNewLine case --- src/JsonlParser.php | 4 +--- tests/JsonParserTest.php | 10 ++++++++++ 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index 16c7a8e..6ce212e 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -63,8 +63,6 @@ public function pop(): ?array } $buffer = strrev($buffer); - -// var_dump(__METHOD__, $buffer); // truncate the stream and remove the trailing newline $pos = ftell($this->stream); @@ -88,7 +86,7 @@ public function count(): int /** * https://www.php.net/manual/en/function.stream-get-line.php */ - while(($_line = 
stream_get_line($this->stream, 1024 * 1024, self::LINES_SEPARATOR)) !== false) { + while(stream_get_line($this->stream, 1024 * 1024, self::LINES_SEPARATOR) !== false) { $count++; } diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index 83e1ff9..1735776 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -22,6 +22,16 @@ public function testOpensASingleLine(): void $this->assertSame($item, $parser->pop()); $this->assertCount(0, $parser); } + public function testOpensASingleLineWithTrailingNewLine(): void + { + $item = ['foo' => 'bar', 'ok' => true]; + + $stream = self::streamFromString(json_encode($item) . JsonlParser::LINES_SEPARATOR); + $parser = new JsonlParser($stream); + $this->assertCount(1, $parser); + $this->assertSame($item, $parser->pop()); + $this->assertCount(0, $parser); + } public function testOpensTwoLines(): void { $itemA = ['foo' => 'bar', 'ok' => true]; From 634f7e392c344eaeff8ea788478023706f079a55 Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 10:43:31 +0000 Subject: [PATCH 05/11] Implement the push() method --- src/JsonlParser.php | 3 ++- tests/JsonParserTest.php | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index 6ce212e..c577a75 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -15,7 +15,8 @@ public function __construct(protected $stream) public function push(array $item): void { - + $encoded = json_encode($item); + fwrite($this->stream, $encoded . 
self::LINES_SEPARATOR); } public function pushItems(\Iterator $items): void diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index 1735776..3226aaa 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -45,4 +45,18 @@ public function testOpensTwoLines(): void $this->assertSame($itemA, $parser->pop()); $this->assertCount(0, $parser); } + + + public function testOpensAnEmptyStringAndAddsAnItem(): void + { + $item = ['foo' => 'bar', 'ok' => true]; + + $stream = self::streamFromString(''); + $parser = new JsonlParser($stream); + $this->assertCount(0, $parser); + $parser->push($item); + $this->assertCount(1, $parser); + $this->assertSame($item, $parser->pop()); + $this->assertCount(0, $parser); + } } From 6674b574dbd8b042c51dc5884ba21a94ed8fcfc9 Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 10:45:07 +0000 Subject: [PATCH 06/11] count(): call rewind() --- src/JsonlParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index c577a75..e30445e 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -82,7 +82,7 @@ public function pop(): ?array public function count(): int { $count = 0; - fseek($this->stream, 0); + rewind($this->stream); /** * https://www.php.net/manual/en/function.stream-get-line.php From bdff29727abaa0ed6b2a1037003443afa70b4983 Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 10:52:50 +0000 Subject: [PATCH 07/11] JsonlParser: handle both arrays and strings --- src/JsonlParser.php | 4 ++-- tests/JsonParserTest.php | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index e30445e..1e40aff 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -13,7 +13,7 @@ public function __construct(protected $stream) { } - public function push(array $item): void + public function push(array|string $item): void { $encoded = json_encode($item); fwrite($this->stream, 
$encoded . self::LINES_SEPARATOR); @@ -29,7 +29,7 @@ public function pushItems(\Iterator $items): void /** * This method returns the last item from the file and removes it. */ - public function pop(): ?array + public function pop(): null|array|string { /*** * Rewind to the end of the file and try to find the last newline diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index 3226aaa..7c300e2 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -59,4 +59,18 @@ public function testOpensAnEmptyStringAndAddsAnItem(): void $this->assertSame($item, $parser->pop()); $this->assertCount(0, $parser); } + + public function testHandlesStrings(): void + { + $item = 'https://foo.bar.net'; + + $stream = self::streamFromString(''); + $parser = new JsonlParser($stream); + $this->assertCount(0, $parser); + $parser->push($item); + $this->assertCount(1, $parser); + $this->assertSame($item, $parser->pop()); + $this->assertCount(0, $parser); + $this->assertNull($parser->pop()); + } } From 7fd1c9312ac4f40b122bdfdbb86b51fb72c50f3e Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 11:01:00 +0000 Subject: [PATCH 08/11] pop: fix for PHP 8.3 --- src/JsonlParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index 1e40aff..9ca3597 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -47,7 +47,7 @@ public function pop(): null|array|string fseek($this->stream, -1, SEEK_END); $buffer = fread($this->stream, 1); - while(ftell($this->stream) > 0) { + while(ftell($this->stream) > 1) { // move two bytes back (one already read and the one before it) fseek($this->stream, -2, SEEK_CUR); From 3a1b44f2e78ed90c0d6806e7b0f3c13020c23a5a Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 11:08:57 +0000 Subject: [PATCH 09/11] Add and test the iterate() method --- src/JsonlParser.php | 9 ++++++++ tests/JsonParserTest.php | 48 ++++++++++++++++++++++++---------------- 2 files changed, 
38 insertions(+), 19 deletions(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index 9ca3597..b37d977 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -72,6 +72,15 @@ public function pop(): null|array|string return json_decode($buffer, associative: true); } + /** + * @return \Generator + */ + public function iterate(): \Generator { + while(!is_null($item=$this->pop())) { + yield $item; + } + } + /** * This method returns how many JSON-encoded lines are in the stream. * diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index 7c300e2..56770ef 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -4,6 +4,10 @@ class JsonParserTest extends BaseTestCase { + const ITEM = ['foo' => 'bar', 'ok' => true]; + const ITEM_ONE = ['foo' => 'bar', 'ok' => true]; + const ITEM_TWO = ['foo' => 'test', 'ok' => true]; + public function testOpensAnEmptyString(): void { $stream = self::streamFromString(''); @@ -14,49 +18,39 @@ public function testOpensAnEmptyString(): void public function testOpensASingleLine(): void { - $item = ['foo' => 'bar', 'ok' => true]; - - $stream = self::streamFromString(json_encode($item)); + $stream = self::streamFromString(json_encode(self::ITEM)); $parser = new JsonlParser($stream); $this->assertCount(1, $parser); - $this->assertSame($item, $parser->pop()); + $this->assertSame(self::ITEM, $parser->pop()); $this->assertCount(0, $parser); } public function testOpensASingleLineWithTrailingNewLine(): void { - $item = ['foo' => 'bar', 'ok' => true]; - - $stream = self::streamFromString(json_encode($item) . JsonlParser::LINES_SEPARATOR); + $stream = self::streamFromString(json_encode(self::ITEM) . 
JsonlParser::LINES_SEPARATOR); $parser = new JsonlParser($stream); $this->assertCount(1, $parser); - $this->assertSame($item, $parser->pop()); + $this->assertSame(self::ITEM, $parser->pop()); $this->assertCount(0, $parser); } public function testOpensTwoLines(): void { - $itemA = ['foo' => 'bar', 'ok' => true]; - $itemB = ['foo' => 'test', 'ok' => true]; - - $stream = self::streamFromString(json_encode($itemA) . JsonlParser::LINES_SEPARATOR . json_encode($itemB)); + $stream = self::streamFromString(json_encode(self::ITEM_ONE) . JsonlParser::LINES_SEPARATOR . json_encode(self::ITEM_TWO)); $parser = new JsonlParser($stream); $this->assertCount(2, $parser); - $this->assertSame($itemB, $parser->pop()); + $this->assertSame(self::ITEM_TWO, $parser->pop()); $this->assertCount(1, $parser); - $this->assertSame($itemA, $parser->pop()); + $this->assertSame(self::ITEM_ONE, $parser->pop()); $this->assertCount(0, $parser); } - public function testOpensAnEmptyStringAndAddsAnItem(): void { - $item = ['foo' => 'bar', 'ok' => true]; - $stream = self::streamFromString(''); $parser = new JsonlParser($stream); $this->assertCount(0, $parser); - $parser->push($item); + $parser->push(self::ITEM); $this->assertCount(1, $parser); - $this->assertSame($item, $parser->pop()); + $this->assertSame(self::ITEM, $parser->pop()); $this->assertCount(0, $parser); } @@ -73,4 +67,20 @@ public function testHandlesStrings(): void $this->assertCount(0, $parser); $this->assertNull($parser->pop()); } + + public function testIterator(): void { + $stream = self::streamFromString(''); + $parser = new JsonlParser($stream); + + $parser->push('one'); + $parser->push('two'); + $parser->push('three'); + $this->assertCount(3, $parser); + + $list = iterator_to_array($parser->iterate()); + + $this->assertCount(0, $parser); + $this->assertCount(3, $list); + $this->assertSame(['three', 'two', 'one'], $list); + } } From 5257a96a7a50d7ce625b26aa9585fd85aeca40fb Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 
11:20:37 +0000 Subject: [PATCH 10/11] Reformat the code --- src/JsonlParser.php | 17 +++++++++-------- tests/JsonParserTest.php | 31 ++++++++++++++++--------------- 2 files changed, 25 insertions(+), 23 deletions(-) diff --git a/src/JsonlParser.php b/src/JsonlParser.php index b37d977..3303f0a 100644 --- a/src/JsonlParser.php +++ b/src/JsonlParser.php @@ -72,14 +72,15 @@ public function pop(): null|array|string return json_decode($buffer, associative: true); } - /** - * @return \Generator - */ - public function iterate(): \Generator { - while(!is_null($item=$this->pop())) { - yield $item; - } - } + /** + * @return \Generator + */ + public function iterate(): \Generator + { + while(!is_null($item=$this->pop())) { + yield $item; + } + } /** * This method returns how many JSON-encoded lines are in the stream. diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index 56770ef..155e27e 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -4,9 +4,9 @@ class JsonParserTest extends BaseTestCase { - const ITEM = ['foo' => 'bar', 'ok' => true]; - const ITEM_ONE = ['foo' => 'bar', 'ok' => true]; - const ITEM_TWO = ['foo' => 'test', 'ok' => true]; + const ITEM = ['foo' => 'bar', 'ok' => true]; + const ITEM_ONE = ['foo' => 'bar', 'ok' => true]; + const ITEM_TWO = ['foo' => 'test', 'ok' => true]; public function testOpensAnEmptyString(): void { @@ -68,19 +68,20 @@ public function testHandlesStrings(): void $this->assertNull($parser->pop()); } - public function testIterator(): void { - $stream = self::streamFromString(''); - $parser = new JsonlParser($stream); + public function testIterator(): void + { + $stream = self::streamFromString(''); + $parser = new JsonlParser($stream); - $parser->push('one'); - $parser->push('two'); - $parser->push('three'); - $this->assertCount(3, $parser); + $parser->push('one'); + $parser->push('two'); + $parser->push('three'); + $this->assertCount(3, $parser); - $list = iterator_to_array($parser->iterate()); + $list = 
iterator_to_array($parser->iterate()); - $this->assertCount(0, $parser); - $this->assertCount(3, $list); - $this->assertSame(['three', 'two', 'one'], $list); - } + $this->assertCount(0, $parser); + $this->assertCount(3, $list); + $this->assertSame(['three', 'two', 'one'], $list); + } } From fc2925619c22c6c4481d59ef0a20edab81fa4cb9 Mon Sep 17 00:00:00 2001 From: macbre Date: Mon, 27 Nov 2023 11:26:23 +0000 Subject: [PATCH 11/11] JsonParserTest: add a test case for the pushItems() method --- tests/JsonParserTest.php | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/tests/JsonParserTest.php b/tests/JsonParserTest.php index 155e27e..c37dff7 100644 --- a/tests/JsonParserTest.php +++ b/tests/JsonParserTest.php @@ -84,4 +84,26 @@ public function testIterator(): void $this->assertCount(3, $list); $this->assertSame(['three', 'two', 'one'], $list); } + + public function testPushItems(): void + { + $stream = self::streamFromString(''); + $parser = new JsonlParser($stream); + + function iterator(): Generator + { + yield 'one'; + yield 'two'; + yield 'three'; + } + + $parser->pushItems(items:iterator()); + $this->assertCount(3, $parser); + + $list = iterator_to_array($parser->iterate()); + + $this->assertCount(0, $parser); + $this->assertCount(3, $list); + $this->assertSame(['three', 'two', 'one'], $list); + } }