From 797a41b59f10ca725d97dbb4d361e3fd8c71b150 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?R=C3=A9mi=20Lapeyre?= Date: Thu, 25 Jun 2020 01:54:17 +0200 Subject: [PATCH] Add support for Time64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Lapeyre --- README.md | 1 + data/example1.parquet | Bin 6813 -> 7708 bytes data/example2.parquet | Bin 4594 -> 4594 bytes data/generate.py | 16 +++++-- input/parquet_fdw.source | 3 +- output/import.source | 26 +++++----- output/parquet_fdw.source | 97 +++++++++++++++++++------------------- parquet_impl.cpp | 8 ++++ 8 files changed, 86 insertions(+), 65 deletions(-) diff --git a/README.md b/README.md index f779b5e..0b78354 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Now you should be able to create foreign table from Parquet files. Currently `pa | FLOAT | FLOAT4 | | DOUBLE | FLOAT8 | | TIMESTAMP | TIMESTAMP | +| TIME64 | TIME | | DATE32 | DATE | | STRING | TEXT | | BINARY | BYTEA | diff --git a/data/example1.parquet b/data/example1.parquet index c6c1f2c557cffadc015a60035f792ceba521b709..098b7e86366f104721a77429970096480a127b1b 100644 GIT binary patch delta 1842 zcmb_cUrbw796r6Y7Y40krMKJ*9k2p-7zl+9+*lUPxgE5(_A<)9(z;~ndP^y5FGMIE z-AoDbaq7&8F&bQ4G%+!Rk?^t@5`8d6A2h}XAB+#i%qJ7$gAc|R{hf0!i+04P-L&`o z&iDKOeTDZ%cP{O}W%rN71YWSa5fZPW{a>GHLd~ZS{FDk@BP8%{n;k}?v~$o`^00) z)E4K>$;fN=m$|K0Vsl82-vs$Xfpj`P=e}$saog#p?>X*QmV1>BTY zWYYScLw$86V@0gfSJt4cTqAR!R^m=e-qyYDOQH?I2WVp_&8`)@DnBhPj->E@LigTk zh10_!X+Y#x(}td26F)d{%bz#&?9FSLl|rd#tobi?y>#%E>)SeVwb|)oe?9(zF8^Xa zThOl=>0%ZF{0t`dPqx_C^TjODP7aWFPEEHD`(Ht#6HiZHNnA8V7kAwO`iv3Y{Q@_c zBVl*9D^yl6#`KF{)DFt-po0jl&Gp6k)l_h4h};$=K2b2zkvRF(E0Kpn2QgaO$=9Cx z>cUJprxqtSQ;CV8>*v(`mArCZE#6#B8quYUaRcr+pNSN}j&A6i>Y5sg22%^sN;)xC zQkPe9>M|~Av&oVcijKuX5F4H-rNR@Hg>$Jw#+Y2a9vopcw3(4|YS4=MgVifzB}ePheEO(X zJ`Gh0>4d(bQ&?-N^albLz#6|CCig{8RgD3+6i~53i$^94dU3u47Ve)q9|+J{;z`!= zG6k)=)S8=u)UpPaQb{l5aD?4yNG?hJwedr+1*-?$sXtXL1gj~wk)ndjTrp2wxOPQ= zk~oAZm#jA2HIssHYC~UOjN-+|1I!&e@G$dJmok)&L<`XSdH4a%LTg5eHVZD326IAl zStbWe95)+k%SZ8{2%|7|1X!soOIg*!^Xs%8os@RegBNI3XrH5EMKHE>eyrqpC`!Yb x4%Ptl4)H)GteBw;SkHucLWz1D9xK5rB++)(c865k1g8)FCERF1XbMjc{{vc&+Km7J delta 1184 zcmZ`$OHUI~6rN6PDNr6lJ5C=wggBT$C=^P-V9cGiw9^!93$~@ZKClmIi)mr9V{~DnEH%1v!@|TBA>MPRMT2@XbMCq4p2zon_u=#-+spfB za7dM5k2>+w7K7@0hw!8We^KqT`{%G<(bfEs<+fR2lM}Y|_`Q0A-85olwoZMs47g zh4h-9<6a ztzpQpE__n}+uu}LZ9}<@d@syBxg1M+%&EMo97ch2kjPE75qYXg>@r*sOxVLB@rP6KXN2@d-PUSH8@g` z?4C?K8%!l!Q)MZWPcD@`g=olAUJQF|aF%kSjrir*SUl~uqjVx0EXS?O#aLRJknB>) zn{isb8I-<0UxM6JEE`D0N2jcju%{9YO|8-%8RzP(eY_&&MqoM;lf<~iRFx)|NPzsv z1=t+mp*1iPxMCdKIWWjie#fk(Xt5ASL~GCm&4Qt%z(IC9@CFB=Yh)8Ek#J-=2C2Tx zpv5vt9H(oBScFPduud>^%{WNuqyv1yaGcb_nVg)GizZNPzw_t3wfh<(hiV^aw9}lf`Ou(4Ygu1R+AH<%lJdrfMOjVcX67P Kvkc=yfze;V%1|)? diff --git a/data/example2.parquet b/data/example2.parquet index 589ae651c209e7821f89cb6fb724e0d2ad1cceb2..45a5dfb1ae0094f3335c3229851fbf99c1725f5c 100644 GIT binary patch delta 21 dcmeyQ{7HF(G#8`cW*M&Otc(?#xA5CB0RU9o2G#%o delta 21 dcmeyQ{7HF(G#8`sW*M&Otc+EgxA5CB0RUA32HF4s diff --git a/data/generate.py b/data/generate.py index d25d4c9..c5091d1 100755 --- a/data/generate.py +++ b/data/generate.py @@ -5,7 +5,7 @@ import pandas as pd import pyarrow as pa import pyarrow.parquet as pq -from datetime import datetime, date +from datetime import datetime, date, time, timezone, timedelta # row group 1 df1 = pd.DataFrame({'one': [1, 2, 3], @@ -18,7 +18,12 @@ date(2018, 1, 2), date(2018, 1, 3)], 'six': [True, False, True], - 'seven': [0.5, None, 1.0]}) + 'seven': [0.5, None, 1.0], + 'eight': [ + time(12), + time(13, 14, 15), + time(16, 17, 18, 5432), + ]}) table1 = pa.Table.from_pandas(df1) # row group 2 @@ -32,7 +37,12 @@ date(2018, 1, 5), date(2018, 1, 6)], 'six': [False, False, False], - 'seven': [0.5, None, 1.0]}) + 'seven': [0.5, None, 1.0], + 'eight': [ + time(12, tzinfo=timezone(timedelta(hours=3))), + time(13, tzinfo=timezone(timedelta(hours=2))), + time(14, tzinfo=timezone(timedelta(hours=1))), + ]}) table2 = pa.Table.from_pandas(df2) with pq.ParquetWriter('example1.parquet', table1.schema) as writer: diff --git a/input/parquet_fdw.source b/input/parquet_fdw.source index 82785b9..9a2c6ff 100644 --- a/input/parquet_fdw.source +++ b/input/parquet_fdw.source @@ -18,7 +18,8 @@ CREATE FOREIGN TABLE example1 ( four TIMESTAMP, five DATE, six BOOL, - seven FLOAT8) + seven FLOAT8, + eight TIME) SERVER parquet_srv OPTIONS (filename '@abs_srcdir@/data/example1.parquet', sorted 'one'); diff --git a/output/import.source b/output/import.source index 2431ee4..8551c95 100644 --- a/output/import.source +++ b/output/import.source @@ -44,19 +44,19 @@ select import_parquet('example_import', 'public', 'parquet_srv', 'list_parquet_f (1 row) SELECT * FROM example_import ORDER BY one, three; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {19,20} | eins | 2018-01-01 00:00:00 | 2018-01-01 | t | - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 - 3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 - 7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f | - 9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+----------------- + 1 | {19,20} | eins | 2018-01-01 00:00:00 | 2018-01-01 | t | | + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432 + 3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | | + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 + 5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | | + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 + 7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f | | + 9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t | | (11 rows) select import_parquet_explicit('example_import2', 'public', 'parquet_srv', array['one', 'three', 'six'], array['int8', 'text', 'bool']::regtype[], 'list_parquet_files', '{"dir": "@abs_srcdir@/data"}', '{"sorted": "one"}'); diff --git a/output/parquet_fdw.source b/output/parquet_fdw.source index a66f1b3..0ae7ce1 100644 --- a/output/parquet_fdw.source +++ b/output/parquet_fdw.source @@ -15,18 +15,19 @@ CREATE FOREIGN TABLE example1 ( four TIMESTAMP, five DATE, six BOOL, - seven FLOAT8) + seven FLOAT8, + eight TIME) SERVER parquet_srv OPTIONS (filename '@abs_srcdir@/data/example1.parquet', sorted 'one'); SELECT * FROM example1; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+----------------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 (6 rows) -- no explicit columns mentions @@ -71,91 +72,91 @@ SET client_min_messages = DEBUG1; SELECT * FROM example1 WHERE one < 1; DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) SELECT * FROM example1 WHERE one <= 1; DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+---------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 + one | two | three | four | five | six | seven | eight +-----+---------+-------+---------------------+------------+-----+-------+---------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 (1 row) SELECT * FROM example1 WHERE one > 6; DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) SELECT * FROM example1 WHERE one >= 6; DEBUG: parquet_fdw: skip rowgroup 1 - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 (1 row) SELECT * FROM example1 WHERE one = 2; DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 (1 row) SELECT * FROM example1 WHERE one = 7; DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) SELECT * FROM example1 WHERE six = true; DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+---------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 + one | two | three | four | five | six | seven | eight +-----+---------+-------+---------------------+------------+-----+-------+----------------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432 (2 rows) SELECT * FROM example1 WHERE six = false; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 (4 rows) SELECT * FROM example1 WHERE seven < 0.9; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 (2 rows) SELECT * FROM example1 WHERE seven IS NULL; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 (2 rows) -- prepared statements prepare prep(date) as select * from example1 where five < $1; execute prep('2018-01-03'); DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 (2 rows) execute prep('2018-01-01'); DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) -- invalid options diff --git a/parquet_impl.cpp b/parquet_impl.cpp index 19a1c58..f79f1a5 100644 --- a/parquet_impl.cpp +++ b/parquet_impl.cpp @@ -801,6 +801,12 @@ class ParquetFdwReader res = TimestampGetDatum(ts); break; } + case arrow::Type::TIME64: + { + arrow::Time64Array *tarray = (arrow::Time64Array *) array; + res = TimeADTGetDatum(tarray->Value(i)); + break; + } case arrow::Type::DATE32: { arrow::Date32Array *tsarray = (arrow::Date32Array *) array; @@ -2190,6 +2196,8 @@ to_postgres_type(int arrow_type) return BYTEAOID; case arrow::Type::TIMESTAMP: return TIMESTAMPOID; + case arrow::Type::TIME64: + return TIMEOID; case arrow::Type::DATE32: return DATEOID; default: