diff --git a/README.md b/README.md index f779b5e..0b78354 100644 --- a/README.md +++ b/README.md @@ -38,6 +38,7 @@ Now you should be able to create foreign table from Parquet files. Currently `pa | FLOAT | FLOAT4 | | DOUBLE | FLOAT8 | | TIMESTAMP | TIMESTAMP | +| TIME64 | TIME | | DATE32 | DATE | | STRING | TEXT | | BINARY | BYTEA | diff --git a/data/example1.parquet b/data/example1.parquet index c6c1f2c..098b7e8 100644 Binary files a/data/example1.parquet and b/data/example1.parquet differ diff --git a/data/example2.parquet b/data/example2.parquet index 589ae65..45a5dfb 100644 Binary files a/data/example2.parquet and b/data/example2.parquet differ diff --git a/data/generate.py b/data/generate.py index d25d4c9..c5091d1 100755 --- a/data/generate.py +++ b/data/generate.py @@ -5,7 +5,7 @@ import pandas as pd import pyarrow as pa import pyarrow.parquet as pq -from datetime import datetime, date +from datetime import datetime, date, time, timezone, timedelta # row group 1 df1 = pd.DataFrame({'one': [1, 2, 3], @@ -18,7 +18,12 @@ date(2018, 1, 2), date(2018, 1, 3)], 'six': [True, False, True], - 'seven': [0.5, None, 1.0]}) + 'seven': [0.5, None, 1.0], + 'eight': [ + time(12), + time(13, 14, 15), + time(16, 17, 18, 5432), + ]}) table1 = pa.Table.from_pandas(df1) # row group 2 @@ -32,7 +37,12 @@ date(2018, 1, 5), date(2018, 1, 6)], 'six': [False, False, False], - 'seven': [0.5, None, 1.0]}) + 'seven': [0.5, None, 1.0], + 'eight': [ + time(12, tzinfo=timezone(timedelta(hours=3))), + time(13, tzinfo=timezone(timedelta(hours=2))), + time(14, tzinfo=timezone(timedelta(hours=1))), + ]}) table2 = pa.Table.from_pandas(df2) with pq.ParquetWriter('example1.parquet', table1.schema) as writer: diff --git a/input/parquet_fdw.source b/input/parquet_fdw.source index 82785b9..9a2c6ff 100644 --- a/input/parquet_fdw.source +++ b/input/parquet_fdw.source @@ -18,7 +18,8 @@ CREATE FOREIGN TABLE example1 ( four TIMESTAMP, five DATE, six BOOL, - seven FLOAT8) + seven FLOAT8, + eight TIME) SERVER parquet_srv OPTIONS (filename '@abs_srcdir@/data/example1.parquet', sorted 'one'); diff --git a/output/import.source b/output/import.source index 2431ee4..8551c95 100644 --- a/output/import.source +++ b/output/import.source @@ -44,19 +44,19 @@ select import_parquet('example_import', 'public', 'parquet_srv', 'list_parquet_f (1 row) SELECT * FROM example_import ORDER BY one, three; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {19,20} | eins | 2018-01-01 00:00:00 | 2018-01-01 | t | - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 - 3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 - 7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f | - 9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+----------------- + 1 | {19,20} | eins | 2018-01-01 00:00:00 | 2018-01-01 | t | | + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432 + 3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | | + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 + 5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | | + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 + 7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f | | + 9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t | | (11 rows) select import_parquet_explicit('example_import2', 'public', 'parquet_srv', array['one', 'three', 'six'], array['int8', 'text', 'bool']::regtype[], 'list_parquet_files', '{"dir": "@abs_srcdir@/data"}', '{"sorted": "one"}'); diff --git a/output/parquet_fdw.source b/output/parquet_fdw.source index a66f1b3..0ae7ce1 100644 --- a/output/parquet_fdw.source +++ b/output/parquet_fdw.source @@ -15,18 +15,19 @@ CREATE FOREIGN TABLE example1 ( four TIMESTAMP, five DATE, six BOOL, - seven FLOAT8) + seven FLOAT8, + eight TIME) SERVER parquet_srv OPTIONS (filename '@abs_srcdir@/data/example1.parquet', sorted 'one'); SELECT * FROM example1; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+----------------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 (6 rows) -- no explicit columns mentions @@ -71,91 +72,91 @@ SET client_min_messages = DEBUG1; SELECT * FROM example1 WHERE one < 1; DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) SELECT * FROM example1 WHERE one <= 1; DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+---------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 + one | two | three | four | five | six | seven | eight +-----+---------+-------+---------------------+------------+-----+-------+---------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 (1 row) SELECT * FROM example1 WHERE one > 6; DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) SELECT * FROM example1 WHERE one >= 6; DEBUG: parquet_fdw: skip rowgroup 1 - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 (1 row) SELECT * FROM example1 WHERE one = 2; DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 (1 row) SELECT * FROM example1 WHERE one = 7; DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) SELECT * FROM example1 WHERE six = true; DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+---------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 + one | two | three | four | five | six | seven | eight +-----+---------+-------+---------------------+------------+-----+-------+----------------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432 (2 rows) SELECT * FROM example1 WHERE six = false; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | - 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 + 6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00 (4 rows) SELECT * FROM example1 WHERE seven < 0.9; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00 (2 rows) SELECT * FROM example1 WHERE seven IS NULL; - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | - 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 + 5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00 (2 rows) -- prepared statements prepare prep(date) as select * from example1 where five < $1; execute prep('2018-01-03'); DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+------------+-------+---------------------+------------+-----+------- - 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 - 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | + one | two | three | four | five | six | seven | eight +-----+------------+-------+---------------------+------------+-----+-------+---------- + 1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00 + 2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15 (2 rows) execute prep('2018-01-01'); DEBUG: parquet_fdw: skip rowgroup 1 DEBUG: parquet_fdw: skip rowgroup 2 - one | two | three | four | five | six | seven ------+-----+-------+------+------+-----+------- + one | two | three | four | five | six | seven | eight +-----+-----+-------+------+------+-----+-------+------- (0 rows) -- invalid options diff --git a/parquet_impl.cpp b/parquet_impl.cpp index 19a1c58..f79f1a5 100644 --- a/parquet_impl.cpp +++ b/parquet_impl.cpp @@ -801,6 +801,12 @@ class ParquetFdwReader res = TimestampGetDatum(ts); break; } + case arrow::Type::TIME64: + { + arrow::Time64Array *tarray = (arrow::Time64Array *) array; + res = TimeADTGetDatum(tarray->Value(i)); + break; + } case arrow::Type::DATE32: { arrow::Date32Array *tsarray = (arrow::Date32Array *) array; @@ -2190,6 +2196,8 @@ to_postgres_type(int arrow_type) return BYTEAOID; case arrow::Type::TIMESTAMP: return TIMESTAMPOID; + case arrow::Type::TIME64: + return TIMEOID; case arrow::Type::DATE32: return DATEOID; default: