Skip to content

Commit

Permalink
Add support for Time64
Browse files Browse the repository at this point in the history
Signed-off-by: Rémi Lapeyre <[email protected]>
  • Loading branch information
remilapeyre committed Jul 21, 2020
1 parent 1583329 commit 797a41b
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 65 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ Now you should be able to create foreign table from Parquet files. Currently `pa
| FLOAT | FLOAT4 |
| DOUBLE | FLOAT8 |
| TIMESTAMP | TIMESTAMP |
| TIME64 | TIME |
| DATE32 | DATE |
| STRING | TEXT |
| BINARY | BYTEA |
Expand Down
Binary file modified data/example1.parquet
Binary file not shown.
Binary file modified data/example2.parquet
Binary file not shown.
16 changes: 13 additions & 3 deletions data/generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq
from datetime import datetime, date
from datetime import datetime, date, time, timezone, timedelta

# row group 1
df1 = pd.DataFrame({'one': [1, 2, 3],
Expand All @@ -18,7 +18,12 @@
date(2018, 1, 2),
date(2018, 1, 3)],
'six': [True, False, True],
'seven': [0.5, None, 1.0]})
'seven': [0.5, None, 1.0],
'eight': [
time(12),
time(13, 14, 15),
time(16, 17, 18, 5432),
]})
table1 = pa.Table.from_pandas(df1)

# row group 2
Expand All @@ -32,7 +37,12 @@
date(2018, 1, 5),
date(2018, 1, 6)],
'six': [False, False, False],
'seven': [0.5, None, 1.0]})
'seven': [0.5, None, 1.0],
'eight': [
time(12, tzinfo=timezone(timedelta(hours=3))),
time(13, tzinfo=timezone(timedelta(hours=2))),
time(14, tzinfo=timezone(timedelta(hours=1))),
]})
table2 = pa.Table.from_pandas(df2)

with pq.ParquetWriter('example1.parquet', table1.schema) as writer:
Expand Down
3 changes: 2 additions & 1 deletion input/parquet_fdw.source
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,8 @@ CREATE FOREIGN TABLE example1 (
four TIMESTAMP,
five DATE,
six BOOL,
seven FLOAT8)
seven FLOAT8,
eight TIME)
SERVER parquet_srv
OPTIONS (filename '@abs_srcdir@/data/example1.parquet', sorted 'one');

Expand Down
26 changes: 13 additions & 13 deletions output/import.source
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,19 @@ select import_parquet('example_import', 'public', 'parquet_srv', 'list_parquet_f
(1 row)

SELECT * FROM example_import ORDER BY one, three;
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
1 | {19,20} | eins | 2018-01-01 00:00:00 | 2018-01-01 | t |
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f |
3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1
3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f |
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f |
5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t |
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1
7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f |
9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t |
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+-----------------
1 | {19,20} | eins | 2018-01-01 00:00:00 | 2018-01-01 | t | |
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15
3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432
3 | {21,22} | zwei | 2018-01-03 00:00:00 | 2018-01-03 | f | |
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00
5 | {23,24} | drei | 2018-01-05 00:00:00 | 2018-01-05 | t | |
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00
7 | {25,26} | vier | 2018-01-07 00:00:00 | 2018-01-07 | f | |
9 | {27,28} | fünf | 2018-01-09 00:00:00 | 2018-01-09 | t | |
(11 rows)

select import_parquet_explicit('example_import2', 'public', 'parquet_srv', array['one', 'three', 'six'], array['int8', 'text', 'bool']::regtype[], 'list_parquet_files', '{"dir": "@abs_srcdir@/data"}', '{"sorted": "one"}');
Expand Down
97 changes: 49 additions & 48 deletions output/parquet_fdw.source
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,19 @@ CREATE FOREIGN TABLE example1 (
four TIMESTAMP,
five DATE,
six BOOL,
seven FLOAT8)
seven FLOAT8,
eight TIME)
SERVER parquet_srv
OPTIONS (filename '@abs_srcdir@/data/example1.parquet', sorted 'one');
SELECT * FROM example1;
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f |
3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f |
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+-----------------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15
3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00
(6 rows)

-- no explicit columns mentions
Expand Down Expand Up @@ -71,91 +72,91 @@ SET client_min_messages = DEBUG1;
SELECT * FROM example1 WHERE one < 1;
DEBUG: parquet_fdw: skip rowgroup 1
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+-----+-------+------+------+-----+-------
one | two | three | four | five | six | seven | eight
-----+-----+-------+------+------+-----+-------+-------
(0 rows)

SELECT * FROM example1 WHERE one <= 1;
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+---------+-------+---------------------+------------+-----+-------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5
one | two | three | four | five | six | seven | eight
-----+---------+-------+---------------------+------------+-----+-------+----------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00
(1 row)

SELECT * FROM example1 WHERE one > 6;
DEBUG: parquet_fdw: skip rowgroup 1
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+-----+-------+------+------+-----+-------
one | two | three | four | five | six | seven | eight
-----+-----+-------+------+------+-----+-------+-------
(0 rows)

SELECT * FROM example1 WHERE one >= 6;
DEBUG: parquet_fdw: skip rowgroup 1
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+----------
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00
(1 row)

SELECT * FROM example1 WHERE one = 2;
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f |
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+----------
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15
(1 row)

SELECT * FROM example1 WHERE one = 7;
DEBUG: parquet_fdw: skip rowgroup 1
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+-----+-------+------+------+-----+-------
one | two | three | four | five | six | seven | eight
-----+-----+-------+------+------+-----+-------+-------
(0 rows)

SELECT * FROM example1 WHERE six = true;
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+---------+-------+---------------------+------------+-----+-------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5
3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1
one | two | three | four | five | six | seven | eight
-----+---------+-------+---------------------+------------+-----+-------+-----------------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00
3 | {7,8,9} | baz | 2018-01-03 00:00:00 | 2018-01-03 | t | 1 | 16:17:18.005432
(2 rows)

SELECT * FROM example1 WHERE six = false;
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f |
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f |
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+----------
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00
6 | {16,17,18} | tres | 2018-01-06 00:00:00 | 2018-01-06 | f | 1 | 14:00:00
(4 rows)

SELECT * FROM example1 WHERE seven < 0.9;
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+----------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00
4 | {10,11,12} | uno | 2018-01-04 00:00:00 | 2018-01-04 | f | 0.5 | 12:00:00
(2 rows)

SELECT * FROM example1 WHERE seven IS NULL;
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f |
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f |
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+----------
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15
5 | {13,14,15} | dos | 2018-01-05 00:00:00 | 2018-01-05 | f | | 13:00:00
(2 rows)

-- prepared statements
prepare prep(date) as select * from example1 where five < $1;
execute prep('2018-01-03');
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+------------+-------+---------------------+------------+-----+-------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f |
one | two | three | four | five | six | seven | eight
-----+------------+-------+---------------------+------------+-----+-------+----------
1 | {1,2,3} | foo | 2018-01-01 00:00:00 | 2018-01-01 | t | 0.5 | 12:00:00
2 | {NULL,5,6} | bar | 2018-01-02 00:00:00 | 2018-01-02 | f | | 13:14:15
(2 rows)

execute prep('2018-01-01');
DEBUG: parquet_fdw: skip rowgroup 1
DEBUG: parquet_fdw: skip rowgroup 2
one | two | three | four | five | six | seven
-----+-----+-------+------+------+-----+-------
one | two | three | four | five | six | seven | eight
-----+-----+-------+------+------+-----+-------+-------
(0 rows)

-- invalid options
Expand Down
8 changes: 8 additions & 0 deletions parquet_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -801,6 +801,12 @@ class ParquetFdwReader
res = TimestampGetDatum(ts);
break;
}
case arrow::Type::TIME64:
{
arrow::Time64Array *tarray = (arrow::Time64Array *) array;
res = TimeADTGetDatum(tarray->Value(i));
break;
}
case arrow::Type::DATE32:
{
arrow::Date32Array *tsarray = (arrow::Date32Array *) array;
Expand Down Expand Up @@ -2190,6 +2196,8 @@ to_postgres_type(int arrow_type)
return BYTEAOID;
case arrow::Type::TIMESTAMP:
return TIMESTAMPOID;
case arrow::Type::TIME64:
return TIMEOID;
case arrow::Type::DATE32:
return DATEOID;
default:
Expand Down

0 comments on commit 797a41b

Please sign in to comment.