- Reading
sql
# <- csv ->
# common read_csv_auto options:
#   delim            - field delimiter
#   nullstr          - string that should be read as NULL
#   normalize_names  - sanitize column names (lowercase, underscores)
select * from read_csv_auto('/path/data.csv', header=True, delim='|');
# cli reading
.mode csv
.import data.csv data_tbl
# sample
# 'using sample 10%' -> system (cluster) sampling; a fixed row count uses reservoir sampling
select * from read_csv_auto('/path/data.csv') using sample 4;
# persist as table (CREATE TABLE requires a table name before AS)
create table data_tbl as select * from read_csv_auto('/path/data.csv');
# <- parquet ->
# filename=True option adds a column with each row's source file path
select * from read_parquet('/tmp/*.parquet');
- Writing
sql
# <- csv -> (example: COPY query results to CSV with a header row)
# copy (select * from tbl) to 'result.csv' with (header, delimiter ',');
# <- parquet ->
copy (select * from tbl) to 'result.parquet' (format 'parquet');
# cli writing: set the output mode, redirect query output to a file
.mode jsonlines
.output results.txt
select * from tbl;
# bare .output restores output back to the terminal
.output
# write entire db -> series of files (schema + data, reloadable with IMPORT DATABASE)
export database 'target_dir' (format parquet);
- Features
sql
# remove cols: EXCLUDE drops the listed columns from the * expansion
select * exclude(col_1, col_2) from tbl;
# quick col fixes: REPLACE substitutes an expression for a column, keeping its name
select * replace(lower(city) as city) from tbl;
# smarter groupby: GROUP BY ALL groups by every non-aggregated select column
select col_1, col_2, count(*) from tbl group by all;
# column aliases can be referenced in WHERE and HAVING (DuckDB extension to standard SQL)
# string + list slicing -> 1-indexed; negative indices count back from the end
select 'hello world!'[:-2];
- Insert using Python
python
import duckdb

# connect to (or create) a persistent database file
con = duckdb.connect(database='my-db.duckdb')
try:
    # create a table
    con.execute("CREATE TABLE items(item VARCHAR, value DECIMAL(10,2), count INTEGER)")
    # insert two items into the table
    con.execute("INSERT INTO items VALUES ('jeans', 20.0, 1), ('hammer', 42.2, 2)")
    # insert several rows using prepared statements (values are bound, never interpolated)
    con.executemany("INSERT INTO items VALUES (?, ?, ?)", [['chainsaw', 500, 10], ['iphone', 300, 2]])
finally:
    # close the connection so all writes are flushed to the database file
    con.close()
- Read sql file
sql
# CLI dot-command: execute every statement contained in the .sql file
.read select_example.sql
- Generate date spine
sql
# range() excludes the upper bound (use generate_series() for an inclusive end)
select * from range(date '1992-01-01', date '1992-03-01', interval '1' month);
- List lambdas
sql
# applies the lambda to each element; NULL elements stay NULL -> [2, 3, NULL, 5]
select list_transform([1, 2, null, 4], x -> x + 1);
- Histogram
sql
# aggregate returning a map of distinct value -> occurrence count
select histogram(col) from tbl;