Performing SQL, Hive and Impala queries

The internal Python API allows you to execute SQL queries on any SQL connection in DSS (including Hive and Impala).

Retrieving results

You can retrieve the results of a SELECT query as a Pandas dataframe or as an iterator.

from dataiku.core.sql import SQLExecutor2

executor = SQLExecutor2(connection="db-connection") # or dataset="dataset_name"

df = executor.query_to_df("SELECT col1, COUNT(*) AS count FROM mytable GROUP BY col1")

# df is a Pandas dataframe with two columns: "col1" and "count"
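For large result sets, query_to_iter lets you stream rows instead of materializing the whole result as a dataframe. A minimal sketch, assuming the iterator yields one row at a time (check the exact row format in your DSS version):

from dataiku.core.sql import SQLExecutor2

executor = SQLExecutor2(connection="db-connection")

# Stream rows one by one instead of loading the full result in memory.
# (Assumption: query_to_iter yields individual rows; verify the row
# format against your DSS version.)
for row in executor.query_to_iter("SELECT col1 FROM mytable"):
    print(row)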

API doc

class dataiku.core.sql.SQLExecutor2(connection=None, dataset=None)
    static exec_recipe_fragment(output_dataset, query, pre_queries=[], post_queries=[], overwrite_output_schema=True, drop_partitioned_on_schema_mismatch=False)
    query_to_df(query, pre_queries=None, post_queries=None, extra_conf={})
    query_to_iter(query, pre_queries=None, post_queries=None, extra_conf={})

class dataiku.core.sql.HiveExecutor(dataset=None, database=None, connection=None)
    static exec_recipe_fragment(query, pre_queries=[], post_queries=[], overwrite_output_schema=True, drop_partitioned_on_schema_mismatch=False, metastore_handling=None, extra_conf={}, add_dku_udf=False)
    query_to_df(query, pre_queries=None, post_queries=None, extra_conf={})
    query_to_iter(query, pre_queries=None, post_queries=None, extra_conf={})

class dataiku.core.sql.ImpalaExecutor(dataset=None, database=None, connection=None)
    static exec_recipe_fragment(output_dataset, query, pre_queries=[], post_queries=[], overwrite_output_schema=True, use_stream_mode=True)
    query_to_df(query, pre_queries=None, post_queries=None, connection=None, extra_conf={})
    query_to_iter(query, pre_queries=None, post_queries=None, connection=None, extra_conf={})
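Inside a Python recipe whose output dataset lives on the same SQL connection, exec_recipe_fragment pushes a SELECT down to the database and writes its result directly into the output dataset. A minimal sketch, assuming a recipe output named "output_dataset" (a placeholder name):

import dataiku
from dataiku.core.sql import SQLExecutor2

# "output_dataset" is a placeholder: it must be an output of the recipe,
# stored on the same SQL connection as the queried table.
output = dataiku.Dataset("output_dataset")

# The database executes the SELECT and its result becomes the content of
# the output dataset (the output schema is overwritten by default).
SQLExecutor2.exec_recipe_fragment(output,
    "SELECT col1, COUNT(*) AS count FROM mytable GROUP BY col1")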
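HiveExecutor and ImpalaExecutor follow the same pattern and, as the signatures above show, can also be constructed from a dataset. A sketch assuming a hypothetical Hive-backed dataset named "hive_dataset":

import dataiku
from dataiku.core.sql import HiveExecutor

# Build the executor from a dataset stored on a Hive-enabled connection.
# "hive_dataset" is a placeholder name.
executor = HiveExecutor(dataset=dataiku.Dataset("hive_dataset"))

df = executor.query_to_df("SELECT col1, COUNT(*) AS count FROM mytable GROUP BY col1")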