SELECT * FROM events WHERE `date` = CURRENT_DATE() LIMIT 10 /*SA*/;
DESC events /*SA*/;
SELECT * FROM users LIMIT 10 /*SA(test_project)*/;
CREATE TABLE test_data AS
/*SA_BEGIN(test_project)*/ SELECT id, first_id, $city AS city FROM users LIMIT 10 /*SA_END*/;
CREATE TABLE default.export_data AS
/*SA_BEGIN(production)*/
SELECT user_id, time, event, $os AS _os FROM events WHERE date = CURRENT_DATE() LIMIT 10
/*SA_END*/;
from pyspark.sql import SparkSession
jdbc_url = "jdbc:impala://localhost:21050/rawdata;UseNativeQuery=1"
spark = SparkSession.builder.appName("sa-test").getOrCreate()
# Wrap the SA-hinted query in a derived table; the driver class matches the Scala example below.
df = spark.read.jdbc(
    url=jdbc_url,
    table="(/*SA(default)*/ SELECT date, event, count(*) AS c FROM events WHERE date=CURRENT_DATE() GROUP BY 1,2) a",
    properties={"driver": "com.cloudera.impala.jdbc41.Driver"},
)
df.select(df['date'], df['event'], df['c'] * 10000).show()
val test_jdbc = spark.read
  .format("jdbc")
  .option("url", "jdbc:impala://localhost:21050/rawdata;UseNativeQuery=1")
  .option("driver", "com.cloudera.impala.jdbc41.Driver")
  .option("dbtable", "(/*SA(default)*/ SELECT date, event, count(*) AS c FROM events WHERE date=CURRENT_DATE() GROUP BY 1,2) a")
  .load()
test_jdbc.show()
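Outside of Spark, the same SA-hinted SQL can be submitted by any client that reaches the Impala endpoint directly. The sketch below uses the impyla DB-API client with the host, port, and database taken from the JDBC examples above; impyla itself and its connection parameters are an assumption, not part of the original examples.

# Minimal sketch, assuming the SA-enabled Impala endpoint above (localhost:21050,
# database "rawdata") also accepts a plain HiveServer2/DB-API client such as impyla.
from impala.dbapi import connect

conn = connect(host="localhost", port=21050, database="rawdata")
cur = conn.cursor()
# The /*SA(default)*/ hint travels inside the SQL string unchanged.
cur.execute("/*SA(default)*/ SELECT date, event, count(*) AS c FROM events WHERE date=CURRENT_DATE() GROUP BY 1,2")
print(cur.fetchall())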