110 lines
2.0 KiB
Python
110 lines
2.0 KiB
Python
from sklearn.datasets import load_iris
|
|
import pandas as pd
|
|
from pandasql import sqldf
|
|
from pandasql import load_meat, load_births
|
|
import re
|
|
|
|
births = load_births()
|
|
meat = load_meat()
|
|
iris = load_iris()
|
|
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
|
|
iris_df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
|
|
iris_df.columns = [re.sub("[() ]", "", col) for col in iris_df.columns]
|
|
|
|
print(sqldf("SELECT * FROM iris_df LIMIT 10;", locals()))
|
|
print(sqldf("SELECT sepalwidthcm, species FROM iris_df LIMIT 10;", locals()))
|
|
|
|
q = """
|
|
select
|
|
species
|
|
, avg(sepalwidthcm)
|
|
, min(sepalwidthcm)
|
|
, max(sepalwidthcm)
|
|
from
|
|
iris_df
|
|
group by
|
|
species;
|
|
|
|
"""
|
|
print("*" * 80)
|
|
print("aggregation")
|
|
print("-" * 80)
|
|
print(q)
|
|
print(sqldf(q, locals()))
|
|
|
|
|
|
def pysqldf(q):
|
|
"add this to your script if you get tired of calling locals()"
|
|
return sqldf(q, globals())
|
|
|
|
|
|
print("*" * 80)
|
|
print("calling from a helper function")
|
|
print('''def pysqldf(q):)
|
|
"add this to your script if you get tired of calling locals()"
|
|
return sqldf(q, globals())''')
|
|
print("-" * 80)
|
|
print(q)
|
|
print(pysqldf(q))
|
|
|
|
q = """
|
|
select
|
|
a.*
|
|
from
|
|
iris_df a
|
|
inner join
|
|
iris_df b
|
|
on a.species = b.species
|
|
limit 10;
|
|
"""
|
|
|
|
print("*" * 80)
|
|
print("joins")
|
|
print("-" * 80)
|
|
print(q)
|
|
print(pysqldf(q))
|
|
|
|
q = """
|
|
select
|
|
*
|
|
from
|
|
iris_df
|
|
where
|
|
species = 'virginica'
|
|
and sepallengthcm > 7.7;
|
|
"""
|
|
print("*" * 80)
|
|
print("where clause")
|
|
print("-" * 80)
|
|
print(q)
|
|
print(pysqldf(q))
|
|
iris_df['id'] = range(len(iris_df))
|
|
q = """
|
|
select
|
|
*
|
|
from
|
|
iris_df
|
|
where
|
|
id in (select id from iris_df where sepalwidthcm*sepallengthcm > 25);
|
|
"""
|
|
print("*" * 80)
|
|
print("subqueries")
|
|
print("-" * 80)
|
|
print(q)
|
|
print(pysqldf(q))
|
|
|
|
q = """
|
|
SELECT
|
|
m.*
|
|
, b.births
|
|
FROM
|
|
meat m
|
|
INNER JOIN
|
|
births b
|
|
on m.date = b.date
|
|
ORDER BY
|
|
m.date;
|
|
"""
|
|
|
|
print(pysqldf(q).head())
|