pythonbook/pandasSQL例子/demo.py

110 lines
2.0 KiB
Python

from sklearn.datasets import load_iris
import pandas as pd
from pandasql import sqldf
from pandasql import load_meat, load_births
import re
births = load_births()
meat = load_meat()
iris = load_iris()
iris_df = pd.DataFrame(iris.data, columns=iris.feature_names)
iris_df['species'] = pd.Categorical.from_codes(iris.target, iris.target_names)
iris_df.columns = [re.sub("[() ]", "", col) for col in iris_df.columns]
print(sqldf("SELECT * FROM iris_df LIMIT 10;", locals()))
print(sqldf("SELECT sepalwidthcm, species FROM iris_df LIMIT 10;", locals()))
q = """
select
species
, avg(sepalwidthcm)
, min(sepalwidthcm)
, max(sepalwidthcm)
from
iris_df
group by
species;
"""
print("*" * 80)
print("aggregation")
print("-" * 80)
print(q)
print(sqldf(q, locals()))
def pysqldf(q):
"add this to your script if you get tired of calling locals()"
return sqldf(q, globals())
print("*" * 80)
print("calling from a helper function")
print('''def pysqldf(q):)
"add this to your script if you get tired of calling locals()"
return sqldf(q, globals())''')
print("-" * 80)
print(q)
print(pysqldf(q))
q = """
select
a.*
from
iris_df a
inner join
iris_df b
on a.species = b.species
limit 10;
"""
print("*" * 80)
print("joins")
print("-" * 80)
print(q)
print(pysqldf(q))
q = """
select
*
from
iris_df
where
species = 'virginica'
and sepallengthcm > 7.7;
"""
print("*" * 80)
print("where clause")
print("-" * 80)
print(q)
print(pysqldf(q))
iris_df['id'] = range(len(iris_df))
q = """
select
*
from
iris_df
where
id in (select id from iris_df where sepalwidthcm*sepallengthcm > 25);
"""
print("*" * 80)
print("subqueries")
print("-" * 80)
print(q)
print(pysqldf(q))
q = """
SELECT
m.*
, b.births
FROM
meat m
INNER JOIN
births b
on m.date = b.date
ORDER BY
m.date;
"""
print(pysqldf(q).head())