You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
184 lines
5.4 KiB
184 lines
5.4 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Pandas demo from Jeff Reback\n",
|
|
"see https://github.com/jreback/PyDataNYC2015 or http://pandas.pydata.org/pandas-docs/version/0.17.1/style.html#Table-Styles"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%matplotlib inline\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"import seaborn as sns\n",
|
|
"pd.options.display.max_rows=12\n",
|
|
"pd.options.display.width=80\n",
|
|
"\n",
|
|
"# read a csv\n",
|
|
"df = pd.read_csv('~/seaborn-data/iris.csv',index_col=False)\n",
|
|
"# adjust column names\n",
|
|
"df.columns = df.columns.str.replace('\\s+','_').str.lower()\n",
|
|
"# in pandas 0.17.1 and later: sample and style ...\n",
|
|
"def color_negative_red(val):\n",
|
|
" \"\"\"\n",
|
|
" Takes a scalar and returns a string with\n",
|
|
" the css property `'color: red'` for int/float\n",
|
|
" values below 3, `'color: black'` otherwise.\n",
|
|
" \"\"\"\n",
|
|
" color = 'red' if (isinstance(val, (int, float)) and val < 3) else 'black'\n",
|
|
" return 'color: %s' % color\n",
|
|
"\n",
|
|
"def highlight_max(s):\n",
|
|
" '''\n",
|
|
" highlight the maximum in a Series\n",
|
|
" '''\n",
|
|
" is_max = s == s.max()\n",
|
|
" return ['background-color: yellow' if v else '' for v in is_max]\n",
|
|
"\n",
|
|
"cm = sns.light_palette(\"green\", as_cmap=True)\n",
|
|
"\n",
|
|
"(df\n",
|
|
" .sample (n=7)\n",
|
|
" .style\n",
|
|
" .applymap(color_negative_red, subset=pd.IndexSlice[['sepal_width', 'petal_width']])\n",
|
|
" .bar(subset=['sepal_length', 'petal_length'], color='#7F7FFF')\n",
|
|
" .background_gradient(subset=['sepal_width', 'petal_width'], cmap=cm)\n",
|
|
" .apply(highlight_max)\n",
|
|
" .highlight_null(null_color='red')\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# assign = define new temporary columns (like 'mutate' in R language)\n",
|
|
"(df\n",
|
|
" .query('sepal_length > 5')\n",
|
|
" .assign(sepal_ratio = lambda x: x.sepal_width / x.sepal_length,\n",
|
|
" petal_ratio = lambda x: x.petal_width / x.petal_length)\n",
|
|
" .plot\n",
|
|
" .scatter(x='sepal_ratio', y='petal_ratio', figsize=(8,4))\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# pipe = like '%>%' in R language\n",
|
|
"(df\n",
|
|
" .query('sepal_length > 5')\n",
|
|
" .assign(sepal_ratio = lambda x: x.sepal_width / x.sepal_length,\n",
|
|
" petal_ratio = lambda x: x.petal_width / x.petal_length)\n",
|
|
" .pipe(sns.pairplot, hue='species', height=1.5)\n",
|
|
")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Pandas interactive\n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"# create a df with random data\n",
|
|
"np.random.seed(24)\n",
|
|
"df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n",
|
|
"df = pd.concat([df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n",
|
|
" axis=1)\n",
|
|
"df.iloc[0, 2] = np.nan\n",
|
|
"\n",
|
|
"# interactive\n",
|
|
"#from IPython.html import widgets\n",
|
|
"from ipywidgets import widgets\n",
|
|
"@widgets.interact\n",
|
|
"def f(h_neg=(0, 359, 1), h_pos=(0, 359), s=(0., 99.9), l=(0., 99.9)):\n",
|
|
" return (df\n",
|
|
" .style\n",
|
|
" .background_gradient(\n",
|
|
" cmap=sns.palettes.diverging_palette(\n",
|
|
" h_neg=h_neg, h_pos=h_pos, s=s, l=l, as_cmap=True)\n",
|
|
" ).highlight_null()\n",
|
|
" )"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from IPython.display import HTML\n",
|
|
"\n",
|
|
"def hover(hover_color=\"#ff0f99\"):\n",
|
|
" return dict(selector=\"tr:hover\",\n",
|
|
" props=[(\"background-color\", \"%s\" % hover_color)])\n",
|
|
"\n",
|
|
"styles = [\n",
|
|
" hover(),\n",
|
|
" dict(selector=\"th\", props=[(\"font-size\", \"150%\"),\n",
|
|
" (\"text-align\", \"center\")]),\n",
|
|
" dict(selector=\"caption\", props=[(\"caption-side\", \"bottom\")])\n",
|
|
"]\n",
|
|
"html = (df.style.set_table_styles(styles)\n",
|
|
" .set_caption(\"Hover to highlight.\"))\n",
|
|
"html"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Useful links\n",
|
|
"\n",
|
|
"### Beginners Training Video: [\"Brandon Rhodes - Pandas From The Ground Up - PyCon 2015 \"](https://www.youtube.com/watch?v=5JnMutdy6Fw)\n",
|
|
"\n",
|
|
"### Pandas [API reference](http://pandas.pydata.org/pandas-docs/stable/api.html)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.2"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|