{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Pandas demo from Jeff Reback\n", "See https://github.com/jreback/PyDataNYC2015 or http://pandas.pydata.org/pandas-docs/version/0.17.1/style.html#Table-Styles" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "pd.options.display.max_rows=12\n", "pd.options.display.width=80\n", "\n", "# read a csv\n", "df = pd.read_csv('~/seaborn-data/iris.csv',index_col=False)\n", "# adjust column names: whitespace runs become '_', then everything is lower-cased.\n", "# regex=True is spelled out because pandas >= 2.0 treats the pattern as a literal by default.\n", "df.columns = df.columns.str.replace(r'\\s+', '_', regex=True).str.lower()\n", "# pandas >= 0.17.1: sample and style\n", "def color_negative_red(val, threshold=3):\n", "    \"\"\"\n", "    Return the CSS declaration 'color: red' when `val` is an int or float\n", "    strictly below `threshold`, and 'color: black' for everything else\n", "    (larger numbers and non-numeric values such as strings).\n", "\n", "    Despite the name, the default cut-off is 3, not 0.\n", "    \"\"\"\n", "    color = 'red' if (isinstance(val, (int, float)) and val < threshold) else 'black'\n", "    return 'color: %s' % color\n", "\n", "def highlight_max(s):\n", "    '''\n", "    Return one CSS string per element of the Series `s`:\n", "    a yellow background where the element equals s.max(), an empty string elsewhere.\n", "    '''\n", "    is_max = s == s.max()\n", "    return ['background-color: yellow' if v else '' for v in is_max]\n", "\n", "cm = sns.light_palette(\"green\", as_cmap=True)\n", "\n", "(df\n", "    .sample(n=7)\n", "    .style\n", "    .applymap(color_negative_red, subset=pd.IndexSlice[['sepal_width', 'petal_width']])\n", "    .bar(subset=['sepal_length', 'petal_length'], color='#7F7FFF')\n", "    .background_gradient(subset=['sepal_width', 'petal_width'], cmap=cm)\n", "    .apply(highlight_max)\n", "    # colour is passed positionally: the keyword was renamed from null_color to color, and pandas >= 2.0 rejects null_color\n", "    .highlight_null('red')\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# assign = define new temporary columns (like 'mutate' in R language)\n", "(df\n", " .query('sepal_length > 5')\n", " .assign(sepal_ratio = lambda x: x.sepal_width / x.sepal_length,\n", " petal_ratio = lambda x: x.petal_width / x.petal_length)\n", " .plot\n",
" .scatter(x='sepal_ratio', y='petal_ratio', figsize=(8,4))\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# pipe = like '%>%' in R language\n", "(df\n", " .query('sepal_length > 5')\n", " .assign(sepal_ratio = lambda x: x.sepal_width / x.sepal_length,\n", " petal_ratio = lambda x: x.petal_width / x.petal_length)\n", " .pipe(sns.pairplot, hue='species', height=1.5)\n", ")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Pandas interactive\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# create a frame of random data under its own name, so the iris `df`\n", "# used by the cells above is not overwritten and those cells stay re-runnable\n", "np.random.seed(24)\n", "random_df = pd.DataFrame({'A': np.linspace(1, 10, 10)})\n", "random_df = pd.concat([random_df, pd.DataFrame(np.random.randn(10, 4), columns=list('BCDE'))],\n", "                      axis=1)\n", "random_df.iloc[0, 2] = np.nan\n", "\n", "# interactive\n", "#from IPython.html import widgets\n", "from ipywidgets import widgets\n", "@widgets.interact\n", "def f(h_neg=(0, 359, 1), h_pos=(0, 359), s=(0., 99.9), l=(0., 99.9)):\n", "    \"\"\"\n", "    Style random_df with a background gradient whose colormap is a seaborn\n", "    diverging palette built from h_neg, h_pos, s and l, then highlight nulls.\n", "    \"\"\"\n", "    return (random_df\n", "            .style\n", "            .background_gradient(\n", "                cmap=sns.palettes.diverging_palette(\n", "                    h_neg=h_neg, h_pos=h_pos, s=s, l=l, as_cmap=True)\n", "            ).highlight_null()\n", "           )" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "from IPython.display import HTML\n", "\n", "def hover(hover_color=\"#ff0f99\"):\n", "    \"\"\"Return a table-style dict giving hovered rows (tr:hover) the background colour `hover_color`.\"\"\"\n", "    return dict(selector=\"tr:hover\",\n", "                props=[(\"background-color\", \"%s\" % hover_color)])\n", "\n", "styles = [\n", "    hover(),\n", "    dict(selector=\"th\", props=[(\"font-size\", \"150%\"),\n", "                               (\"text-align\", \"center\")]),\n", "    dict(selector=\"caption\", props=[(\"caption-side\", \"bottom\")])\n", "]\n", "html = (random_df.style.set_table_styles(styles)\n", "        .set_caption(\"Hover to highlight.\"))\n", "html" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Useful links\n", "\n", "### Beginners Training Video: [\"Brandon Rhodes - Pandas From The Ground
Up - PyCon 2015 \"](https://www.youtube.com/watch?v=5JnMutdy6Fw)\n", "\n", "### Pandas [API reference](http://pandas.pydata.org/pandas-docs/stable/api.html)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.2" } }, "nbformat": 4, "nbformat_minor": 2 }