You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
933 lines
27 KiB
933 lines
27 KiB
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"# WinPython Default Checker"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import warnings\n",
|
|
"warnings.filterwarnings(\"ignore\", category=DeprecationWarning)\n",
|
|
"warnings.filterwarnings(\"ignore\", category=UserWarning)\n",
|
|
"warnings.filterwarnings(\"ignore\", category=FutureWarning)\n",
|
|
"# warnings.filterwarnings(\"ignore\") # would silence all warnings"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%matplotlib inline"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Compilers: Numba and Cython\n",
|
|
"\n",
|
|
"##### Requirement\n",
|
|
"To get Cython working, WinPython 3.5 users should install \"Microsoft Visual C++ Build Tools 2015\" (visualcppbuildtools_full.exe, a 4 GB installation) from <https://beta.visualstudio.com/download-visual-studio-vs/>\n",
|
|
"\n",
|
|
"To get Numba working, users who are not on Windows 10 may have to install \"Microsoft Visual C++ 2015 Redistributable\" (vc_redist) from <https://beta.visualstudio.com/download-visual-studio-vs/>\n",
|
|
"\n",
|
|
"#### Compiler toolchains"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking Numba JIT toolchain\n",
|
|
"import numpy as np\n",
|
|
"image = np.zeros((1024, 1536), dtype = np.uint8)\n",
|
|
"\n",
|
|
"from pylab import imshow, show\n",
|
|
"from timeit import default_timer as timer\n",
|
|
"\n",
|
|
"def create_fractal(min_x, max_x, min_y, max_y, image, iters , mandelx):\n",
|
|
" height = image.shape[0]\n",
|
|
" width = image.shape[1]\n",
|
|
" pixel_size_x = (max_x - min_x) / width\n",
|
|
" pixel_size_y = (max_y - min_y) / height\n",
|
|
" \n",
|
|
" for x in range(width):\n",
|
|
" real = min_x + x * pixel_size_x\n",
|
|
" for y in range(height):\n",
|
|
" imag = min_y + y * pixel_size_y\n",
|
|
" color = mandelx(real, imag, iters)\n",
|
|
" image[y, x] = color"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"##### Numba (a JIT Compiler)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"from numba import autojit\n",
|
|
"\n",
|
|
"@autojit\n",
|
|
"def mandel(x, y, max_iters):\n",
|
|
" c = complex(x, y)\n",
|
|
" z = 0.0j\n",
|
|
" for i in range(max_iters):\n",
|
|
" z = z*z + c\n",
|
|
" if (z.real*z.real + z.imag*z.imag) >= 4:\n",
|
|
" return i\n",
|
|
" return max_iters\n",
|
|
"\n",
|
|
"start = timer()\n",
|
|
"create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20 , mandel) \n",
|
|
"dt = timer() - start\n",
|
|
"\n",
|
|
"print (\"Mandelbrot created by numba in %f s\" % dt)\n",
|
|
"imshow(image)\n",
|
|
"show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"##### Cython (a compiler for writing C extensions for the Python language)\n",
|
|
"WinPython 3.5 and 3.6 users may not have mingwpy available, and so need to install \"Visual Studio C++ Community Edition 2015\": <https://www.visualstudio.com/downloads/download-visual-studio-vs#d-visual-c>"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Cython + Mingwpy compiler toolchain test\n",
|
|
"%load_ext Cython"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%%cython -a\n",
|
|
"# with %%cython -a, lines running at full C speed are shown in white; the slowest, Python-speed lines are shown in dark yellow\n",
|
|
"# ==> focus your Cython rewrite effort on the dark yellow lines\n",
|
|
"def mandel_cython(x, y, max_iters):\n",
|
|
" cdef int i \n",
|
|
" cdef double cx, cy , zx, zy\n",
|
|
" cx , cy = x, y \n",
|
|
" zx , zy =0 ,0 \n",
|
|
" for i in range(max_iters):\n",
|
|
" zx , zy = zx*zx - zy*zy + cx , zx*zy*2 + cy\n",
|
|
" if (zx*zx + zy*zy) >= 4:\n",
|
|
" return i\n",
|
|
" return max_iters"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"start = timer()\n",
|
|
"create_fractal(-2.0, 1.0, -1.0, 1.0, image, 20 , mandel_cython) \n",
|
|
"dt = timer() - start\n",
|
|
"\n",
|
|
"print (\"Mandelbrot created by cython in %f s\" % dt)\n",
|
|
"imshow(image)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Graphics: Matplotlib, Pandas, Seaborn, Holoviews, Bokeh, bqplot, ipyleaflet, plotnine"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Matplotlib\n",
|
|
"# for more examples, see: http://matplotlib.org/gallery.html\n",
|
|
"from mpl_toolkits.mplot3d import axes3d\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"from matplotlib import cm\n",
|
|
"\n",
|
|
"fig = plt.figure()\n",
|
|
"ax = fig.gca(projection='3d')\n",
|
|
"X, Y, Z = axes3d.get_test_data(0.05)\n",
|
|
"ax.plot_surface(X, Y, Z, rstride=8, cstride=8, alpha=0.3)\n",
|
|
"cset = ax.contourf(X, Y, Z, zdir='z', offset=-100, cmap=cm.coolwarm)\n",
|
|
"cset = ax.contourf(X, Y, Z, zdir='x', offset=-40, cmap=cm.coolwarm)\n",
|
|
"cset = ax.contourf(X, Y, Z, zdir='y', offset=40, cmap=cm.coolwarm)\n",
|
|
"\n",
|
|
"ax.set_xlabel('X')\n",
|
|
"ax.set_xlim(-40, 40)\n",
|
|
"ax.set_ylabel('Y')\n",
|
|
"ax.set_ylim(-40, 40)\n",
|
|
"ax.set_zlabel('Z')\n",
|
|
"ax.set_zlim(-100, 100)\n",
|
|
"\n",
|
|
"plt.show()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Seaborn\n",
|
|
"# for more examples, see https://seaborn.pydata.org/examples/index.html\n",
|
|
"import seaborn as sns\n",
|
|
"sns.set()\n",
|
|
"df = sns.load_dataset(\"iris\")\n",
|
|
"sns.pairplot(df, hue=\"species\", height=1.5)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# altair-2.0.0 example\n",
|
|
"import altair as alt\n",
|
|
"\n",
|
|
"# Uncomment/run this line to enable Altair in JupyterLab/nteract:\n",
|
|
"#alt.renderers.enable('default') # api_v2\n",
|
|
"#alt.renderers.enable('notebook') # api_v2,if in Notebook\n",
|
|
"alt.Chart(df).mark_bar().encode(\n",
|
|
" x=alt.X('sepal_length', bin=alt.Bin(maxbins=50)),\n",
|
|
" y='count(*):Q',\n",
|
|
" color='species:N',\n",
|
|
" #column='species',\n",
|
|
").interactive() # api_v1 .configure_cell(width=200)\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# temporary warning removal\n",
|
|
"import warnings\n",
|
|
"import matplotlib as mpl\n",
|
|
"warnings.filterwarnings(\"ignore\", category=mpl.cbook.MatplotlibDeprecationWarning)\n",
|
|
"# Holoviews\n",
|
|
"# for more examples, see http://holoviews.org/Tutorials/index.html\n",
|
|
"import numpy as np\n",
|
|
"import holoviews as hv\n",
|
|
"hv.extension('matplotlib')\n",
|
|
"dots = np.linspace(-0.45, 0.45, 11)\n",
|
|
"fractal = hv.Image(image)\n",
|
|
"\n",
|
|
"layouts = {y: (fractal * hv.Points(fractal.sample([(i,y) for i in dots])) +\n",
|
|
" fractal.sample(y=y) )\n",
|
|
" for y in np.linspace(0, 0.45,11)}\n",
|
|
"\n",
|
|
"hv.HoloMap(layouts, kdims=['Y']).collate().cols(2)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Bokeh 0.12.5 \n",
|
|
"import numpy as np\n",
|
|
"from six.moves import zip\n",
|
|
"from bokeh.plotting import figure, show, output_notebook\n",
|
|
"N = 4000\n",
|
|
"x = np.random.random(size=N) * 100\n",
|
|
"y = np.random.random(size=N) * 100\n",
|
|
"radii = np.random.random(size=N) * 1.5\n",
|
|
"colors = [\"#%02x%02x%02x\" % (int(r), int(g), 150) for r, g in zip(50+2*x, 30+2*y)]\n",
|
|
"\n",
|
|
"output_notebook()\n",
|
|
"TOOLS=\"hover,crosshair,pan,wheel_zoom,box_zoom,reset,tap,save,box_select,poly_select,lasso_select\"\n",
|
|
"\n",
|
|
"p = figure(tools=TOOLS)\n",
|
|
"p.scatter(x,y, radius=radii, fill_color=colors, fill_alpha=0.6, line_color=None)\n",
|
|
"show(p)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Datashader (holoviews+Bokeh)\n",
|
|
"import numpy as np\n",
|
|
"import pandas as pd\n",
|
|
"import holoviews as hv\n",
|
|
"import datashader as ds\n",
|
|
"from holoviews.operation.datashader import aggregate, shade, datashade, dynspread\n",
|
|
"from bokeh.models import DatetimeTickFormatter\n",
|
|
"hv.extension('bokeh')\n",
|
|
"\n",
|
|
"def time_series(T = 1, N = 100, mu = 0.1, sigma = 0.1, S0 = 20): \n",
|
|
" \"\"\"Parameterized noisy time series\"\"\"\n",
|
|
" dt = float(T)/N\n",
|
|
" t = np.linspace(0, T, N)\n",
|
|
" W = np.random.standard_normal(size = N) \n",
|
|
" W = np.cumsum(W)*np.sqrt(dt) # standard brownian motion\n",
|
|
" X = (mu-0.5*sigma**2)*t + sigma*W \n",
|
|
" S = S0*np.exp(X) # geometric brownian motion\n",
|
|
" return S\n",
|
|
"\n",
|
|
"def apply_formatter(plot, element):\n",
|
|
" plot.handles['xaxis'].formatter = DatetimeTickFormatter()\n",
|
|
" \n",
|
|
"drange = pd.date_range(start=\"2014-01-01\", end=\"2016-01-01\", freq='1D') # or '1min'\n",
|
|
"dates = drange.values.astype('int64')/10**6 # Convert dates to ints\n",
|
|
"curve = hv.Curve((dates, time_series(N=len(dates), sigma = 1)))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%%opts RGB [finalize_hooks=[apply_formatter] width=800]\n",
|
|
"%%opts Overlay [finalize_hooks=[apply_formatter] width=800] \n",
|
|
"%%opts Scatter [tools=['hover', 'box_select']] (line_color=\"black\" fill_color=\"red\" size=10)\n",
|
|
"\n",
|
|
"from holoviews.operation.timeseries import rolling, rolling_outlier_std\n",
|
|
"smoothed = rolling(curve, rolling_window=50)\n",
|
|
"outliers = rolling_outlier_std(curve, rolling_window=50, sigma=2)\n",
|
|
"datashade(curve, cmap=[\"blue\"]) * dynspread(datashade(smoothed, cmap=[\"red\"]),max_px=1) * outliers"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#bqplot\n",
|
|
"from IPython.display import display\n",
|
|
"from bqplot import (Figure, Map, Mercator, Orthographic, ColorScale, ColorAxis,\n",
|
|
" AlbersUSA, topo_load, Tooltip)\n",
|
|
"def_tt = Tooltip(fields=['id', 'name'])\n",
|
|
"map_mark = Map(scales={'projection': Mercator()}, tooltip=def_tt)\n",
|
|
"map_mark.interactions = {'click': 'select', 'hover': 'tooltip'}\n",
|
|
"fig = Figure(marks=[map_mark], title='Interactions Example')\n",
|
|
"display(fig)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# ipyleaflet (javascript library usage)\n",
|
|
"from ipyleaflet import (\n",
|
|
" Map, Marker, TileLayer, ImageOverlay, Polyline, Polygon,\n",
|
|
" Rectangle, Circle, CircleMarker, GeoJSON, DrawControl\n",
|
|
")\n",
|
|
"from traitlets import link\n",
|
|
"center = [34.6252978589571, -77.34580993652344]\n",
|
|
"m = Map(center=[34.6252978589571, -77.34580993652344], zoom=10)\n",
|
|
"dc = DrawControl()\n",
|
|
"\n",
|
|
"def handle_draw(self, action, geo_json):\n",
|
|
" print(action)\n",
|
|
" print(geo_json)\n",
|
|
"m\n",
|
|
"m"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"dc.on_draw(handle_draw)\n",
|
|
"m.add_control(dc)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# plotnine: giving a taste of the R language's ggplot (formerly we were using ggpy)\n",
|
|
"from plotnine import ggplot, aes, geom_blank, geom_point, stat_smooth, facet_wrap, theme_bw\n",
|
|
"from plotnine.data import mtcars\n",
|
|
"ggplot(mtcars, aes(x='hp', y='wt', color='mpg')) + geom_point() +\\\n",
|
|
"facet_wrap(\"~cyl\") + theme_bw()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## IPython Notebook: Interactivity & Other"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"import IPython;IPython.__version__"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Audio Example : https://github.com/ipython/ipywidgets/blob/master/examples/Beat%20Frequencies.ipynb\n",
|
|
"%matplotlib inline\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np\n",
|
|
"from ipywidgets import interactive\n",
|
|
"from IPython.display import Audio, display\n",
|
|
"def beat_freq(f1=220.0, f2=224.0):\n",
|
|
" max_time = 3\n",
|
|
" rate = 8000\n",
|
|
" times = np.linspace(0,max_time,rate*max_time)\n",
|
|
" signal = np.sin(2*np.pi*f1*times) + np.sin(2*np.pi*f2*times)\n",
|
|
" print(f1, f2, abs(f1-f2))\n",
|
|
" display(Audio(data=signal, rate=rate))\n",
|
|
" try:\n",
|
|
" plt.plot(signal); #plt.plot(v.result);\n",
|
|
" except:\n",
|
|
" pass\n",
|
|
" return signal\n",
|
|
"v = interactive(beat_freq, f1=(200.0,300.0), f2=(200.0,300.0))\n",
|
|
"display(v)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Networks graph Example : https://github.com/ipython/ipywidgets/blob/master/examples/Exploring%20Graphs.ipynb\n",
|
|
"%matplotlib inline\n",
|
|
"from ipywidgets import interact\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import networkx as nx\n",
|
|
"# wrap a few graph generation functions so they have the same signature\n",
|
|
"\n",
|
|
"def random_lobster(n, m, k, p):\n",
|
|
" return nx.random_lobster(n, p, p / m)\n",
|
|
"\n",
|
|
"def powerlaw_cluster(n, m, k, p):\n",
|
|
" return nx.powerlaw_cluster_graph(n, m, p)\n",
|
|
"\n",
|
|
"def erdos_renyi(n, m, k, p):\n",
|
|
" return nx.erdos_renyi_graph(n, p)\n",
|
|
"\n",
|
|
"def newman_watts_strogatz(n, m, k, p):\n",
|
|
" return nx.newman_watts_strogatz_graph(n, k, p)\n",
|
|
"\n",
|
|
"@interact(n=(2,30), m=(1,10), k=(1,10), p=(0.0, 1.0, 0.001),\n",
|
|
" generator={'lobster': random_lobster,\n",
|
|
" 'power law': powerlaw_cluster,\n",
|
|
" 'Newman-Watts-Strogatz': newman_watts_strogatz,\n",
|
|
" u'Erdős-Rényi': erdos_renyi,\n",
|
|
" })\n",
|
|
"def plot_random_graph(n, m, k, p, generator):\n",
|
|
" g = generator(n, m, k, p)\n",
|
|
" nx.draw(g)\n",
|
|
" plt.title(generator.__name__)\n",
|
|
" plt.show()\n",
|
|
" "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Mathematical: statsmodels, lmfit"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking statsmodels\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"plt.style.use('ggplot')\n",
|
|
"import statsmodels.api as sm\n",
|
|
"data = sm.datasets.anes96.load_pandas()\n",
|
|
"party_ID = np.arange(7)\n",
|
|
"labels = [\"Strong Democrat\", \"Weak Democrat\", \"Independent-Democrat\",\n",
|
|
" \"Independent-Independent\", \"Independent-Republican\",\n",
|
|
" \"Weak Republican\", \"Strong Republican\"]\n",
|
|
"plt.rcParams['figure.subplot.bottom'] = 0.23 # keep labels visible\n",
|
|
"plt.rcParams['figure.figsize'] = (6.0, 4.0) # make plot larger in notebook\n",
|
|
"age = [data.exog['age'][data.endog == id] for id in party_ID]\n",
|
|
"fig = plt.figure()\n",
|
|
"ax = fig.add_subplot(111)\n",
|
|
"plot_opts={'cutoff_val':5, 'cutoff_type':'abs',\n",
|
|
" 'label_fontsize':'small',\n",
|
|
" 'label_rotation':30}\n",
|
|
"sm.graphics.beanplot(age, ax=ax, labels=labels,\n",
|
|
" plot_opts=plot_opts)\n",
|
|
"ax.set_xlabel(\"Party identification of respondent\")\n",
|
|
"ax.set_ylabel(\"Age\")"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# lmfit test (from http://nbviewer.ipython.org/github/lmfit/lmfit-py/blob/master/examples/lmfit-model.ipynb)\n",
|
|
"import numpy as np\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"def decay(t, N, tau):\n",
|
|
" return N*np.exp(-t/tau)\n",
|
|
"t = np.linspace(0, 5, num=1000)\n",
|
|
"data = decay(t, 7, 3) + np.random.randn(*t.shape)\n",
|
|
"\n",
|
|
"from lmfit import Model\n",
|
|
"\n",
|
|
"model = Model(decay, independent_vars=['t'])\n",
|
|
"result = model.fit(data, t=t, N=10, tau=1)\n",
|
|
"plt.plot(t, data) # data\n",
|
|
"plt.plot(t, decay(t=t, **result.values), color='orange', linewidth=5) # best-fit model"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## DataFrames: Pandas, Dask"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"#Pandas \n",
|
|
"import pandas as pd\n",
|
|
"import numpy as np\n",
|
|
"\n",
|
|
"idx = pd.date_range('2000', '2005', freq='d', closed='left')\n",
|
|
"datas = pd.DataFrame({'Color': [ 'green' if x> 1 else 'red' for x in np.random.randn(len(idx))], \n",
|
|
" 'Measure': np.random.randn(len(idx)), 'Year': idx.year},\n",
|
|
" index=idx.date)\n",
|
|
"datas.head()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"### Split / Apply / Combine \n",
|
|
"- Split your data into multiple independent groups.\n",
|
|
"- Apply some function to each group.\n",
|
|
"- Combine your groups back into a single data object.\n"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"datas.query('Measure > 0').groupby(['Color','Year']).size().unstack()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Web Scraping: BeautifulSoup"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking Web Scraping: beautifulsoup and requests \n",
|
|
"import requests\n",
|
|
"from bs4 import BeautifulSoup\n",
|
|
"\n",
|
|
"URL = 'http://en.wikipedia.org/wiki/Franklin,_Tennessee'\n",
|
|
"\n",
|
|
"req = requests.get(URL, headers={'User-Agent' : \"Mining the Social Web\"})\n",
|
|
"soup = BeautifulSoup(req.text, \"lxml\")\n",
|
|
"\n",
|
|
"geoTag = soup.find(True, 'geo')\n",
|
|
"\n",
|
|
"if geoTag and len(geoTag) > 1:\n",
|
|
" lat = geoTag.find(True, 'latitude').string\n",
|
|
" lon = geoTag.find(True, 'longitude').string\n",
|
|
" print ('Location is at', lat, lon)\n",
|
|
"elif geoTag and len(geoTag) == 1:\n",
|
|
" (lat, lon) = geoTag.string.split(';')\n",
|
|
" (lat, lon) = (lat.strip(), lon.strip())\n",
|
|
" print ('Location is at', lat, lon)\n",
|
|
"else:\n",
|
|
" print ('No location found')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Operations Research: Pulp"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {
|
|
"scrolled": true
|
|
},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Pulp example : minimizing the weight to carry 99 pennies\n",
|
|
"# (from Philip I Thomas)\n",
|
|
"# see https://www.youtube.com/watch?v=UmMn-N5w-lI#t=995\n",
|
|
"# Import PuLP modeler functions\n",
|
|
"from pulp import *\n",
|
|
"# The prob variable is created to contain the problem data \n",
|
|
"prob = LpProblem(\"99 pennies Problem\",LpMinimize)\n",
|
|
"\n",
|
|
"# Variables represent how many of each coin we want to carry\n",
|
|
"pennies = LpVariable(\"Number of pennies\",0,None,LpInteger)\n",
|
|
"nickels = LpVariable(\"Number of nickels\",0,None,LpInteger)\n",
|
|
"dimes = LpVariable(\"Number of dimes\",0,None,LpInteger)\n",
|
|
"quarters = LpVariable(\"Number of quarters\",0,None,LpInteger)\n",
|
|
"\n",
|
|
"# The objective function is added to 'prob' first\n",
|
|
"\n",
|
|
"# we want to minimize (LpMinimize) this \n",
|
|
"prob += 2.5 * pennies + 5 * nickels + 2.268 * dimes + 5.670 * quarters, \"Total coins Weight\"\n",
|
|
"\n",
|
|
"# We want exactly 99 cents\n",
|
|
"prob += 1 * pennies + 5 * nickels + 10 * dimes + 25 * quarters == 99, \"\"\n",
|
|
"\n",
|
|
"# The problem data is written to an .lp file\n",
|
|
"prob.writeLP(\"99cents.lp\")\n",
|
|
"prob.solve()\n",
|
|
"\n",
|
|
"# print (\"status\",LpStatus[prob.status] )\n",
|
|
"print (\"Minimal Weight to carry exactly 99 pennies is %s grams\" % value(prob.objective))\n",
|
|
"# Each of the variables is printed with its resolved optimum value\n",
|
|
"for v in prob.variables():\n",
|
|
" print (v.name, \"=\", v.varValue)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Deep Learning: see tutorial-first-neural-network-python-keras"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Symbolic Calculation: sympy"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking sympy \n",
|
|
"import sympy\n",
|
|
"a, b =sympy.symbols('a b')\n",
|
|
"e=(a+b)**5\n",
|
|
"e.expand()"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## SQL tools: sqlite, Ipython-sql, sqlite_bro, baresql, db.py"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking Ipython-sql, sqlparse, SQLalchemy\n",
|
|
"%load_ext sql"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"%%sql sqlite:///.baresql.db\n",
|
|
"DROP TABLE IF EXISTS writer;\n",
|
|
"CREATE TABLE writer (first_name, last_name, year_of_death);\n",
|
|
"INSERT INTO writer VALUES ('William', 'Shakespeare', 1616);\n",
|
|
"INSERT INTO writer VALUES ('Bertold', 'Brecht', 1956);\n",
|
|
"SELECT * , sqlite_version() as sqlite_version from Writer order by Year_of_death"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking baresql\n",
|
|
"from __future__ import print_function, unicode_literals, division # line needed only if Python2.7\n",
|
|
"from baresql import baresql\n",
|
|
"bsql = baresql.baresql(connection=\"sqlite:///.baresql.db\")\n",
|
|
"bsqldf = lambda q: bsql.df(q, dict(globals(),**locals()))\n",
|
|
"\n",
|
|
"users = ['Alexander', 'Billy', 'Charles', 'Danielle', 'Esmeralda', 'Franz', 'Greg']\n",
|
|
"# We use the python 'users' list like a SQL table\n",
|
|
"sql = \"select 'Welcome ' || c0 || ' !' as say_hello, length(c0) as name_length from users$$ where c0 like '%a%' \"\n",
|
|
"bsqldf(sql)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Transferring the `datas` DataFrame to SQLite, doing the transformation in SQL, then going back to pandas and Matplotlib\n",
|
|
"bsqldf('''\n",
|
|
"select Color, Year, count(*) as size \n",
|
|
"from datas$$ \n",
|
|
"where Measure > 0 \n",
|
|
"group by Color, Year'''\n",
|
|
" ).set_index(['Year', 'Color']).unstack().plot(kind='bar')"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking db.py\n",
|
|
"from db import DB\n",
|
|
"db=DB(dbtype=\"sqlite\", filename=\".baresql.db\")\n",
|
|
"db.query(\"select sqlite_version() as sqlite_version ;\") "
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"db.tables"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# checking sqlite_bro: this should launch a separate non-browser window showing sqlite_bro's welcome screen\n",
|
|
"!cmd start cmd /C sqlite_bro"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# pyodbc \n",
|
|
"import pyodbc\n",
|
|
"\n",
|
|
"# look for pyodbc providers\n",
|
|
"sources = pyodbc.dataSources()\n",
|
|
"dsns = list(sources.keys())\n",
|
|
"sl = [' %s [%s]' % (dsn, sources[dsn]) for dsn in dsns]\n",
|
|
"print(\"pyodbc Providers: (beware 32/64 bit driver and python version must match)\\n\", '\\n'.join(sl))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# pythonnet\n",
|
|
"import clr\n",
|
|
"clr.AddReference(\"System.Data\")\n",
|
|
"clr.AddReference('System.Data.Common')\n",
|
|
"import System.Data.OleDb as ADONET\n",
|
|
"import System.Data.Odbc as ODBCNET\n",
|
|
"import System.Data.Common as DATACOM\n",
|
|
"\n",
|
|
"table = DATACOM.DbProviderFactories.GetFactoryClasses()\n",
|
|
"print(\"\\n .NET Providers: (beware 32/64 bit driver and python version must match)\")\n",
|
|
"for row in table.Rows:\n",
|
|
" print(\" %s\" % row[table.Columns[0]])\n",
|
|
" print(\" \",[row[column] for column in table.Columns if column != table.Columns[0]])"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Qt libraries Demo\n",
|
|
"\n",
|
|
" \n",
|
|
"#### See [Dedicated Qt Libraries Demo](Qt_libraries_demo.ipynb)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "markdown",
|
|
"metadata": {},
|
|
"source": [
|
|
"## Wrap-up"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# optional scipy full test (takes up to 10 minutes)\n",
|
|
"#!cmd /C start cmd /k python.exe -c \"import scipy;scipy.test()\""
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"!pip list"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "Python 3",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.6.7rc1"
|
|
},
|
|
"widgets": {
|
|
"state": {
|
|
"056d32c70f644417b86a152d3a2385bd": {
|
|
"views": [
|
|
{
|
|
"cell_index": 14
|
|
}
|
|
]
|
|
},
|
|
"2307e84bf81346d49818eef8862360ca": {
|
|
"views": [
|
|
{
|
|
"cell_index": 22
|
|
}
|
|
]
|
|
},
|
|
"4e7a6f5db8e74905a08d4636afa3b82f": {
|
|
"views": [
|
|
{
|
|
"cell_index": 15
|
|
}
|
|
]
|
|
},
|
|
"e762d7875083491eb2933958cc3331a9": {
|
|
"views": [
|
|
{
|
|
"cell_index": 21
|
|
}
|
|
]
|
|
}
|
|
},
|
|
"version": "1.2.0"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 2
|
|
}
|