{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Installing R on WinPython\n", "\n", "#### This procedure applies to WinPython (version of December 2015 and after) \n", "### 1 - Downloading R binary" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Le volume dans le lecteur C n'a pas de nom.\n", " Le num‚ro de s‚rie du volume est 98F9-A53D\n", "\n", " R‚pertoire de C:\\WinP\\bd36\\buQt5\\winp64-3.6.x.0\\t\n", "\n", "21/10/2018 12:37 83ÿ351ÿ952 R-3.5.0-win.exe\n", " 1 fichier(s) 83ÿ351ÿ952 octets\n", " 0 R‚p(s) 24ÿ943ÿ108ÿ096 octets libres\n" ] } ], "source": [ "import os\n", "import sys\n", "import io\n", "\n", "# downloading R may take a few minutes (about 80 MB)\n", "try:\n", "    import urllib.request as urllib2  # Python 3\n", "except ImportError:\n", "    import urllib2  # Python 2\n", "\n", "# specify R binary and its expected (md5, sha1) hashes\n", "# R-3.5.0:\n", "r_url = \"https://cran.r-project.org/bin/windows/base/old/3.5.0/R-3.5.0-win.exe\"\n", "hashes=(\"d3f579b3aacfdf45a008df3320cb3615\",\"87cf0f72dcd91ff12627178e2438cb5a8d8a13c0\")\n", "\n", "# specify target location: try the \"t\" directory first,\n", "# then fall back to \"tools\" when \"t\" does not exist\n", "tool_base_directory=os.environ[\"WINPYDIR\"]+\"\\\\..\\\\t\\\\\"\n", "if not os.path.isdir(tool_base_directory):\n", "    tool_base_directory=os.environ[\"WINPYDIR\"]+\"\\\\..\\\\tools\\\\\"\n", "\n", "r_installer = tool_base_directory+os.path.basename(r_url)\n", "# exported so that the shell lines (!dir, !start) can expand %r_installer%\n", "os.environ[\"r_installer\"] = r_installer\n", "\n", "# Download (the connection is closed even if writing the file fails)\n", "g = urllib2.urlopen(r_url)\n", "try:\n", "    with io.open(r_installer, 'wb') as f:\n", "        f.write(g.read())\n", "finally:\n", "    g.close()\n", "g = None\n", "\n", "# checking it's there\n", "!dir %r_installer%" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "\n", "### 2 - Checking and installing R binary in the right place" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", 
"text": [ " MD5 SHA-1 \n", "-------------------------------- ----------------------------------------\n", "d3f579b3aacfdf45a008df3320cb3615 87cf0f72dcd91ff12627178e2438cb5a8d8a13c0 C:\\WinP\\bd36\\buQt5\\winp64-3.6.x.0\\python-3.6.7.amd64\\..\\t\\R-3.5.0-win.exe\n", "looks good!\n" ] } ], "source": [ "# checking it's the official R\n", "import hashlib\n", "def give_hash(of_file, with_this):\n", "    \"\"\"Return the hex digest of the file `of_file`, hashed with the hashlib constructor `with_this`.\"\"\"\n", "    with io.open(of_file, 'rb') as f:\n", "        return with_this(f.read()).hexdigest()\n", "\n", "# hash the installer once per algorithm: every call reads the whole file again\n", "md5_found = give_hash(r_installer, hashlib.md5)\n", "sha1_found = give_hash(r_installer, hashlib.sha1)\n", "print (\" \"*12+\"MD5\"+\" \"*(32-12-3)+\" \"+\" \"*15+\"SHA-1\"+\" \"*(40-15-5)+\"\\n\"+\"-\"*32+\" \"+\"-\"*40)\n", "print (\"%s %s %s\" % (md5_found , sha1_found,r_installer))\n", "if md5_found == hashes[0] and sha1_found == hashes[1]:\n", "    print(\"looks good!\")\n", "else:\n", "    print(\"problem ! please check\")\n", "    assert md5_found == hashes[0]\n", "    assert sha1_found == hashes[1]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# preparing DOS variables\n", "os.environ[\"R_HOME\"] = tool_base_directory+ \"R\\\\\" \n", "os.environ[\"R_HOMEbin\"]=os.environ[\"R_HOME\"] + \"bin\" \n", "\n", "# for installation we need this\n", "# (built from tool_base_directory, like R_HOME, so R is installed where R_HOME points)\n", "os.environ[\"tmp_Rbase\"]=os.path.normpath(os.path.join(tool_base_directory, 'R'))\n", "if 'amd64' in sys.version.lower():\n", "    r_comp ='/COMPONENTS=\"main,x64,translations\"'\n", "else:\n", "    r_comp ='/COMPONENTS=\"main,i386,translations\"'\n", "os.environ[\"tmp_R_comp\"]=r_comp\n" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# let's install it, if hashes do match\n", "assert give_hash(r_installer, hashlib.md5) == hashes[0]\n", "assert give_hash(r_installer, hashlib.sha1) == hashes[1]\n", "# If you are \"USB life style\", or multi-winpython\n", "# ==> CLICK the OPTION \"Don't create a 
StartMenuFolder\" <== (when it shows up)\n", "\n", "!start cmd /C %r_installer% /DIR=%tmp_Rbase% %tmp_R_comp%" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## During Installation (if you want to move the R installation afterwards)\n", "\n", "Choose the non-default option \"Yes (customized startup)\"\n", "\n", "Then, after 3 screens, select \"Don't create a Start Menu Folder\"\n", "\n", "Un-select \"Create a desktop icon\"\n", "\n", "Un-select \"Save version number in registry\"\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<img src=\"https://raw.githubusercontent.com/stonebig/winpython_afterdoc/master/examples/images/r_setup_unclick_shortcut.GIF\">" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3 - Create an R_launcher and install IRkernel" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "import os\n", "import sys\n", "import io\n", "# let's create a R launcher \n", "r_launcher = r\"\"\"\n", "@echo off\n", "call %~dp0env.bat\n", "rscript %*\n", "\"\"\"\n", "r_launcher_bat = os.environ[\"WINPYDIR\"]+\"\\\\..\\\\scripts\\\\R_launcher.bat\"\n", "\n", "# let's create a R init script\n", "# in manual command line, you can use repos = c('http://irkernel.github.io/', getOption('repos'))\n", "r_initialization = r\"\"\"\n", "install.packages(c('repr', 'IRdisplay', 'stringr', 'crayon', 'pbdZMQ', 'devtools'), repos = c('http://cran.rstudio.com/', 'http://cran.rstudio.com/'))\n", "devtools::install_github('IRkernel/IRkernel')\n", "library('pbdZMQ')\n", "library('repr')\n", "library('IRkernel')\n", "library('IRdisplay')\n", "library('crayon')\n", "library('stringr')\n", "IRkernel::installspec()\n", "\"\"\"\n", "r_initialization_r = os.path.normpath(os.environ[\"WINPYDIR\"]+\"\\\\..\\\\scripts\\\\R_initialization.r\")\n", "\n", "\n", "for i in [(r_launcher,r_launcher_bat), (r_initialization, r_initialization_r)]:\n", " with io.open(i[1], 'w', encoding = sys.getdefaultencoding() ) as 
f:\n", " for line in i[0].splitlines():\n", " f.write('%s\\n' % line )\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "!start cmd /C %WINPYDIR%\\..\\scripts\\R_launcher.bat --no-restore --no-save C:\\WinP\\bd36\\buQt5\\winp64-3.6.x.0\\scripts\\R_initialization.r\n" ] } ], "source": [ "#check what we are going to do \n", "print (\"!start cmd /C %WINPYDIR%\\\\..\\\\scripts\\\\R_launcher.bat --no-restore --no-save \" + r_initialization_r)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Launch Rkernel setup\n", "# (this is a shell line, not a Python string: backslashes are written once,\n", "# so the command matches the one previewed by the previous cell)\n", "os.environ[\"r_initialization_r\"] = r_initialization_r\n", "!start cmd /C %WINPYDIR%\\..\\scripts\\R_launcher.bat --no-restore --no-save %r_initialization_r% " ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "# make RKernel a movable installation with the rest of WinPython \n", "from winpython import utils\n", "base_winpython = os.path.dirname(os.path.normpath(os.environ[\"WINPYDIR\"]))\n", "rkernel_json=(base_winpython+\"\\\\settings\\\\kernels\\\\ir\\\\kernel.json\")\n", "\n", "# so we get \"argv\": [\"{prefix}/../tools/R/bin/x64/R\"\n", "utils.patch_sourcefile(rkernel_json, base_winpython.replace(\"\\\\\",\"/\"), r'{prefix}/..', silent_mode=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 4 - Install an R package via an IPython Kernel" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%load_ext rpy2.ipython\n", "\n", "#vitals: 'dplyr', 'R.utils', 'nycflights13'\n", "# installation takes 2 minutes\n", "%R install.packages(c('dplyr','R.utils', 'nycflights13'), repos='https://cran.rstudio.com/') " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 5 - Small demo via R magic" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": 
"stream", "text": [ "C:/WinP/bd36/buQt5/winp64-3.6.x.0/t/R\n" ] } ], "source": [ "!echo %R_HOME%" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The rpy2.ipython extension is already loaded. To reload it, use:\n", " %reload_ext rpy2.ipython\n" ] } ], "source": [ "%load_ext rpy2.ipython" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# avoid some pandas deprecation warning\n", "import warnings\n", "warnings.filterwarnings(\"ignore\", category=DeprecationWarning)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "%%R\n", "library('dplyr')\n", "library('nycflights13') \n", "write.csv(flights, \"flights.csv\")" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>year</th>\n", " <th>month</th>\n", " <th>day</th>\n", " <th>dep_time</th>\n", " <th>sched_dep_time</th>\n", " <th>dep_delay</th>\n", " <th>arr_time</th>\n", " <th>sched_arr_time</th>\n", " <th>arr_delay</th>\n", " <th>carrier</th>\n", " <th>flight</th>\n", " <th>tailnum</th>\n", " <th>origin</th>\n", " <th>dest</th>\n", " <th>air_time</th>\n", " <th>distance</th>\n", " <th>hour</th>\n", " <th>minute</th>\n", " <th>time_hour</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>2013</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>517</td>\n", " <td>515</td>\n", " <td>2.0</td>\n", " <td>830</td>\n", " <td>819</td>\n", " <td>11.0</td>\n", " <td>UA</td>\n", " 
<td>1545</td>\n", " <td>N14228</td>\n", " <td>EWR</td>\n", " <td>IAH</td>\n", " <td>227.0</td>\n", " <td>1400.0</td>\n", " <td>5.0</td>\n", " <td>15.0</td>\n", " <td>2013-01-01 11:00:00-05:00</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2013</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>533</td>\n", " <td>529</td>\n", " <td>4.0</td>\n", " <td>850</td>\n", " <td>830</td>\n", " <td>20.0</td>\n", " <td>UA</td>\n", " <td>1714</td>\n", " <td>N24211</td>\n", " <td>LGA</td>\n", " <td>IAH</td>\n", " <td>227.0</td>\n", " <td>1416.0</td>\n", " <td>5.0</td>\n", " <td>29.0</td>\n", " <td>2013-01-01 11:00:00-05:00</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>2013</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>542</td>\n", " <td>540</td>\n", " <td>2.0</td>\n", " <td>923</td>\n", " <td>850</td>\n", " <td>33.0</td>\n", " <td>AA</td>\n", " <td>1141</td>\n", " <td>N619AA</td>\n", " <td>JFK</td>\n", " <td>MIA</td>\n", " <td>160.0</td>\n", " <td>1089.0</td>\n", " <td>5.0</td>\n", " <td>40.0</td>\n", " <td>2013-01-01 11:00:00-05:00</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2013</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>544</td>\n", " <td>545</td>\n", " <td>-1.0</td>\n", " <td>1004</td>\n", " <td>1022</td>\n", " <td>-18.0</td>\n", " <td>B6</td>\n", " <td>725</td>\n", " <td>N804JB</td>\n", " <td>JFK</td>\n", " <td>BQN</td>\n", " <td>183.0</td>\n", " <td>1576.0</td>\n", " <td>5.0</td>\n", " <td>45.0</td>\n", " <td>2013-01-01 11:00:00-05:00</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>2013</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>554</td>\n", " <td>600</td>\n", " <td>-6.0</td>\n", " <td>812</td>\n", " <td>837</td>\n", " <td>-25.0</td>\n", " <td>DL</td>\n", " <td>461</td>\n", " <td>N668DN</td>\n", " <td>LGA</td>\n", " <td>ATL</td>\n", " <td>116.0</td>\n", " <td>762.0</td>\n", " <td>6.0</td>\n", " <td>0.0</td>\n", " <td>2013-01-01 12:00:00-05:00</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>2013</td>\n", " <td>1</td>\n", 
" <td>1</td>\n", " <td>554</td>\n", " <td>558</td>\n", " <td>-4.0</td>\n", " <td>740</td>\n", " <td>728</td>\n", " <td>12.0</td>\n", " <td>UA</td>\n", " <td>1696</td>\n", " <td>N39463</td>\n", " <td>EWR</td>\n", " <td>ORD</td>\n", " <td>150.0</td>\n", " <td>719.0</td>\n", " <td>5.0</td>\n", " <td>58.0</td>\n", " <td>2013-01-01 11:00:00-05:00</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " year month day dep_time sched_dep_time dep_delay arr_time \\\n", "0 2013 1 1 517 515 2.0 830 \n", "1 2013 1 1 533 529 4.0 850 \n", "2 2013 1 1 542 540 2.0 923 \n", "3 2013 1 1 544 545 -1.0 1004 \n", "4 2013 1 1 554 600 -6.0 812 \n", "5 2013 1 1 554 558 -4.0 740 \n", "\n", " sched_arr_time arr_delay carrier flight tailnum origin dest air_time \\\n", "0 819 11.0 UA 1545 N14228 EWR IAH 227.0 \n", "1 830 20.0 UA 1714 N24211 LGA IAH 227.0 \n", "2 850 33.0 AA 1141 N619AA JFK MIA 160.0 \n", "3 1022 -18.0 B6 725 N804JB JFK BQN 183.0 \n", "4 837 -25.0 DL 461 N668DN LGA ATL 116.0 \n", "5 728 12.0 UA 1696 N39463 EWR ORD 150.0 \n", "\n", " distance hour minute time_hour \n", "0 1400.0 5.0 15.0 2013-01-01 11:00:00-05:00 \n", "1 1416.0 5.0 29.0 2013-01-01 11:00:00-05:00 \n", "2 1089.0 5.0 40.0 2013-01-01 11:00:00-05:00 \n", "3 1576.0 5.0 45.0 2013-01-01 11:00:00-05:00 \n", "4 762.0 6.0 0.0 2013-01-01 12:00:00-05:00 \n", "5 719.0 5.0 58.0 2013-01-01 11:00:00-05:00 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%R head(flights) " ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " 
<th>faa</th>\n", " <th>name</th>\n", " <th>lat</th>\n", " <th>lon</th>\n", " <th>alt</th>\n", " <th>tz</th>\n", " <th>dst</th>\n", " <th>tzone</th>\n", " <th>dest</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>1</th>\n", " <td>ABQ</td>\n", " <td>Albuquerque International Sunport</td>\n", " <td>35.040222</td>\n", " <td>-106.609194</td>\n", " <td>5355</td>\n", " <td>-7.0</td>\n", " <td>A</td>\n", " <td>America/Denver</td>\n", " <td>ABQ</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>ACK</td>\n", " <td>Nantucket Mem</td>\n", " <td>41.253053</td>\n", " <td>-70.060181</td>\n", " <td>48</td>\n", " <td>-5.0</td>\n", " <td>A</td>\n", " <td>America/New_York</td>\n", " <td>ACK</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>ALB</td>\n", " <td>Albany Intl</td>\n", " <td>42.748267</td>\n", " <td>-73.801692</td>\n", " <td>285</td>\n", " <td>-5.0</td>\n", " <td>A</td>\n", " <td>America/New_York</td>\n", " <td>ALB</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>ANC</td>\n", " <td>Ted Stevens Anchorage Intl</td>\n", " <td>61.174361</td>\n", " <td>-149.996361</td>\n", " <td>152</td>\n", " <td>-9.0</td>\n", " <td>A</td>\n", " <td>America/Anchorage</td>\n", " <td>ANC</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>ATL</td>\n", " <td>Hartsfield Jackson Atlanta Intl</td>\n", " <td>33.636719</td>\n", " <td>-84.428067</td>\n", " <td>1026</td>\n", " <td>-5.0</td>\n", " <td>A</td>\n", " <td>America/New_York</td>\n", " <td>ATL</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>AUS</td>\n", " <td>Austin Bergstrom Intl</td>\n", " <td>30.194528</td>\n", " <td>-97.669889</td>\n", " <td>542</td>\n", " <td>-6.0</td>\n", " <td>A</td>\n", " <td>America/Chicago</td>\n", " <td>AUS</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " faa name lat lon alt tz \\\n", "1 ABQ Albuquerque International Sunport 35.040222 -106.609194 5355 -7.0 \n", "2 ACK Nantucket Mem 41.253053 -70.060181 48 -5.0 \n", "3 ALB Albany Intl 42.748267 
-73.801692 285 -5.0 \n", "4 ANC Ted Stevens Anchorage Intl 61.174361 -149.996361 152 -9.0 \n", "5 ATL Hartsfield Jackson Atlanta Intl 33.636719 -84.428067 1026 -5.0 \n", "6 AUS Austin Bergstrom Intl 30.194528 -97.669889 542 -6.0 \n", "\n", " dst tzone dest \n", "1 A America/Denver ABQ \n", "2 A America/New_York ACK \n", "3 A America/New_York ALB \n", "4 A America/Anchorage ANC \n", "5 A America/New_York ATL \n", "6 A America/Chicago AUS " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%R airports %>% mutate(dest = faa) %>% semi_join(flights) %>% head" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 6 - Installing the very best of R packages (optional, you will start to get a really big directory)\n" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "# essentials: 'tidyr', 'shiny', 'ggplot2', 'caret' , 'nnet' \n", "# remainder of the Hadley Wickham \"stack\" (https://github.com/rstudio)\n", "%R install.packages(c('tidyr', 'ggplot2', 'shiny','caret' , 'nnet'), repos='https://cran.rstudio.com/') \n", "%R install.packages(c('knitr', 'purrr', 'readr', 'readxl'), repos='https://cran.rstudio.com/')\n", "%R install.packages(c('rvest', 'lubridate', 'ggvis', 'readr','base64enc'), repos='https://cran.rstudio.com/')\n", "\n", "# TRAINING = online training book http://r4ds.had.co.nz/ (or https://github.com/hadley/r4ds)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 7 - Relaunch Jupyter Notebook to get an R kernel option\n", "Launch a new notebook of \"R\" type, and type in it:\n", " \n", "library('dplyr')\n", "\n", "library('nycflights13') \n", "\n", "head(flights)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 8 - To Un-install / Re-install R (or other trouble-shooting)\n", "\n", "- launch winpython**\\t\\R\\unins000.exe (was formerly winpython**\\tools\\R\\unins000.exe)\n", "\n", "- delete the directory winpython**\\t\\R (was formerly 
winpython**\\tools\\R)\n", "\n", "- re-install\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.7" } }, "nbformat": 4, "nbformat_minor": 2 }