From 2689ecbca70730bc7bb0866646db685c11df1fa2 Mon Sep 17 00:00:00 2001 From: Sumit Khanna Date: Sun, 8 Feb 2015 01:34:10 +0530 Subject: [PATCH 1/2] update it to write packages to a text file The codecs module is needed for reading/writing non-ASCII Unicode characters (code points 128+) too. --- scripts/pull_R_packages.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/pull_R_packages.py b/scripts/pull_R_packages.py index ff1a957..d8aa5e0 100755 --- a/scripts/pull_R_packages.py +++ b/scripts/pull_R_packages.py @@ -7,10 +7,11 @@ from pyquery import PyQuery as pq import urllib +import codecs - +text_file = codecs.open("Packages.txt",encoding='utf-8',mode="w") d = pq(url='http://cran.r-project.org/web/views/MachineLearning.html',opener=lambda url, **kw: urllib.urlopen(url).read()) -index = 0 + for e in d("li").items(): package_name = e("a").html() package_link = e("a")[0].attrib['href'] @@ -18,6 +19,7 @@ for e in d("li").items(): package_link = package_link.replace("..",'http://cran.r-project.org/web') dd = pq(url=package_link,opener=lambda url, **kw: urllib.urlopen(url).read()) package_description = dd("h2").html() - print "* [%s](%s) - %s" % (package_name,package_link,package_description) + text_file.write(" [%s](%s) - %s \n" % (package_name,package_link,package_description)) + # print "* [%s](%s) - %s" % (package_name,package_link,package_description) - index += 1 \ No newline at end of file + index += 1 From be6dfae41c897003b74962682ce1d27896fe1db3 Mon Sep 17 00:00:00 2001 From: Sumit Khanna Date: Sun, 8 Feb 2015 01:35:17 +0530 Subject: [PATCH 2/2] Create requirements.txt pip installs all the packages listed here --- scripts/requirements.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 scripts/requirements.txt diff --git a/scripts/requirements.txt b/scripts/requirements.txt new file mode 100644 index 0000000..d2f7dbd --- /dev/null +++ b/scripts/requirements.txt @@ -0,0 +1,3 @@ +pyquery +urllib3 +codecs