From 725acf32bfa36a16045761d568cdb2670c9993f7 Mon Sep 17 00:00:00 2001 From: Maria Antonietta Di Girolamo Date: Tue, 20 Apr 2021 14:16:32 +0200 Subject: [PATCH 01/30] new web-crawler --- web-crawler/.pydevproject | 5 + web-crawler/CrawlerDatabase.py | 146 +++ web-crawler/Integrator.py | 446 ++++++++ web-crawler/LICENSE | 158 +++ web-crawler/Metadata.py | 111 ++ web-crawler/Notifier.py | 121 +++ web-crawler/NotifierThread.py | 27 + web-crawler/Orchestrator.py | 100 ++ web-crawler/README | 11 + web-crawler/RepositoryCrawler.py | 268 +++++ web-crawler/RepositoryCrawlerThread.py | 37 + web-crawler/TestApacheRepositoryCrawler.py | 82 ++ web-crawler/Utils.py | 390 +++++++ web-crawler/apache_data_fetcher.py | 59 ++ web-crawler/api/README.rst | 4 + web-crawler/api/api.egg-info/PKG-INFO | 19 + web-crawler/api/api.egg-info/SOURCES.txt | 15 + .../api/api.egg-info/dependency_links.txt | 1 + web-crawler/api/api.egg-info/entry_points.txt | 3 + web-crawler/api/api.egg-info/not-zip-safe | 1 + .../api/api.egg-info/paster_plugins.txt | 1 + web-crawler/api/api.egg-info/requires.txt | 3 + web-crawler/api/api.egg-info/top_level.txt | 1 + web-crawler/api/api.ini | 46 + web-crawler/api/api/__init__.py | 10 + web-crawler/api/api/views.py | 353 +++++++ web-crawler/api/setup.py | 35 + web-crawler/codeplex_data_fetcher.py | 44 + web-crawler/config | 70 ++ web-crawler/database/alter_rc_gh_archive.sql | 27 + .../database/alter_rc_gh_archive_release.sql | 38 + web-crawler/database/crawlerModel.mwb | Bin 0 -> 46296 bytes web-crawler/database/createCrawlerDB.sql | 951 ++++++++++++++++++ web-crawler/database/flossmole_apache.sql | 151 +++ web-crawler/database/flossmole_eclipse.sql | 151 +++ .../database/populate_flossmole_apache.sql | 54 + .../database/populate_flossmole_eclipse.sql | 54 + web-crawler/doap_project.py | 785 +++++++++++++++ web-crawler/dw_batch.py | 99 ++ web-crawler/eclipse_data_fetcher.py | 236 +++++ web-crawler/generic_data_fetcher.py | 37 + web-crawler/github_data_fetcher.py | 669 ++++++++++++ web-crawler/jquery_plugin_data_fetcher.py | 342 +++++++ web-crawler/lib/README.txt | 2 + .../lib/doapfiend/doapfiend-0.3.3/.pylintrc | 309 ++++++ .../lib/doapfiend/doapfiend-0.3.3/AUTHORS | 8 + web-crawler/lib/doapfiend/doapfiend-0.3.3/FAQ | 9 + .../lib/doapfiend/doapfiend-0.3.3/INSTALL | 48 + .../lib/doapfiend/doapfiend-0.3.3/LICENSE | 14 + .../lib/doapfiend/doapfiend-0.3.3/MANIFEST.in | 5 + .../lib/doapfiend/doapfiend-0.3.3/NEWS | 7 + .../lib/doapfiend/doapfiend-0.3.3/PKG-INFO | 121 +++ .../lib/doapfiend/doapfiend-0.3.3/README | 103 ++ .../lib/doapfiend/doapfiend-0.3.3/THANKS | 2 + .../lib/doapfiend/doapfiend-0.3.3/TODO | 6 + .../lib/doapfiend/doapfiend-0.3.3/build.vel | 40 + .../doapfiend.egg-info/PKG-INFO | 122 +++ .../doapfiend.egg-info/SOURCES.txt | 166 +++ .../doapfiend.egg-info/dependency_links.txt | 1 + .../doapfiend.egg-info/entry_points.txt | 3 + .../doapfiend.egg-info/not-zip-safe | 1 + .../doapfiend.egg-info/requires.txt | 2 + .../doapfiend.egg-info/top_level.txt | 1 + .../doapfiend-0.3.3/doapfiend/__init__.py | 36 + .../doapfiend-0.3.3/doapfiend/cli.py | 242 +++++ .../doapfiend-0.3.3/doapfiend/doap2html.xsl | 186 ++++ .../doapfiend-0.3.3/doapfiend/doaplib.py | 339 +++++++ .../doapfiend-0.3.3/doapfiend/lexers.py | 148 +++ .../doapfiend-0.3.3/doapfiend/model.py | 83 ++ .../doapfiend/plugins/__init__.py | 144 +++ .../doapfiend-0.3.3/doapfiend/plugins/base.py | 78 ++ .../doapfiend/plugins/fields.py | 215 ++++ .../doapfiend/plugins/freshmeat.py | 65 ++ .../doapfiend/plugins/homepage.py | 84 ++ 
.../doapfiend-0.3.3/doapfiend/plugins/n3.py | 91 ++ .../doapfiend/plugins/ohloh.py | 66 ++ .../doapfiend/plugins/pkg_index.py | 46 + .../doapfiend-0.3.3/doapfiend/plugins/pypi.py | 64 ++ .../doapfiend/plugins/sourceforge.py | 65 ++ .../doapfiend-0.3.3/doapfiend/plugins/text.py | 290 ++++++ .../doapfiend-0.3.3/doapfiend/plugins/url.py | 58 ++ .../doapfiend-0.3.3/doapfiend/plugins/xml.py | 80 ++ .../doapfiend-0.3.3/doapfiend/utils.py | 216 ++++ .../doapfiend-0.3.3/docs/api/api-objects.txt | 267 +++++ .../doapfiend-0.3.3/docs/api/class-tree.html | 221 ++++ .../doapfiend-0.3.3/docs/api/crarr.png | Bin 0 -> 340 bytes .../docs/api/doapfiend-module.html | 217 ++++ .../docs/api/doapfiend-pysrc.html | 155 +++ .../docs/api/doapfiend.cli-module.html | 222 ++++ .../docs/api/doapfiend.cli-pysrc.html | 413 ++++++++ .../api/doapfiend.cli.DoapFiend-class.html | 502 +++++++++ .../docs/api/doapfiend.doaplib-module.html | 770 ++++++++++++++ .../docs/api/doapfiend.doaplib-pysrc.html | 508 ++++++++++ .../docs/api/doapfiend.lexers-module.html | 154 +++ .../docs/api/doapfiend.lexers-pysrc.html | 304 ++++++ ...doapfiend.lexers.Notation3Lexer-class.html | 394 ++++++++ .../doapfiend.lexers.SparqlLexer-class.html | 381 +++++++ .../docs/api/doapfiend.model-module.html | 205 ++++ .../docs/api/doapfiend.model-pysrc.html | 305 ++++++ .../doapfiend.model.CVSRepository-class.html | 320 ++++++ .../api/doapfiend.model.Project-class.html | 557 ++++++++++ .../api/doapfiend.model.Release-class.html | 297 ++++++ .../doapfiend.model.SVNRepository-class.html | 313 ++++++ .../docs/api/doapfiend.plugins-module.html | 398 ++++++++ .../docs/api/doapfiend.plugins-pysrc.html | 352 +++++++ .../api/doapfiend.plugins.base-module.html | 144 +++ .../api/doapfiend.plugins.base-pysrc.html | 348 +++++++ .../doapfiend.plugins.base.Plugin-class.html | 425 ++++++++ .../api/doapfiend.plugins.fields-module.html | 358 +++++++ .../api/doapfiend.plugins.fields-pysrc.html | 537 ++++++++++ ...end.plugins.fields.OutputPlugin-class.html | 378 +++++++ .../doapfiend.plugins.freshmeat-module.html | 145 +++ .../doapfiend.plugins.freshmeat-pysrc.html | 325 ++++++ ...ugins.freshmeat.FreshmeatPlugin-class.html | 376 +++++++ .../doapfiend.plugins.homepage-module.html | 248 +++++ .../api/doapfiend.plugins.homepage-pysrc.html | 343 +++++++ ...d.plugins.homepage.OutputPlugin-class.html | 370 +++++++ .../docs/api/doapfiend.plugins.n3-module.html | 253 +++++ .../docs/api/doapfiend.plugins.n3-pysrc.html | 301 ++++++ ...apfiend.plugins.n3.OutputPlugin-class.html | 378 +++++++ .../api/doapfiend.plugins.ohloh-module.html | 145 +++ .../api/doapfiend.plugins.ohloh-pysrc.html | 326 ++++++ ...fiend.plugins.ohloh.OhlohPlugin-class.html | 376 +++++++ .../doapfiend.plugins.pkg_index-module.html | 251 +++++ .../doapfiend.plugins.pkg_index-pysrc.html | 167 +++ .../api/doapfiend.plugins.pypi-module.html | 145 +++ .../api/doapfiend.plugins.pypi-pysrc.html | 324 ++++++ ...apfiend.plugins.pypi.PyPIPlugin-class.html | 376 +++++++ .../doapfiend.plugins.sourceforge-module.html | 145 +++ .../doapfiend.plugins.sourceforge-pysrc.html | 325 ++++++ ...s.sourceforge.SourceForgePlugin-class.html | 376 +++++++ .../api/doapfiend.plugins.text-module.html | 310 ++++++ .../api/doapfiend.plugins.text-pysrc.html | 666 ++++++++++++ ...pfiend.plugins.text.DoapPrinter-class.html | 416 ++++++++ ...fiend.plugins.text.OutputPlugin-class.html | 378 +++++++ .../api/doapfiend.plugins.url-module.html | 144 +++ .../docs/api/doapfiend.plugins.url-pysrc.html | 274 +++++ 
...doapfiend.plugins.url.UrlPlugin-class.html | 372 +++++++ .../api/doapfiend.plugins.xml-module.html | 146 +++ .../docs/api/doapfiend.plugins.xml-pysrc.html | 281 ++++++ ...pfiend.plugins.xml.OutputPlugin-class.html | 381 +++++++ .../docs/api/doapfiend.utils-module.html | 460 +++++++++ .../docs/api/doapfiend.utils-pysrc.html | 384 +++++++ .../doapfiend.utils.DoapPrinter-class.html | 395 ++++++++ .../doapfiend.utils.NotFoundError-class.html | 298 ++++++ .../doapfiend-0.3.3/docs/api/epydoc.css | 322 ++++++ .../doapfiend-0.3.3/docs/api/epydoc.js | 293 ++++++ .../doapfiend-0.3.3/docs/api/frames.html | 17 + .../doapfiend-0.3.3/docs/api/help.html | 278 +++++ .../docs/api/identifier-index.html | 903 +++++++++++++++++ .../doapfiend-0.3.3/docs/api/index.html | 17 + .../doapfiend-0.3.3/docs/api/module-tree.html | 148 +++ .../pygments.lexer.RegexLexerMeta-class.html | 330 ++++++ .../doapfiend-0.3.3/docs/api/redirect.html | 38 + .../docs/api/toc-doapfiend-module.html | 31 + .../docs/api/toc-doapfiend.cli-module.html | 36 + .../api/toc-doapfiend.doaplib-module.html | 46 + .../docs/api/toc-doapfiend.lexers-module.html | 32 + .../docs/api/toc-doapfiend.model-module.html | 38 + .../api/toc-doapfiend.plugins-module.html | 35 + .../toc-doapfiend.plugins.base-module.html | 31 + .../toc-doapfiend.plugins.fields-module.html | 40 + ...oc-doapfiend.plugins.freshmeat-module.html | 31 + ...toc-doapfiend.plugins.homepage-module.html | 35 + .../api/toc-doapfiend.plugins.n3-module.html | 35 + .../toc-doapfiend.plugins.ohloh-module.html | 31 + ...oc-doapfiend.plugins.pkg_index-module.html | 34 + .../toc-doapfiend.plugins.pypi-module.html | 31 + ...-doapfiend.plugins.sourceforge-module.html | 31 + .../toc-doapfiend.plugins.text-module.html | 38 + .../api/toc-doapfiend.plugins.url-module.html | 31 + .../api/toc-doapfiend.plugins.xml-module.html | 31 + .../docs/api/toc-doapfiend.utils-module.html | 39 + .../docs/api/toc-everything.html | 103 ++ .../doapfiend-0.3.3/docs/api/toc.html | 51 + .../doapfiend/doapfiend-0.3.3/docs/usage.txt | 10 + .../doapfiend-0.3.3/examples/details.py | 20 + .../doapfiend-0.3.3/examples/doap-doap.rdf | 142 +++ .../doapfiend-0.3.3/examples/doapfiend.rdf | 86 ++ .../doapfiend-0.3.3/examples/doapfile.py | 31 + .../examples/dotclear-sioc.rdf | 150 +++ .../examples/edd-dumbill-foaf-2.rdf | 22 + .../examples/edd-dumbill-foaf.rdf | 23 + .../doapfiend-0.3.3/examples/foaf-no-doap.rdf | 33 + .../doapfiend-0.3.3/examples/foaf.rdf | 69 ++ .../examples/gnome-bluetooth-doap.rdf | 42 + .../doapfiend-0.3.3/examples/homepage.py | 13 + .../doapfiend-0.3.3/examples/moap.doap | 139 +++ .../doapfiend-0.3.3/examples/redland-doap.rdf | 102 ++ .../doapfiend-0.3.3/examples/sourceforge.py | 13 + .../doapfiend-0.3.3/images/doapfiend1.png | Bin 0 -> 11248 bytes .../doapfiend/doapfiend-0.3.3/images/get.xcf | Bin 0 -> 20637 bytes .../doapfiend-0.3.3/images/getdoapfiend.jpg | Bin 0 -> 8157 bytes .../doapfiend-0.3.3/images/getdoapfiend.png | Bin 0 -> 14105 bytes .../doapfiend-0.3.3/images/getdoapfiend.xcf | Bin 0 -> 28976 bytes .../doapfiend-0.3.3/make_api_docs.sh | 3 + .../lib/doapfiend/doapfiend-0.3.3/setup.cfg | 12 + .../lib/doapfiend/doapfiend-0.3.3/setup.py | 38 + .../doapfiend-0.3.3/tests/data/doapfiend.rdf | 35 + .../doapfiend-0.3.3/tests/functional.sh | 37 + .../doapfiend-0.3.3/tests/test_cli.py | 24 + .../doapfiend-0.3.3/tests/test_cli.pyc | Bin 0 -> 192 bytes .../doapfiend-0.3.3/tests/test_doaplib.py | 58 ++ .../doapfiend-0.3.3/tests/test_doaplib.pyc | Bin 0 -> 2880 bytes .../doapfiend-0.3.3/tests/test_n3.py | 50 
+ .../doapfiend-0.3.3/tests/test_n3.pyc | Bin 0 -> 2409 bytes .../doapfiend-0.3.3/tests/test_utils.py | 25 + .../doapfiend-0.3.3/tests/test_utils.pyc | Bin 0 -> 1336 bytes .../doapfiend-0.3.3/tests/test_xml.py | 37 + .../doapfiend-0.3.3/tests/test_xml.pyc | Bin 0 -> 1715 bytes web-crawler/r_forge_data_fetcher.py | 106 ++ web-crawler/sourceforge_data_fetcher.py | 489 +++++++++ web-crawler/startCrawler | 13 + web-crawler/stopCrawler | 11 + web-crawler/stubs/README.rst | 4 + web-crawler/stubs/setup.py | 35 + web-crawler/stubs/startCrawler | 1 + web-crawler/stubs/stubs.egg-info/PKG-INFO | 19 + web-crawler/stubs/stubs.egg-info/SOURCES.txt | 11 + .../stubs/stubs.egg-info/dependency_links.txt | 1 + .../stubs/stubs.egg-info/entry_points.txt | 3 + web-crawler/stubs/stubs.egg-info/not-zip-safe | 1 + .../stubs/stubs.egg-info/paster_plugins.txt | 1 + web-crawler/stubs/stubs.egg-info/requires.txt | 3 + .../stubs/stubs.egg-info/top_level.txt | 1 + web-crawler/stubs/stubs.ini | 45 + web-crawler/stubs/stubs/__init__.py | 10 + web-crawler/stubs/stubs/__init__.pyc | Bin 0 -> 533 bytes web-crawler/stubs/stubs/views.py | 132 +++ web-crawler/stubs/stubs/views.pyc | Bin 0 -> 6238 bytes web-crawler/testAPI.py | 119 +++ 231 files changed, 35147 insertions(+) create mode 100755 web-crawler/.pydevproject create mode 100755 web-crawler/CrawlerDatabase.py create mode 100755 web-crawler/Integrator.py create mode 100755 web-crawler/LICENSE create mode 100755 web-crawler/Metadata.py create mode 100755 web-crawler/Notifier.py create mode 100755 web-crawler/NotifierThread.py create mode 100755 web-crawler/Orchestrator.py create mode 100755 web-crawler/README create mode 100755 web-crawler/RepositoryCrawler.py create mode 100755 web-crawler/RepositoryCrawlerThread.py create mode 100755 web-crawler/TestApacheRepositoryCrawler.py create mode 100755 web-crawler/Utils.py create mode 100755 web-crawler/apache_data_fetcher.py create mode 100755 web-crawler/api/README.rst create mode 100755 web-crawler/api/api.egg-info/PKG-INFO create mode 100755 web-crawler/api/api.egg-info/SOURCES.txt create mode 100755 web-crawler/api/api.egg-info/dependency_links.txt create mode 100755 web-crawler/api/api.egg-info/entry_points.txt create mode 100755 web-crawler/api/api.egg-info/not-zip-safe create mode 100755 web-crawler/api/api.egg-info/paster_plugins.txt create mode 100755 web-crawler/api/api.egg-info/requires.txt create mode 100755 web-crawler/api/api.egg-info/top_level.txt create mode 100755 web-crawler/api/api.ini create mode 100755 web-crawler/api/api/__init__.py create mode 100755 web-crawler/api/api/views.py create mode 100755 web-crawler/api/setup.py create mode 100755 web-crawler/codeplex_data_fetcher.py create mode 100755 web-crawler/config create mode 100644 web-crawler/database/alter_rc_gh_archive.sql create mode 100644 web-crawler/database/alter_rc_gh_archive_release.sql create mode 100755 web-crawler/database/crawlerModel.mwb create mode 100644 web-crawler/database/createCrawlerDB.sql create mode 100755 web-crawler/database/flossmole_apache.sql create mode 100755 web-crawler/database/flossmole_eclipse.sql create mode 100755 web-crawler/database/populate_flossmole_apache.sql create mode 100755 web-crawler/database/populate_flossmole_eclipse.sql create mode 100755 web-crawler/doap_project.py create mode 100755 web-crawler/dw_batch.py create mode 100755 web-crawler/eclipse_data_fetcher.py create mode 100755 web-crawler/generic_data_fetcher.py create mode 100755 web-crawler/github_data_fetcher.py create mode 100755 
web-crawler/jquery_plugin_data_fetcher.py create mode 100755 web-crawler/lib/README.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/.pylintrc create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/AUTHORS create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/FAQ create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/INSTALL create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/LICENSE create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/MANIFEST.in create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/NEWS create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/PKG-INFO create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/README create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/THANKS create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/TODO create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/build.vel create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/PKG-INFO create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/SOURCES.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/dependency_links.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/entry_points.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/not-zip-safe create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/requires.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/top_level.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/__init__.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/cli.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doap2html.xsl create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doaplib.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/lexers.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/model.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/__init__.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/base.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/fields.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/freshmeat.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/homepage.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/n3.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/ohloh.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pkg_index.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pypi.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/sourceforge.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/text.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/url.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/xml.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/utils.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/api-objects.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/class-tree.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/crarr.png create 
mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli.DoapFiend-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.Notation3Lexer-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.SparqlLexer-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.CVSRepository-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Project-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Release-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.SVNRepository-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.base-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.base-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.base.Plugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.fields-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.fields-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.fields.OutputPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.freshmeat-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.freshmeat-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.homepage-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.homepage-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.homepage.OutputPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.n3-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.n3-pysrc.html create mode 100755 
web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.n3.OutputPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.ohloh-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.ohloh-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.ohloh.OhlohPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.pkg_index-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.pkg_index-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.pypi-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.pypi-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.pypi.PyPIPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.sourceforge-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.sourceforge-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.sourceforge.SourceForgePlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.text-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.text-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.text.DoapPrinter-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.text.OutputPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.url-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.url-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.url.UrlPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.xml-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.xml-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins.xml.OutputPlugin-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.utils-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.utils-pysrc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.utils.DoapPrinter-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.utils.NotFoundError-class.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/epydoc.css create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/epydoc.js create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/frames.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/help.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/identifier-index.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/index.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/module-tree.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/pygments.lexer.RegexLexerMeta-class.html create mode 100755 
web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/redirect.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.cli-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.doaplib-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.lexers-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.model-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.base-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.fields-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.freshmeat-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.homepage-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.n3-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.ohloh-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.pkg_index-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.pypi-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.sourceforge-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.text-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.url-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.plugins.xml-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-doapfiend.utils-module.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc-everything.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/toc.html create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/usage.txt create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/details.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/doap-doap.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/doapfiend.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/doapfile.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/dotclear-sioc.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/edd-dumbill-foaf-2.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/edd-dumbill-foaf.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/foaf-no-doap.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/foaf.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/gnome-bluetooth-doap.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/homepage.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/moap.doap create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/redland-doap.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/examples/sourceforge.py create 
mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/images/doapfiend1.png create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/images/get.xcf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/images/getdoapfiend.jpg create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/images/getdoapfiend.png create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/images/getdoapfiend.xcf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/make_api_docs.sh create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/setup.cfg create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/setup.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/data/doapfiend.rdf create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/functional.sh create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_cli.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_cli.pyc create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_doaplib.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_doaplib.pyc create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_n3.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_n3.pyc create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_utils.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_utils.pyc create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_xml.py create mode 100755 web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/test_xml.pyc create mode 100755 web-crawler/r_forge_data_fetcher.py create mode 100755 web-crawler/sourceforge_data_fetcher.py create mode 100755 web-crawler/startCrawler create mode 100755 web-crawler/stopCrawler create mode 100755 web-crawler/stubs/README.rst create mode 100755 web-crawler/stubs/setup.py create mode 100755 web-crawler/stubs/startCrawler create mode 100755 web-crawler/stubs/stubs.egg-info/PKG-INFO create mode 100755 web-crawler/stubs/stubs.egg-info/SOURCES.txt create mode 100755 web-crawler/stubs/stubs.egg-info/dependency_links.txt create mode 100755 web-crawler/stubs/stubs.egg-info/entry_points.txt create mode 100755 web-crawler/stubs/stubs.egg-info/not-zip-safe create mode 100755 web-crawler/stubs/stubs.egg-info/paster_plugins.txt create mode 100755 web-crawler/stubs/stubs.egg-info/requires.txt create mode 100755 web-crawler/stubs/stubs.egg-info/top_level.txt create mode 100755 web-crawler/stubs/stubs.ini create mode 100755 web-crawler/stubs/stubs/__init__.py create mode 100755 web-crawler/stubs/stubs/__init__.pyc create mode 100755 web-crawler/stubs/stubs/views.py create mode 100755 web-crawler/stubs/stubs/views.pyc create mode 100755 web-crawler/testAPI.py diff --git a/web-crawler/.pydevproject b/web-crawler/.pydevproject new file mode 100755 index 00000000..d001f0ae --- /dev/null +++ b/web-crawler/.pydevproject @@ -0,0 +1,5 @@ + + +Default +python interpreter + diff --git a/web-crawler/CrawlerDatabase.py b/web-crawler/CrawlerDatabase.py new file mode 100755 index 00000000..47ccfeed --- /dev/null +++ b/web-crawler/CrawlerDatabase.py @@ -0,0 +1,146 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# +# Copyright 2014 Bitergium SLL + +### http://stackoverflow.com/questions/2248531/python-error-while-using-mysqldb-sets-module-is-deprecated +import warnings +warnings.filterwarnings(action="ignore", message='the sets module is deprecated') +### + +import MySQLdb +import sys +from Utils import Configuration, Logger, StringHelper +from subprocess import Popen +from lxml import etree + +class CrawlerDatabase: + def __init__(self): + logger = Logger.getInstance() + if not (hasattr(self, 'db') and self.db.open): + try: + self.connect() #CrawlerDatabase.db = MySQLdb.connect(host=Configuration.MySQLhost, user=Configuration.MySQLuser, passwd=Configuration.MySQLpasswd, db=Configuration.MySQLdatabase, charset=Configuration.MySQLcharset) + except Exception as ex: + message = "Can't connect to database, please check database service status and crawler's configuration." + str(ex) + logger.error(message) + sys.exit(message) + + @staticmethod + def connect(): + logger = Logger.getInstance() + CrawlerDatabase.db = MySQLdb.connect(host=Configuration.MySQLhost, user=Configuration.MySQLuser, passwd=Configuration.MySQLpasswd, db=Configuration.MySQLdatabase, charset=Configuration.MySQLcharset) + logger.info("Connected to: " + Configuration.MySQLhost) + + + @staticmethod + def source(filePath): + logger = Logger.getInstance() + try: + # source cannot be executed via mysqldb so we do it using a shell command + proc = Popen("mysql -u " + Configuration.MySQLuser + " -p" + Configuration.MySQLpasswd + " " + Configuration.MySQLdatabase + " < " + filePath, shell=True) + return_code = proc.wait() + except Exception as ex: + message = "Can't connect to database, please check database service status and crawler's configuration." + str(ex) + logger.error(message) + + @staticmethod + def execute_cursor(sqlString, parameters=None): + logger = Logger.getInstance() + try: + cur = CrawlerDatabase.db.cursor() + if (parameters is None): + cur.execute(sqlString) + else: + parameters = Utf8Helper.encode_parameters(parameters) + cur.execute(sqlString, parameters) + return cur + except Exception as ex: + message = "Can't connect to database, please check database service status and crawler's configuration." + str(ex) + logger.error(message) + + @staticmethod + def select_string(sqlString, parameters=None): + logger = Logger.getInstance() + try: + cur = CrawlerDatabase.db.cursor() + if (parameters is None): + cur.execute(sqlString) + else: + parameters = Utf8Helper.encode_parameters(parameters) + cur.execute(sqlString, parameters) + data = cur.fetchone() + return data[0] + except Exception as ex: + message = "Can't connect to database, please check database service status and crawler's configuration." + str(ex) + logger.error(message) + + @staticmethod + def select_int(sqlString, parameters=None): + logger = Logger.getInstance() + try: + cur = CrawlerDatabase.db.cursor() + if (parameters is None): + cur.execute(sqlString) + else: + parameters = Utf8Helper.encode_parameters(parameters) + cur.execute(sqlString, parameters) + data = cur.fetchone() + return data[0] + except Exception as ex: + message = "Can't connect to database, please check database service status and crawler's configuration." 
+ str(ex) + logger.error(message) + + @staticmethod + def select_natural(sqlString, parameters=None): + logger = Logger.getInstance() + try: + cur = CrawlerDatabase.db.cursor() + if (parameters is None): + cur.execute(sqlString) + else: + parameters = Utf8Helper.encode_parameters(parameters) + cur.execute(sqlString, parameters) + if cur.rowcount > 0: + data = cur.fetchone() + return data[0] + else: + return None + except Exception as ex: + message = "Can't connect to database, please check database service status and crawler's configuration." + str(ex) + logger.error(message) + + @staticmethod + def execute(sqlString, commit, parameters=None): + logger = Logger.getInstance() + cur = CrawlerDatabase.db.cursor() + try: + if (parameters is None): + cur.execute(sqlString) + else: + parameters = Utf8Helper.encode_parameters(parameters) + cur.execute(sqlString, parameters) + except Exception as e: + logger.error("SQL = " + sqlString + " PARAMETERS = " + str(parameters)) + logger.error(str(e)) + if commit: + CrawlerDatabase.db.commit() + return cur.lastrowid + +class Utf8Helper(): + @staticmethod + def encode_parameters(parameters): + ''' + MySQLdb doesn't seem to encode parameters, though it does encode the sql string; + this method is used to encode the parameters. + Some chars >=128 end up in the string so I need to remove them to prevent exceptions + ''' + for k in parameters.keys(): + if type(parameters[k]) == unicode or type(parameters[k]) == etree._ElementUnicodeResult: + safe_string = StringHelper.removeNonAscii(parameters[k]).encode('utf8') + #logger.info("Utf8Helper.encode_parameters has encoded: " + safe_string) + parameters[k] = safe_string + return parameters + +#initialize the unique database instance +CrawlerDatabase() diff --git a/web-crawler/Integrator.py b/web-crawler/Integrator.py new file mode 100755 index 00000000..e517fa08 --- /dev/null +++ b/web-crawler/Integrator.py @@ -0,0 +1,446 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL + +#February 2021 +#Owner Engineering Ingegneria Informatica S.p.A.
+#Update the code for the MORPHEMIC release 1.5 +from dw_batch import DwBatch +from doap_project import DoapProject, DoapRepository, DoapVersion, FoafPerson +from Utils import Logger, TrivialJSONEncoder, Configuration, DateHelper, Sources, StringList +from github_data_fetcher import GithubArchiveStats +from doapfiend.doaplib import load_graph +from doapfiend.model import Project as doap +from CrawlerDatabase import CrawlerDatabase +import base64 +import site +import sys +import os, stat +import urllib3 +import json2html +import requests +import decimal +import json +from json import JSONEncoder + +import traceback + + +class GenericIntegrator(object): + """ + The class from which specific integrators inherit. + During initialization it searches for a batch of the specific source to be integrated; + projects in this batch have to be compared against projects of the previous batch + (hence we store its id during initialization) and against projects of the other + sources to look for "identical" projects (hence we store the last id of the other + sources) + """ + def __init__(self, id_source): + + logger = Logger.getInstance() + logger.info("Initializing GenericIntegrator for source " + str(id_source)) + self.id_source = id_source + # I look for the most recent batch that needs to be integrated; it also needs to be more recent + # than the last integrated(3) or notified(4) one; + # IF there's no integrated batch + sqlString = "SELECT IF((SELECT max(created) FROM DWBatch WHERE idSource=" + str(id_source) + " AND (idWFState=3 OR idWFState=4)) IS NULL, " + # select the most recent of this source + sqlString = sqlString + "(SELECT IDDwBatch FROM DWBatch WHERE idSource=" + str(id_source) + " AND created=(SELECT max(created) FROM DWBatch WHERE idSource=" + str(id_source) + " AND idWFState=2)), " + # else select the most recent of this source only if it is more recent than the latest integrated(3) or notified(4) one + sqlString = sqlString + "(SELECT IDDwBatch FROM DWBatch WHERE idSource=" + str(id_source) + " AND created=(SELECT max(created) FROM DWBatch WHERE idSource=" + str(id_source) + " AND idWFState=2) AND created>(SELECT max(created) FROM DWBatch WHERE idSource=" + str(id_source) + " AND (idWFState=3 OR idWFState=4))))" + # The above query has this shape because "SELECT max(TIMESTAMP) .." returns the full timestamp while "IF (, (SELECT max(TIMESTAMP) ..), )" returns just the year + #print "sqlString before logging it for debug" + #print sqlString + #Logger.debug("GenericIntegrator sqlString: " + str(sqlString)) + id_batch = CrawlerDatabase.select_int(sqlString) + + logger.info("GenericIntegrator ID batch: " + str(id_batch) + ", id_source: " + str(id_source)) + if not (id_batch is None): + self.batch = DwBatch() + self.batch.id_batch = id_batch + logger.info("self.batch.id_batch: " + str(self.batch.id_batch)) + self.batch.id_source = id_source + + # I create an array of IDs of old batches for comparison; one for each source + self.old_batch_ids = self.batch.latest_batches(False) + + def limitBatchLength(self): + """ + Some sources provide batches of hundreds of thousands of projects; before sending them to + the code analyser we split them so that each batch has no more than + Configuration.max_batch_size projects;
this method does the splitting. + + """ + logger = Logger.getInstance() + logger.info("Entering limitBatchLength idDWBatch = " + str(self.batch.id_batch)) + #Logger.debug("Entering limitBatchLength idDWBatch = " + str(self.batch.id_batch)) + #how many projects in this batch + parameters = { + 'idDWBatch': self.batch.id_batch + } + sql = "SELECT count(*) FROM DWBatch WHERE idDWBatch=%(idDWBatch)s" + batch_length = CrawlerDatabase.select_int(sql, parameters) + logger.info("limitBatchLength batch " + str(self.batch.id_batch) + " has " + str(batch_length) + " projects") + if batch_length > Configuration.max_batch_size: + logger.info("limitBatchLength Batch " + str(self.batch.id_batch) + " has " + str(batch_length) + " projects. It will be split to reduce its size.") + while batch_length > Configuration.max_batch_size: + # I load the current batch + current_batch = DwBatch() + current_batch.load(self.batch.id_batch) + # I create a new batch with same info as this one + child_batch = current_batch + # new one is a child of current batch + child_batch.id_parent = self.batch.id_batch + child_batch.id_batch = 0 + # insert to DB + child_batch.initialize(child_batch.id_source) + # write to db all fields + child_batch.writeToDB() + # now I have the new child_batch.id_batch + # see here for explanation of following query: http://stackoverflow.com/questions/1513206/update-multiple-rows-using-limit-in-mysql + #Logger.debug("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + logger.info("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + parUpdate = { + 'idDWBatchOLD': self.batch.id_batch, + 'idDWBatch': child_batch.id_batch, + 'max_batch_size' : Configuration.max_batch_size + } + # re-assign up to max_batch_size rows from the old batch id to the new child batch id + sqlUpdate = "UPDATE DWBatch SET idDWBatch=%(idDWBatch)s WHERE " + sqlUpdate += " idDWBatch=%(idDWBatchOLD)s LIMIT %(max_batch_size)s" + #Logger.debug("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + logger.info("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + CrawlerDatabase.execute(sqlUpdate, True, parUpdate) + # let's check again if there are too many projects + batch_length = CrawlerDatabase.select_int(sql, parameters) + + + def htmlMonitor(self, html_text): + fetcher_file = open(Configuration.path_html + '/crawler/fetcher.html',"w") + os.chmod(Configuration.web_path_html + '/projects.json', 0o777) + fetcher_file.write(html_text) + fetcher_file.close() + + + +class ApacheIntegrator(GenericIntegrator): + def __init__(self): + super(ApacheIntegrator, self).__init__(Sources.Apache) + + def integrate(self): + try: + #Load the whole batch (about 200 projects) + parameters = { 'idDWBatch': self.batch.id_batch } + cursor = CrawlerDatabase.execute_cursor("SELECT Name, Homepage, DoapFile FROM RAW_Apache_Project WHERE idDWBatch=%(idDWBatch)s", parameters) + results = cursor.fetchall() + #for each project in the batch + for record in results: + name = record[0] + homepage = record[1] + doap_file = record[2] + doap = load_graph(base64.b64decode(doap_file)) + dp_new = DoapProject() + dp_new.load_from_doap(doap, base64.b64decode(doap_file)) + dp_new.modified = False + dp_new.modified_release = False + dp_new.id_batch = self.batch.id_batch + + + # GenericIntegrator helps us search the same project in a previous batch of the same source + ''' + id_old_doap = super(ApacheIntegrator, self).identicalDoapSameSource(dp_new.name, dp_new.repositoryLocations()) + id_other_doaps =
super(ApacheIntegrator, self).identicalDoapsDifferentSources(dp_new.name, dp_new.repositoryLocations()) + #I search other doaps to compare with old ones and set modified* flags and to + #set id_project to an existing one if other doaps exist otherwise to a new one + dp_new.search_other_doaps(id_old_doap, id_other_doaps) + ''' + #save updated doap file or insert if new + dp_new.save_to_db() + # mark the batch as integrated + self.batch.integrate() + except Exception as ex: + Logger.error('Error integrating: ' + str(ex)) + + +class GithubIntegrator(GenericIntegrator): + + def __init__(self): + super(GithubIntegrator, self).__init__(Sources.Github) + + def integrate(self): + """ + I do integration only if RepositoryCrawler has integrated FLOSSmole's data with githubarchive's + """ + + logger = Logger.getInstance() + logger.info("GithubIntegrator ") + deployment_url = "" + idDWBatch = "" + if GithubArchiveStats.statsAvailable()>0: + self.html_monitor_text = "
Github MetadataProject" + + try: + #Pavia: since flossmole has been deprecated we skip the whole sql join gig that uses both flossmole's and github archive datasets to generate a doap for each project + #Pavia: we limit ourselves to github archive data only + logger.info("Starting Github Integration") + #Load the whole batch; for flossmole gh_projects table is dropped and created every time it is downloaded so we do not filter by idDWBatch + #all fields in gh_projects: datasource_id, description, developer_name, fork_number, forked, homepage, last_modified, open_issues, private, project_name, url, watchers, XML + parameters = { 'how_many': Configuration.github_top_projects_event_count } + sql_query = "SELECT rel.project_name, rel.developer_name, arc.description, rel.releaseUrl, rel.version,arc.idDWBatch " + sql_query = sql_query + " FROM rc_gh_archive_release rel LEFT JOIN" + sql_query = sql_query + "(select project_name, description,idDWBatch FROM rc_gh_archive GROUP BY project_name)" + sql_query = sql_query + "as arc on rel.project_name=arc.project_name ORDER BY rel.project_name" + #print "||=====================SQL_QUERY===============================||" + #print sql_query + #print "||====================================================||" + + #sql_query = "SELECT project_name, developer_name, description, sum_event_count, url, version, idDWBatch FROM rc_gh_archive WHERE idDWBatch =" + idbatch + cursor = CrawlerDatabase.execute_cursor(sql_query,parameters) + #logger.info("executing sql_query with parameters") + results = cursor.fetchall() + #for each project in the batch, sorted by project_name, there might be more than one row per project if there are multiple releases + current_project_name = "" + # other_doaps_sought: I want to search for other doaps in different sources only once even if I have more than one record per project + #other_project = False + #logger.info("idDWBatch") + + for record in results: + #id_batch = self.batch.id_batch + if record[5] != None: + revision = record[4] + release_url = record[3] + release_url = release_url.replace('/releases/tag/', '/archive/') + release_url = release_url.replace('/releases/', '/archive/') + # add .zip + release_url = release_url + ".zip" + #download_page = release_url + ".zip" + deployment_url = "https://api.github.com/repos/" + record[0] + "/deployments" + #print deployment_url + #html url of the git project + #html_url = "https://github.com/" + record[0] + #repo url + url = "https://api.github.com/repos/" + record[0] + release = record[4].encode('utf-8') + developer = record[1] + project = record[0] + idDWBatch = str(record[5]) + #try to build the language field : + #example: https://api.github.com/repos/jgtate/Hackathon2018 + #get repo json information : + ''' + r = requests.get(url) + decoded = json.loads(r.content) + language = decoded["language"] + ''' + ''' + languageUrl = decoded["languages_url"] + rl = requests.get(languageUrl) + tot_languages = encoded(rl.content) + ''' + + + parameters = { + 'project' : project, + 'developer' : developer, + 'description' : str(record[2]), + 'url' : url, + 'versionUrl' : release_url, + 'version' : release, + 'deploymentUrl' : deployment_url, + 'idDWBatch' : idDWBatch + } + + #logger.info( "=============parameters=========================") + #logger.info(str(parameters)) + #logger.info( "======================================") + + sqlInsertStrList = [] + sqlParamStrList = [] + + ''' + print len(sqlInsertStrList) + print "-------------------------" + ''' + + sqlInsertStrList.insert(0,"INSERT INTO 
MetadataProject (project, developer, ") + sqlParamStrList.insert(0,"VALUES (%(project)s, %(developer)s, ") + + if (len(record[2])>0): + sqlInsertStrList.extend(" description,") + sqlParamStrList.extend("%(description)s, ") + # note: extend() with a string argument appends it one character at a time; the ''.join below still rebuilds the intended SQL + + sqlInsertStrList.extend(" url, versionUrl, version, deploymentUrl,idDWBatch) ") + sqlParamStrList.extend(" %(url)s, %(versionUrl)s, %(version)s, %(deploymentUrl)s, %(idDWBatch)s)"); + + sqlString = ''.join (sqlInsertStrList)+''.join (sqlParamStrList) + + #logger.info(sqlString) + + #CrawlerDatabase.execute("INSERT INTO MetadataProject (project_name, developer_name, description, url, release_url, release, deployment_url,idDWBatch) VALUES (?, ?, ?,?, ?, ?, ?, ?)", False, parameters) + try: + #CrawlerDatabase.execute("INSERT INTO MetadataProject (project, developer, description, url, versionUrl, version, deploymentUrl,idDWBatch) VALUES (?, ?, ?,?, ?, ?, ?, ?)", False, parameters) + + CrawlerDatabase.execute(sqlString, True, parameters) + #CrawlerDatabase.execute("INSERT INTO MetadataProject (project_name, developer_name, description, url, release_url, release, deployment_url,idDWBatch) VALUES (%(project_name)s, %(developer_name)s, %(description)s,%(url)s, %(release_url)s, %(release)s, %(deployment_url)s, %(idDWBatch)s)", True,parameters) + except: + # printing stack trace + traceback.print_exc() + #except Exception, ex: + #logger.error("Error insert MetadataProject : " + str(ex)) + #listOfProjects.append(parameters) + #mark batch as integrated (MDG march 2021) -> this part should be removed in a future version + self.batch.integrate() + #url = 'https://127.0.0.1:6543/knowdata/' + #myobj = {'somekey': 'somevalue'} + + #x = requests.post(url, data = myobj) + + #print(x.text) + #json_data = TrivialJSONEncoder().encode(listOfProjects) + #print json_data + #self.writeJson(json_data) + # once the fetcher download is complete, the metadata of the downloaded projects is stored in + # a temporary table; the Crawler then notifies the KnowledgeBase and sends it this metadata + #self.batch.integrate() + #self.batch.notify() + #return json_data + + #except Exception as ex: + # logger.error('Error integrating: ' + str(ex)) + except: + # printing stack trace + traceback.print_exc() + +''' + def writeJson(self,a): + json_file = Configuration.web_path_html + '/projects.json' + if os.path.isfile(json_file): + os.remove(json_file) + #print("File Removed! Now I'm going to create it") + fetcher_file = open(json_file,"w") + os.chmod(Configuration.web_path_html + '/projects.json', 0o777) + else: + fetcher_file = open(json_file,"w") + os.chmod(Configuration.web_path_html + '/projects.json', 0o777) + with fetcher_file as out_file: + json.dump(a, out_file,indent=2) + #json.dumps(a,out_file, indent=4) + + + + print('write file json ') +''' + +class JQueryPluginIntegrator(GenericIntegrator): + def __init__(self): + super(JQueryPluginIntegrator, self).__init__(Sources.JQueryPlugin) + + def integrate(self): + logger = Logger.getInstance() + self.html_monitor_text = "JQueryPlugin Integrator" + try: + #Load the whole batch + parameters = { 'idDWBatch': self.batch.id_batch } + cursor = CrawlerDatabase.execute_cursor("SELECT idRAW_JQ_Project, entry_title, attribution, description, download_link FROM RAW_JQ_Project WHERE idDWBatch=%(idDWBatch)s", parameters) + results = cursor.fetchall() + #for each project in the batch + for record in results: + idRAW_JQ_Project = record[0] + + dp_new = DoapProject() + dp_new.name = record[1] + dp_new.shortdesc = "" + dp_new.description = record[3] + dp_new.homepage = "" + dp_new.old_homepage = [] + tmp = DateHelper("") + dp_new.created = str(tmp.date) + + dp_new.download_page = record[4] + dp_new.mailing_list = "" + dp_new.category = [] + parameters = { + 'idRAW_JQ_Project': idRAW_JQ_Project + } + tmp = DateHelper("") + + curCat = CrawlerDatabase.execute_cursor("SELECT t.name FROM RAW_JQ_ProjectTag pt JOIN RAW_JQ_Tag t ON pt.idRAW_JQ_Tag=t.idRAW_JQ_Tag WHERE pt.idRAW_JQ_Project=%(idRAW_JQ_Project)s", parameters) + resultsCat = curCat.fetchall() + for recordCat in resultsCat: + dp_new.category.append(str(recordCat[0])) + dp_new.os = [] + dp_new.programming_language = [] + dp_new.license = [] + curLicense = CrawlerDatabase.execute_cursor("SELECT l.name FROM RAW_JQ_ProjectLicense pl JOIN RAW_JQ_License l ON pl.idRAW_JQ_License=l.idRAW_JQ_License WHERE pl.idRAW_JQ_Project=%(idRAW_JQ_Project)s", parameters) + resultsLicense = curLicense.fetchall() + for recordLicense in resultsLicense: + dp_new.license.append(str(recordLicense[0])) + #dp_new.language = [] + language = [] + # I need the latest version so that I can guess the download link for earlier versions; e.g.: + # latest version 1.9.4 + # download link http://github.com/CSS-Tricks/AnythingSlider/zipball/v1.9.4 + # replacing "1.9.4" with "1.9.3" and so on..
I get download links to older versions + # often the download link does not contain the latest version name though + latestRelease = CrawlerDatabase.select_string("SELECT version, max(date) FROM RAW_JQ_Version WHERE idRAW_JQ_Project=%(idRAW_JQ_Project)s ", parameters) + release = [] + #dp_new.release = [] + curRelease = CrawlerDatabase.execute_cursor("SELECT version, date FROM RAW_JQ_Version WHERE idRAW_JQ_Project=%(idRAW_JQ_Project)s ", parameters) + resultsRelease = curRelease.fetchall() + for recordRelease in resultsRelease: + dv = DoapVersion() + dv.name = recordRelease[0] + tmp = DateHelper("") + tmp.load_from_jquery_format(recordRelease[1]) + dv.created = str(tmp.date) + dv.revision = "" + dv.file_release = [] + if latestRelease in dp_new.download_page: + dv.file_release.append(dp_new.download_page.replace(latestRelease, dv.name)) + dp_new.release.append(dv) + + dp_new.download_mirror = [] + dp_new.wiki = [] + dp_new.bug_database = "" + + dp_new.developer = [] + dp_new.documenter = [] + dp_new.helper = [] + dp_new.tester = [] + dp_new.translator = [] + dp_new.maintainer = [] + curMember = CrawlerDatabase.execute_cursor("SELECT m.name FROM RAW_JQ_ProjectMaintainer pm JOIN RAW_JQ_Maintainer m ON pm.idRAW_JQ_Maintainer=m.idRAW_JQ_Maintainer WHERE pm.idRAW_JQ_Project=%(idRAW_JQ_Project)s ", parameters) + resultsMember = curMember.fetchall() + for recordMember in resultsMember: + names = recordMember[0].split(" ") + fp = FoafPerson(names[0], " ".join(names[1:]), "N/A") + dp_new.maintainer.append(fp) + + dp_new.platform = "" + dp_new.service_endpoint = "" + dp_new.audience = "" + dp_new.blog = "" + dp_new.modified = False + dp_new.modified_release = False + dp_new.id_batch = self.batch.id_batch + + ''' + # GenericIntegrator helps us search the same project in a previous batch of the same source + id_old_doap = super(JQueryPluginIntegrator, self).identicalDoapSameSource(dp_new.name, dp_new.repositoryLocations()) + id_other_doaps = super(JQueryPluginIntegrator, self).identicalDoapsDifferentSources(dp_new.name, dp_new.repositoryLocations()) + #I search other doaps to compare with old ones and set modified* flags and to + #set id_project to an existing one if other doaps exist otherwise to a new one + dp_new.search_other_doaps(id_old_doap, id_other_doaps) + ''' + #save updated doap file or insert if new + dp_new.save_to_db() + + # mark the batch as integrated + self.batch.integrate() + except Exception as ex: + logger.error('Error integrating: ' + str(ex)) + + diff --git a/web-crawler/LICENSE b/web-crawler/LICENSE new file mode 100755 index 00000000..8fb25c89 --- /dev/null +++ b/web-crawler/LICENSE @@ -0,0 +1,158 @@ +Mozilla Public License +Version 2.0 +1. Definitions + +1.1. “Contributor” + + means each individual or legal entity that creates, contributes to the creation of, or owns Covered Software. +1.2. “Contributor Version” + + means the combination of the Contributions of others (if any) used by a Contributor and that particular Contributor’s Contribution. +1.3. “Contribution” + + means Covered Software of a particular Contributor. +1.4. “Covered Software” + + means Source Code Form to which the initial Contributor has attached the notice in Exhibit A, the Executable Form of such Source Code Form, and Modifications of such Source Code Form, in each case including portions thereof. +1.5. 
“Incompatible With Secondary Licenses” + + means + + that the initial Contributor has attached the notice described in Exhibit B to the Covered Software; or + + that the Covered Software was made available under the terms of version 1.1 or earlier of the License, but not also under the terms of a Secondary License. + +1.6. “Executable Form” + + means any form of the work other than Source Code Form. +1.7. “Larger Work” + + means a work that combines Covered Software with other material, in a separate file or files, that is not Covered Software. +1.8. “License” + + means this document. +1.9. “Licensable” + + means having the right to grant, to the maximum extent possible, whether at the time of the initial grant or subsequently, any and all of the rights conveyed by this License. +1.10. “Modifications” + + means any of the following: + + any file in Source Code Form that results from an addition to, deletion from, or modification of the contents of Covered Software; or + + any new file in Source Code Form that contains any Covered Software. + +1.11. “Patent Claims” of a Contributor + + means any patent claim(s), including without limitation, method, process, and apparatus claims, in any patent Licensable by such Contributor that would be infringed, but for the grant of the License, by the making, using, selling, offering for sale, having made, import, or transfer of either its Contributions or its Contributor Version. +1.12. “Secondary License” + + means either the GNU General Public License, Version 2.0, the GNU Lesser General Public License, Version 2.1, the GNU Affero General Public License, Version 3.0, or any later versions of those licenses. +1.13. “Source Code Form” + + means the form of the work preferred for making modifications. +1.14. “You” (or “Your”) + + means an individual or a legal entity exercising rights under this License. For legal entities, “You” includes any entity that controls, is controlled by, or is under common control with You. For purposes of this definition, “control” means (a) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (b) ownership of more than fifty percent (50%) of the outstanding shares or beneficial ownership of such entity. + +2. License Grants and Conditions +2.1. Grants + +Each Contributor hereby grants You a world-wide, royalty-free, non-exclusive license: + + under intellectual property rights (other than patent or trademark) Licensable by such Contributor to use, reproduce, make available, modify, display, perform, distribute, and otherwise exploit its Contributions, either on an unmodified basis, with Modifications, or as part of a Larger Work; and + + under Patent Claims of such Contributor to make, use, sell, offer for sale, have made, import, and otherwise transfer either its Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution become effective for each Contribution on the date the Contributor first distributes such Contribution. +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under this License. No additional rights or licenses will be implied from the distribution or licensing of Covered Software under this License. 
Notwithstanding Section 2.1(b) above, no patent license is granted by a Contributor: + + for any code that a Contributor has removed from Covered Software; or + + for infringements caused by: (i) Your and any other third party’s modifications of Covered Software, or (ii) the combination of its Contributions with other software (except as part of its Contributor Version); or + + under Patent Claims infringed by Covered Software in the absence of its Contributions. + +This License does not grant any rights in the trademarks, service marks, or logos of any Contributor (except as may be necessary to comply with the notice requirements in Section 3.4). +2.4. Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to distribute the Covered Software under a subsequent version of this License (see Section 10.2) or under the terms of a Secondary License (if permitted under the terms of Section 3.3). +2.5. Representation + +Each Contributor represents that the Contributor believes its Contributions are its original creation(s) or it has sufficient rights to grant the rights to its Contributions conveyed by this License. +2.6. Fair Use + +This License is not intended to limit any rights You have under applicable copyright doctrines of fair use, fair dealing, or other equivalents. +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted in Section 2.1. +3. Responsibilities +3.1. Distribution of Source Form + +All distribution of Covered Software in Source Code Form, including any Modifications that You create or to which You contribute, must be under the terms of this License. You must inform recipients that the Source Code Form of the Covered Software is governed by the terms of this License, and how they can obtain a copy of this License. You may not attempt to alter or restrict the recipients’ rights in the Source Code Form. +3.2. Distribution of Executable Form + +If You distribute Covered Software in Executable Form then: + + such Covered Software must also be made available in Source Code Form, as described in Section 3.1, and You must inform recipients of the Executable Form how they can obtain a copy of such Source Code Form by reasonable means in a timely manner, at a charge no more than the cost of distribution to the recipient; and + + You may distribute such Executable Form under the terms of this License, or sublicense it under different terms, provided that the license for the Executable Form does not attempt to limit or alter the recipients’ rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, provided that You also comply with the requirements of this License for the Covered Software. If the Larger Work is a combination of Covered Software with a work governed by one or more Secondary Licenses, and the Covered Software is not Incompatible With Secondary Licenses, this License permits You to additionally distribute such Covered Software under the terms of such Secondary License(s), so that the recipient of the Larger Work may, at their option, further distribute the Covered Software under the terms of either this License or such Secondary License(s). +3.4. 
Notices + +You may not remove or alter the substance of any license notices (including copyright notices, patent notices, disclaimers of warranty, or limitations of liability) contained within the Source Code Form of the Covered Software, except that You may alter any license notices to the extent required to remedy known factual inaccuracies. +3.5. Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, indemnity or liability obligations to one or more recipients of Covered Software. However, You may do so only on Your own behalf, and not on behalf of any Contributor. You must make it absolutely clear that any such warranty, support, indemnity, or liability obligation is offered by You alone, and You hereby agree to indemnify every Contributor for any liability incurred by such Contributor as a result of warranty, support, indemnity or liability terms You offer. You may include additional disclaimers of warranty and limitations of liability specific to any jurisdiction. +4. Inability to Comply Due to Statute or Regulation + +If it is impossible for You to comply with any of the terms of this License with respect to some or all of the Covered Software due to statute, judicial order, or regulation then You must: (a) comply with the terms of this License to the maximum extent possible; and (b) describe the limitations and the code they affect. Such description must be placed in a text file included with all distributions of the Covered Software under this License. Except to the extent prohibited by statute or regulation, such description must be sufficiently detailed for a recipient of ordinary skill to be able to understand it. +5. Termination + +5.1. The rights granted under this License will terminate automatically if You fail to comply with any of its terms. However, if You become compliant, then the rights granted under this License from a particular Contributor are reinstated (a) provisionally, unless and until such Contributor explicitly and finally terminates Your grants, and (b) on an ongoing basis, if such Contributor fails to notify You of the non-compliance by some reasonable means prior to 60 days after You have come back into compliance. Moreover, Your grants from a particular Contributor are reinstated on an ongoing basis if such Contributor notifies You of the non-compliance by some reasonable means, this is the first time You have received notice of non-compliance with this License from such Contributor, and You become compliant prior to 30 days after Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent infringement claim (excluding declaratory judgment actions, counter-claims, and cross-claims) alleging that a Contributor Version directly or indirectly infringes any patent, then the rights granted to You by any and all Contributors for the Covered Software under Section 2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all end user license agreements (excluding distributors and resellers) which have been validly granted by You or Your distributors under this License prior to termination shall survive termination. +6. 
Disclaimer of Warranty + +Covered Software is provided under this License on an “as is” basis, without warranty of any kind, either expressed, implied, or statutory, including, without limitation, warranties that the Covered Software is free of defects, merchantable, fit for a particular purpose or non-infringing. The entire risk as to the quality and performance of the Covered Software is with You. Should any Covered Software prove defective in any respect, You (not any Contributor) assume the cost of any necessary servicing, repair, or correction. This disclaimer of warranty constitutes an essential part of this License. No use of any Covered Software is authorized under this License except under this disclaimer. +7. Limitation of Liability + +Under no circumstances and under no legal theory, whether tort (including negligence), contract, or otherwise, shall any Contributor, or anyone who distributes Covered Software as permitted above, be liable to You for any direct, indirect, special, incidental, or consequential damages of any character including, without limitation, damages for lost profits, loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses, even if such party shall have been informed of the possibility of such damages. This limitation of liability shall not apply to liability for death or personal injury resulting from such party’s negligence to the extent applicable law prohibits such limitation. Some jurisdictions do not allow the exclusion or limitation of incidental or consequential damages, so this exclusion and limitation may not apply to You. +8. Litigation + +Any litigation relating to this License may be brought only in the courts of a jurisdiction where the defendant maintains its principal place of business and such litigation shall be governed by laws of that jurisdiction, without reference to its conflict-of-law provisions. Nothing in this Section shall prevent a party’s ability to bring cross-claims or counter-claims. +9. Miscellaneous + +This License represents the complete agreement concerning the subject matter hereof. If any provision of this License is held to be unenforceable, such provision shall be reformed only to the extent necessary to make it enforceable. Any law or regulation which provides that the language of a contract shall be construed against the drafter shall not be used to construe this License against a Contributor. +10. Versions of the License +10.1. New Versions + +Mozilla Foundation is the license steward. Except as provided in Section 10.3, no one other than the license steward has the right to modify or publish new versions of this License. Each version will be given a distinguishing version number. +10.2. Effect of New Versions + +You may distribute the Covered Software under the terms of the version of the License under which You originally received the Covered Software, or under the terms of any subsequent version published by the license steward. +10.3. Modified Versions + +If you create software not governed by this License, and you want to create a new license for such software, you may create and use a modified version of this License if you rename the license and remove any references to the name of the license steward (except to note that such modified license differs from this License). +10.4. 
Distributing Source Code Form that is Incompatible With Secondary Licenses + +If You choose to distribute Source Code Form that is Incompatible With Secondary Licenses under the terms of this version of the License, the notice described in Exhibit B of this License must be attached. +Exhibit A - Source Code Form License Notice + + This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +If it is not possible or desirable to put the notice in a particular file, then You may include the notice in a location (such as a LICENSE file in a relevant directory) where a recipient would be likely to look for such a notice. + +You may add additional accurate notices of copyright ownership. +Exhibit B - “Incompatible With Secondary Licenses” Notice + + This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0. diff --git a/web-crawler/Metadata.py b/web-crawler/Metadata.py new file mode 100755 index 00000000..40e79893 --- /dev/null +++ b/web-crawler/Metadata.py @@ -0,0 +1,111 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL + +from generic_data_fetcher import GenericDataFetcher +from CrawlerDatabase import CrawlerDatabase +import urllib2 +from Utils import Logger, Configuration, Sources +import fileinput +from shutil import move +from subprocess import Popen +import glob +from calendar import monthrange +from datetime import date, timedelta +from RepositoryCrawler import GithubArchiveStats + +class FlossmoleDataFetcher(GenericDataFetcher): + def __init__(self, source): + super(FlossmoleDataFetcher, self).__init__(source) + if source == Sources.Github: + self.short_name = "gh" + self.directory = Configuration.flossmole_file_path + "/gh" + + def run(self): + self.fetchFiles() + self.deleteExistingTables() + self.latestFiles() + self.runScripts() + + + # Fetches from Flossmole files of a specific forge; it searches for current year and, if not found for previous year files + # it decompresses them 'bzip2 -d ...' and deletes the compressed files + def fetchFiles(self): + logger = Logger.getInstance() + logger.info("About to fetch files from Flossmole." 
+ self.short_name) + #Flossmole stores files per year /2014 and per batch /2014/2014-Jan + #ENG MdG marzo 2020 current_year = date.today().year + current_year = 2013 + #In each batch there are some tab-delimited files and some sql files bz2 compressed + base_url = "http://flossdata.syr.edu/data/" + common_parameters = " -r -np -nH -N --cut-dirs=5 -o log -A sql.bz2" + + proc = Popen("wget " + base_url + self.short_name + "/" + str(current_year) + common_parameters, shell=True, cwd=self.directory) + return_code = proc.wait() + if return_code == 8: + #If I got nothing this year I must go to previous year + proc = Popen("wget " + base_url + self.short_name + "/" + str(current_year - 1) + common_parameters, shell=True, cwd=self.directory) + return_code = proc.wait() + if return_code == 8: + logger.warning("FlossmoleForge has not found files for forge \"" + self.short_name + "\" ") + #If there are files I will decompress them + dl_files = glob.glob(self.directory + "/*sql.bz2") + for dl_file in dl_files: + # I decompress them -d and keep them -k so that they are not downloaded again + proc = Popen("bzip2 -d -k " + dl_file, shell=True, cwd=self.directory) + return_code = proc.wait() + + # looks on local file system for the latest file, if it is compressed it uncompresses it and returns the full path + def latestFiles(self): + months = ['Dec', 'Nov', 'Oct', 'Sep', 'Aug', 'Jul', 'Jun', 'May', 'Apr', 'Mar', 'Feb', 'Jan'] + #ENG marzo 2020 current_year = date.today().year + current_year = 2013 + recent_year_files = [] + self.latest_files = [] + #I search for files issued in the most recent year; I start from current year and proceed backwards in 2013 + while recent_year_files == []: + recent_year_files = glob.glob(self.directory + "/*" + str(current_year) + "*sql") + current_year -= 1 + if len(recent_year_files) > 0: + current_month = 0 + while self.latest_files == [] and current_month < 12: + for sql_file in recent_year_files: + if months[current_month] in sql_file: + self.latest_files.append(sql_file) + current_month += 1 + return self.latest_files + + def deleteExistingTables(self): + # script from Flossmole contains "CREATE TABLE" without checking if the table exists and without dropping it; hence I need to delete them every time + cursor = CrawlerDatabase.execute_cursor("select TABLE_NAME from information_schema.tables WHERE TABLE_NAME like '" + self.short_name + "%'; ") + results = cursor.fetchall() + for record in results: + CrawlerDatabase.execute("DROP TABLE " + record[0], True) + + def runScripts(self): + #first of all I check to see whether a "structure" file that has to be executed first + for sql_file in self.latest_files: + if "structure" in sql_file: + CrawlerDatabase.source(sql_file) + for sql_file in self.latest_files: + if (not "structure" in sql_file): + CrawlerDatabase.source(sql_file) + +class GithubDataFetcher(FlossmoleDataFetcher): + def __init__(self): + super(GithubDataFetcher, self).__init__(Sources.Github) + + def run(self): + super(GithubDataFetcher, self).run() + gas = GithubArchiveStats() + +class DataFetcher(FlossmoleDataFetcher): + def __init__(self): + super(GithubDataFetcher, self).__init__(Sources.Github) + + def run(self): + super(GithubDataFetcher, self).run() + gas = GithubArchiveStats() + diff --git a/web-crawler/Notifier.py b/web-crawler/Notifier.py new file mode 100755 index 00000000..b3574878 --- /dev/null +++ b/web-crawler/Notifier.py @@ -0,0 +1,121 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. 
If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL + +from Utils import Logger, Configuration, States, Sources +from CrawlerDatabase import CrawlerDatabase +from dw_batch import DwBatch +import urllib2, urllib +import traceback +import notify2 +import os +import requests +from Metadata import Metadata + +#MDG 2021 +import json + + + +class Notifier(): + + + def run(self): + logger = Logger.getInstance() + page = '' + path = "http://127.0.0.1:6543/values" + listOfProject = [] + #path='http://' + Configuration.KAaddress + ':' + Configuration.KAport + '/values' + #MDG on march 2021: al momento abbiamo solo il caso di git + parameters = { + 'idWFState': States.Integrated + } + try: + CrawlerDatabase.connect() + + except mysql.connector.Error as err: + print("Something went wrong: {}".format(err)) + + #print "estraggo i miei metadata di idProject and updated=0" + + try: + headers = { + 'Content-Type': 'application/json' + + } + postdata = {} + cursor = CrawlerDatabase.execute_cursor("SELECT idProject,project, description, versionUrl,version, deploymentUrl, url, idDWBatch,updated FROM MetadataProject where updated=0") + results = cursor.fetchall() + #print results + if (results == ()): + logger.info("There is no metadataproject to send. Please, wait.....") + #Attenzione : questa operazione non deve essere simulata la risposta REST + #Spezzare il metodo in due parti restituire True o False o array vuoto + #se non ci sono i metadati allora il codice continua + #altrimenti gestisce con il rest + #return + else: + for record in results: + metadata = { + 'idProject' : str(record[0]), + 'name' : str(record[1]), + 'description' : str(record[2]), + 'versionUrl' : str(record[3]), + 'version' : str(record[4]), + 'deploymentUrl' : str(record[5]), + 'url' : str(record[6]), + 'idDWBatch' : str(record[7]), + 'updated' : str(record[8]) + } + ''' + postdata = json.dumps(metadata) + request=urllib2.Request(path, str(postdata)) + request.add_header("Content-type", "application/x-www-form-urlencoded") + page=urllib2.urlopen(request).read() + ''' + listOfProject.append(metadata) + l = len (listOfProject) + + #soglia_max = 100 + soglia_max = 5 + num = l/soglia_max + resto = l%soglia_max + p = [[]] + i = 0 + num = num+1; + int_max =0; + while (i 0: + gh = GitHubDataFetcher() + gh.run() + gh.batch.complete() + end = time.time() + print('Time taken to load github fetcher into crawler db is : ', str(end - start)) + #logger.info("Fetching data from GitHubArchive through RepositoryCrawler") + ''' + start = time.time() + ghi = GithubIntegrator() + if hasattr(ghi, 'batch') and not (ghi.batch is None): + ghi.integrate() + ghi.limitBatchLength() + + end = time.time() + print('Time taken to load into MetadataProject table is : ', str(end - start)) + ''' + # APACHE + + if Configuration.apache_every_n_days > 0: + logger.info("Starting ApacheDataFetcher") + adf = ApacheDataFetcher() + #logger.info("ci arrivi qu iprima del run?") + adf.run() + adf.batch.complete() + ai = ApacheIntegrator() + if hasattr(ai, 'batch') and not (ai.batch is None): + ai.integrate() + ai.limitBatchLength() + + #JQueryPlugin + if Configuration.jqueryplugin_every_n_days > 0: + logger.info("Starting JQueryPluginDataFetcher") + jq = JQueryPluginDataFetcher(Sources.JQueryPlugin) + jq.run() + jq.batch.complete() + jqi = JQueryPluginIntegrator() + if hasattr(jqi, 'batch') and not (jqi.batch is None): + jqi.integrate() + jqi.limitBatchLength() + + except Exception, ex: 
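+        # Note: this single try/except wraps all of the sources, so an exception raised while
+        # handling GitHub also skips the Apache and JQueryPlugin runs for this cycle.
+        # A minimal per-source guard (an illustrative sketch reusing the classes above, not
+        # part of the original design) would isolate failures instead:
+        #
+        #     try:
+        #         adf = ApacheDataFetcher()
+        #         adf.run()
+        #         adf.batch.complete()
+        #         ai = ApacheIntegrator()
+        #         if hasattr(ai, 'batch') and ai.batch is not None:
+        #             ai.integrate()
+        #             ai.limitBatchLength()
+        #     except Exception as apache_error:
+        #         logger.error('Apache source failed: ' + str(apache_error))
+        #
+        # and similarly for the GitHub and JQueryPlugin blocks.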
+ #logging.error(str(ex)) + logger.error(str(ex)) + + +Orchestrator() diff --git a/web-crawler/README b/web-crawler/README new file mode 100755 index 00000000..22404339 --- /dev/null +++ b/web-crawler/README @@ -0,0 +1,11 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL +MARKOS Crawler was developed for the MARKOS EU Project (2003-2005) +MARKOS Crawler has been updated for the MORPHEMIC project by the ENG team. +Create the working directory under the home directory: $HOME/markos/markos02/github +Example: /home/ubuntu/markos/markos02/github +In July 2020 the FLOSSmole forge was discontinued, so the ENG team +decided to remove FLOSSmole support from the MARKOS Crawler for the MORPHEMIC project. \ No newline at end of file diff --git a/web-crawler/RepositoryCrawler.py b/web-crawler/RepositoryCrawler.py new file mode 100755 index 00000000..5b5c26ec --- /dev/null +++ b/web-crawler/RepositoryCrawler.py @@ -0,0 +1,268 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL + +from Utils import Configuration, Logger, Sources +from datetime import date, timedelta +from CrawlerDatabase import CrawlerDatabase +from subprocess import Popen +import glob +from calendar import monthrange +import json +import os +import traceback +import time + +from generic_data_fetcher import GenericDataFetcher +from __builtin__ import False + +from webob import Response, exc,request +from cornice import Service, validators +import uuid +import sys +from pyramid.renderers import render +from webob.request import Request + +sys.path.append('..') + +crawler = Service(name='sendmessage', path='/crawler', description="send message") + +class RepositoryCrawler(GenericDataFetcher): + ''' Fetches GitHub Archive statistics month by month and stores them in the crawler database. ''' + def __init__(self): + super(RepositoryCrawler, self).__init__(Sources.Github) + + def run(self): + ''' Walks backwards from the current month until Configuration.github_archive_months months of statistics are available locally. ''' + months_of_stats = 0 + dt = date.today() + while months_of_stats < Configuration.github_archive_months: + dt1 = dt.replace(day=1) #go to the first day of the month + dt = dt1 - timedelta(days=1) #back one day so I get the previous month + year_minus_cursor = dt.year + month_minus_cursor = dt.month + gas = GithubArchiveStats(year_minus_cursor, month_minus_cursor) + #do I already have data for this month? + parameters = { + 'stat_year': year_minus_cursor, + 'stat_month': month_minus_cursor + } + if CrawlerDatabase.select_int("SELECT COUNT(*) FROM rc_gh_archive WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s", parameters) > 0: + months_of_stats = months_of_stats + 1 + elif gas.filesAvailable(): + gas.fetchFiles() + gas.processFiles() + months_of_stats = months_of_stats + 1 + #processing of the downloaded files is finished + #the Crawler could notify the knowledge base that the data is ready + #self.send_message() + + + + + +class GithubArchiveStats(): + """ + We fetch statistics from http://www.githubarchive.org/ + an instance takes care of a specific month of a specific year + """ + def __init__(self, y, m): + self.y = y + self.m = m + + self.mm = "%02d" % m + #self.idDWBatch = self.batch.id_batch + #first day of the month following (y, m); filesAvailable() checks whether the archive for that month has started to appear + dt = date(y, m, 28) + timedelta(days=4) #always falls in the following month + self.mm_next_month = "%02d" % dt.month + self.yyyy_next_month = "%02d" % dt.year + + + @staticmethod + def
statsAvailable(): + """ + Returns true if there are N months of statistics in the local database out of the last N+1 months + where N = Configuration.github_archive_months + we look back N+1 months because testermonth's statistics will not be ready the during the first days + of the month; so it is ok to have the last N available even if yestermonth is not there + """ + logger = Logger.getInstance() + months_of_stats = 0 + how_many = 0 + date_cursor = date.today() + while months_of_stats <= Configuration.github_archive_months: + dt1 = date_cursor.replace(day=1) #go to first day of month + date_cursor = dt1 - timedelta(days=1) #back one day so I get previous month + year_minus_cursor = date_cursor.year + month_minus_cursor = date_cursor.month + #do I have data for this month + parameters = { + 'stat_year': year_minus_cursor, + 'stat_month': month_minus_cursor + } + + if CrawlerDatabase.select_int("SELECT COUNT(*) FROM rc_gh_archive WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s", parameters) > 0: + how_many = how_many + 1 + months_of_stats = months_of_stats + 1 + logger.debug("GithubArchiveStats.statsAvailable: Do we have any stats to process?" + str(how_many >= Configuration.github_archive_months)) ## DEBUG + return how_many >= Configuration.github_archive_months + + + def filesAvailable(self): + """ + Are files available at http://data.githubarchive.org + I assume all files for a month are available if first file of next month is available + """ + proc = Popen("wget http://data.githubarchive.org/" + self.yyyy_next_month + "-" + self.mm_next_month + "-01-0.json.gz", shell=True, cwd=Configuration.temporary_directory) + return_code = proc.wait() + if return_code == 8: + return False + return True + + def fetchFiles(self): + """ + Files are per hour with name: YEAR-MONTH-DAY-HOUR.json.gz + """ + logger = Logger.getInstance() + #Pavia: for day_iter in range(1, monthrange(self.y, self.m)[1] + 1): #number of days in this month + for day_iter in range(1, 3): #number of days in this month + + #Pavia: for hour_iter in range(24): + for hour_iter in range(10, 12): + sz_day = "%02d" % day_iter + sz_hour = str(hour_iter) + + if not os.path.isfile(Configuration.github_file_path + "/gh/" + str(self.y) + "-" + self.mm + "-" + sz_day + "-" + sz_hour + ".json.gz"): + proc = Popen("wget http://data.githubarchive.org/" + str(self.y) + "-" + self.mm + "-" + sz_day + "-" + sz_hour + ".json.gz", shell=True, cwd=Configuration.github_file_path + "/gh") + return_code = proc.wait() + if return_code == 8: + logger.error("wget http://data.githubarchive.org/" + str(self.y) + "-" + self.mm + "-" + sz_day + "-" + sz_hour + ".json.gz" + " returned error code 8") + + + + + def processFiles(self): + logger = Logger.getInstance() + + compressed_files = glob.glob(Configuration.github_file_path + "/gh/" + str(self.y) + "-" + self.mm + "*.json.gz") + for compressed_file in compressed_files: + proc = Popen("gunzip " + compressed_file, shell=True, cwd=Configuration.github_file_path + "/gh") + return_code = proc.wait() + + uncompressed_files = glob.glob(Configuration.github_file_path + "/gh/" + str(self.y) + "-" + self.mm + "*.json") + for uncompressed_file in uncompressed_files: + with open(uncompressed_file) as f: + content = f.readlines() + for line in content: + try: + decoded = json.loads(line) + # GistEvent lines have no repository + if decoded["type"] != "GistEvent" : #not interested in Gists + #To speed up testing restrict to ReleaseEvent + #if decoded["type"] == "ReleaseEvent": + repo = decoded["repo"] + + 
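+                            # The parsing below relies on only a handful of fields from each event line.
+                            # A minimal ReleaseEvent record, as assumed by this method (an illustrative
+                            # sketch based on the fields accessed here, not on the full GitHub Archive schema):
+                            #
+                            #   {
+                            #     "type": "ReleaseEvent",
+                            #     "repo": {"name": "some-org/some-project"},
+                            #     "payload": {"release": {"id": 123456, "url": "..."}}
+                            #   }
+                            #
+                            # RepositoryEvent lines are additionally expected to carry top-level
+                            # "action" and "description" fields.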
logger.debug("Parsing event type: " + decoded["type"] + " from project: " + repo["name"]) + try: + if decoded["type"] == "RepositoryEvent" and ( decoded["action"] == "created" or decoded["action"] == "edited" or decoded["action"] == "renamed" ): + try: + project_description = decoded["description"] + logger.debug("Found description:" + project_description + " for project: " + repo["name"]) + except: + project_description = "" + else: + project_description = "" + + #print("DEBUG!! processfiles ") + #time.sleep(1.5) + ''' + if decoded["type"] == "PullRequestEvent" : + payload = decoded["payload"] + pull_request = payload["pull_request"] + deployments_url = pull_request["deployments_url"] + license = pull_request["license"] + language = pull_request["language"] + logger.debug("deploy " + deployment_url + " license " + license + " language " + language) + ''' + #Pavia: in questo pezzo di codice incrementa gli eventi relativi ad un progetto gia' conosciuto per il periodo preso in considerazione nelle statistiche + #se in precedenza abbiamo trovato una descrizione del progetto aggiorna il relativo campo + parameters = { + 'project_name': str(repo["name"]), + 'description': str(project_description), + 'stat_year': self.y, + 'stat_month': self.m, + } + #print "parameters" + #print(str(parameters)) + #print "str(idDWBatch)" + #print str(self.idDWBatch) + #print("SELECT count(*) from rc_gh_archive WHERE project_name = '" + str(repo["name"]) + "' AND idGhProject >0 ") + #if CrawlerDatabase.select_int("SELECT count(*) from rc_gh_archive WHERE project_name = '" + str(repo["name"]) + "' AND idGhProject >0 ") > 0 : + #Mria February 2021 limitiamo il dowmload dei progetti da GitHub solo per ReleaseEvent + if decoded["type"] == "ReleaseEvent": + + if CrawlerDatabase.select_int("SELECT COUNT(*) FROM rc_gh_archive WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s AND project_name=%(project_name)s", parameters) > 0: + if parameters['description'] == "": #if description is empty I do not overwrite it as it might have been there in other events + CrawlerDatabase.execute("UPDATE rc_gh_archive SET event_count=event_count+1 WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s AND project_name=%(project_name)s", False, parameters) + else: + CrawlerDatabase.execute("UPDATE rc_gh_archive SET description=%(description)s, event_count=event_count+1 WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s AND project_name=%(project_name)s", False, parameters) + else: + #Maria febbratio 2021 : Insert non funziona sull achiave primaria project_name quando + #con diversa release developername + #USiamo inert in diverso modo : INSERT INTO ins_duplicate VALUES (4,'Gorilla') ON DUPLICATE KEY UPDATE animal='Gorilla'; + #CrawlerDatabase.execute("INSERT INTO rc_gh_archive (project_name, description, event_count, stat_year, stat_month) VALUES (%(project_name)s, %(description)s, 1, %(stat_year)s, %(stat_month)s) ON DUPLICATE KEY UPDATE project_name=%(project_name)s", False, parameters) + CrawlerDatabase.execute("INSERT INTO rc_gh_archive (project_name, description, event_count, stat_year, stat_month) VALUES (%(project_name)s, %(description)s, 1, %(stat_year)s, %(stat_month)s)", True, parameters) + + #Pavia: se l'evento e' di tipo ReleaseEvent, qui parsiamo le informazioni necessarie per popolare la rc_gh_archive_release + #Commento di Maria February 2021 al momento prendiamo solo progetti della ReleaseEvent + #limitiamo il donwload dei progetti da GitHub + #if decoded["type"] == "ReleaseEvent": + #Pavia: l'"url" 
e l'"id" della release ora li troviamo sotto payload->release + payload = decoded["payload"] + release = payload["release"] + developer_name = '' + developer_name = repo["name"].rsplit("/",1)[0] + #print developer_name + parameters = { + 'project_name': str(repo["name"]), + 'developer_name': str(repo["name"].rsplit("/",1)[0]), + 'url': str(release["url"]), + 'version': str(release["id"]) + } + + #print("Found release event for project: " + repo["name"] + ", release id: " + str(release["id"]) + ", release url: " + release["url"]) + #print ("SELECT count(*) FROM rc_gh_archive_release WHERE project_name = " + repo["name"] + " AND version = " + release["id"]+ " AND developer_name=" + developer_name) + #sqlstring = "SELECT count(*) FROM rc_gh_archive_release a , rc_gh_archive b WHERE a.project_name = '" + repo["name"] + "'" + "AND a.project_name = b.project_name " + #sqlstring = sqlstring + " AND a.version ='" + str(release['id']) + "' AND a.developer_name = '" + str(repo["name"].rsplit("/",1)[0]) + "'" + #print "=======================================================" + #print sqlstring + #print "=======================================================" + + #if CrawlerDatabase.select_int("SELECT count(*) FROM rc_gh_archive_release WHERE project_name = '" + repo["name"] + "' AND version = '" + release["id"]+ "' AND developer_name='" + developer_name + "'") == 0: + # CrawlerDatabase.execute("UPDATE rc_gh_archive_release SET project_name=%(project_name)s AND version=%(version)s AND developer_name = %(developer_name)s AND url=%(url)s", False, parameters) + #else: + #try: + # CrawlerDatabase.execute("INSERT INTO rc_gh_archive_release (project_name, url, version, developer_name) VALUES (%(project_name)s, %(url)s, %(version)s, %(developer_name)s) ", True, parameters) + #except: + # logger.error("Error INSERTING INTO rc_gh_archive_release: " + str(parameters)) + #else: + #logger.info("WARNING: The github project " + str(repo["name"]) + " with release " + str(release["id"]) + " for the developer name " + str(repo["name"].rsplit("/",1)[0]) + " exist.") + + logger.debug("Found release event for project: " + repo["name"] + ", release id: " + str(release["id"]) + ", release url: " + release["url"]) + CrawlerDatabase.execute("INSERT INTO rc_gh_archive_release (project_name, url, version, developer_name) VALUES (%(project_name)s, %(url)s, %(version)s, %(developer_name)s) ON DUPLICATE KEY UPDATE url = %(url)s, version=%(version)s, developer_name=%(developer_name)s", True, parameters) + except Exception, ex: + logger.error(traceback.format_exc()) + + #logger.error(str(ex)) + # do nothing; sometimes repository is missing + + except Exception, ex: + logger.info(str(ex) + " missing in " + line) diff --git a/web-crawler/RepositoryCrawlerThread.py b/web-crawler/RepositoryCrawlerThread.py new file mode 100755 index 00000000..f1527284 --- /dev/null +++ b/web-crawler/RepositoryCrawlerThread.py @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+# +# Copyright 2014 Bitergium SLL + +#from RepositoryCrawler import RepositoryCrawler +from generic_data_fetcher import GenericDataFetcher +#from Integrator import GithubIntegrator +from github_data_fetcher import GitHubDataFetcher +from Utils import Configuration, Logger, Sources +import time +from CrawlerDatabase import CrawlerDatabase +import logging + +gh = GitHubDataFetcher() +logger = Logger.getInstance() +while True: + try: + + if Configuration.github_every_n_days > 0: + logger.info("Fetching data from GitHubArchive through RepositoryCrawler") + #before to integrate in the MetadataProject , the crawler should be send to the Analyser + #that metadataproject is ready ... + gh.run() + gh.batch.complete() + + sleep_time = Configuration.repository_crawler_sleep_time + logger.info("Repository Crawler about to sleep for " + str(sleep_time) + " seconds.") + time.sleep(sleep_time) + # to avoid 2006 'MySQL server has gone away' issue + CrawlerDatabase.connect() + logger.info("Repository Crawler waking up after " + str(sleep_time) + " seconds.") + + except Exception, ex: + logger.error(str(ex)) + \ No newline at end of file diff --git a/web-crawler/TestApacheRepositoryCrawler.py b/web-crawler/TestApacheRepositoryCrawler.py new file mode 100755 index 00000000..4a924f4b --- /dev/null +++ b/web-crawler/TestApacheRepositoryCrawler.py @@ -0,0 +1,82 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL + +from Utils import Configuration, Logger +from datetime import date, timedelta +from CrawlerDatabase import CrawlerDatabase +from subprocess import Popen +import glob +from calendar import monthrange +import json +import os + +# This is the class we want to test. So, we need to import it +import Orchestrator as OrchestratorClass +import dw_batch + +class Test(unittest.TestCase): + """ + The basic class that inherits unittest.TestCase + """ + orchestrator = OrchestratorClass.Orchestrator() # instantiate the Orchestrator Class + + # test case function + def test_0(self): + + ai = ApacheIntegrator() + if hasattr(ai, 'batch') and not (ai.batch is None): + ai.integrate() + ai.limitBatchLength() + + listOfProjects = [] + IdBatch = 6 + + dwb = DwBatch() + #I search the batch and get the source out of it + dwb.load(IdBatch) #load from db + #I create the information source + info_source = InformationSource(Sources.names[dwb.id_source]) + + #I search all the doaps in it and add the information source to them + parameters = { 'idDWBatch': dwb.id_batch } + cursor = CrawlerDatabase.execute_cursor("SELECT d.idDoap, p.idProject, p.Name, p.Homepage FROM Doap d JOIN Project p ON d.idProject=p.idProject WHERE idDWBatch=%(idDWBatch)s", parameters) + results = cursor.fetchall() + for record in results: + cp = CrawlerProject() + cp.id_project = record[1] + cp.name = record[2] + cp.homepage = record[3] + #I add to cp the doap + dp = DoapProject() + dp.load_from_db(record[0]) + dp.source = info_source + cp.doap.append(dp) + #I search all the doaps in the same project from a different source e.g. 
not in this batch + ids_string = "" + batch_id_other_sources = dwb.latest_batches(True) + all_ids = [] + for i in range(1, Sources.count + 1): + if i != dwb.id_source: + all_ids = all_ids + batch_id_other_sources[i] + if len(all_ids) > 0: + ids_string = "AND d.idDWBatch IN (" + ', '.join(all_ids) + ")" + parameters = { 'idProject' : cp.id_project } + cursor_other_doaps = CrawlerDatabase.execute_cursor("SELECT d.idDoap, dwb.idSource FROM Doap d JOIN DWBatch dwb ON d.idDWBatch=dwb.idDWBatch WHERE d.idProject=%(idProject)s " + ids_string, parameters) + results_other_doaps = cursor_other_doaps.fetchall() + for record_other_doaps in results_other_doaps: + #I add to cp the doap + dp = DoapProject() + dp.load_from_db(record_other_doaps[0]) + dp.source = InformationSource(Sources.names[record_other_doaps[1]]) + cp.doap.append(dp) + + listOfProjects.append(cp) + a = TrivialJSONEncoder().encode(listOfProjects) + print "This is only test. The result is : " + print a +if __name__ == '__main__': + # begin the unittest.main() + unitest.main() diff --git a/web-crawler/Utils.py b/web-crawler/Utils.py new file mode 100755 index 00000000..c5153751 --- /dev/null +++ b/web-crawler/Utils.py @@ -0,0 +1,390 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL + +import logging +import logging.handlers +import base64 +import datetime +import ConfigParser +import json +import os +import sys +from json import JSONEncoder +from datetime import date +import urllib2 + + +class Logger(): + + __instance = None + __logLevel = None + __stdout = False + + + @staticmethod + def getInstance (): + """Static Access Method""" + + if Logger.__instance is None: + Logger() + + return Logger.__instance + + def __init__(self): + """virtual private constructor""" + if Logger.__instance is None: + Logger.__instance = self + Logger.__instance.__setParameters__ () + else: + raise Exception ("This class is a singleton class !") + + def __setParameters__(self): + logLevels = { + "CRITICAL": [logging.CRITICAL,5], + "FATAL": [logging.FATAL,4], + "ERROR": [logging.ERROR,3], + "WARNING": [logging.WARN,2], + "INFO": [logging.INFO,1], + "DEBUG": [logging.DEBUG,0] + } + logLevelArray = logLevels.get(Configuration.logging_level,[logging.DEBUG,0]) + self.__logLevel = logLevelArray [1] + logging.basicConfig(level=logLevelArray [0], format='%(asctime)s %(levelname)8s %(message)s', filename=Configuration.log_path, filemode='a') + self.__stdout = Configuration.stdout + + #logging.getLogger( ).setLevel(Configuration.logging_level) + # html monitor is a web page written by all processes so that the user is informed almost in real time on the status + # it's written in a subdir 'crawler' of Configuration.path_html + + if not os.path.isdir(Configuration.path_html + '/crawler'): + os.makedirs(Configuration.path_html + '/crawler') + index_file = open(Configuration.path_html + '/crawler/index.html',"w") + index_file.write("\n MARKOS crawler status\n") + index_file.write(" \n") + index_file.write(" \n") + index_file.write(" \n") + index_file.write(" \n") + index_file.write(" \n") + index_file.write(" ") + index_file.close() + header_file = open(Configuration.path_html + '/crawler/header.html',"w") + header_file.write("header") + header_file.close() + fetcher_file = open(Configuration.path_html + '/crawler/fetcher.html',"w") + fetcher_file.write("fetcher") + 
fetcher_file.close() + notifier_file = open(Configuration.path_html + '/crawler/notifier.html',"w") + notifier_file.write("notifier") + notifier_file.close() + #handler = logging.handlers.RotatingFileHandler('/tmp/markos_crawler', maxBytes=1000000, backupCount=5) + # create formatter + #formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + # add formatter to ch + #handler.setFormatter(formatter) + #logger.addHandler(handler) + logging.info('Initializing logger') + + + + def debug(self,msg): + if self.__stdout and self.__logLevel<=0: + print ('DEBUG: '+ msg) + #BUG logger does not remember its level; it remembers its file path though + #logging.getLogger( ).setLevel(Configuration.logging_level) + logging.getLogger().debug(msg) + + def error(self,msg): + if self.__stdout and self.__logLevel<=3: + print ('ERROR: '+ msg) + #BUG logger does not remember its level; it remembers its file path though + #logging.getLogger( ).setLevel(Configuration.logging_level) + logging.error(msg) + + def info(self,msg): + if self.__stdout and self.__logLevel<=1: + print ('INFO: '+msg) + #BUG logger does not remember its level; it remembers its file path though + #logging.getLogger( ).setLevel(Configuration.logging_level) + logging.getLogger( ).info(msg) + + def critical(self,msg): + if self.__stdout and self.__logLevel<=5: + print ('CRITICAL: '+msg) + #BUG logger does not remember its level; it remembers its file path though + #logging.getLogger( ).setLevel(Configuration.logging_level) + logging.critical(msg) + + def fatal(self,msg): + if self.__stdout and self.__logLevel<=4: + print ('FATAL: '+msg) + #BUG logger does not remember its level; it remembers its file path though + #logging.getLogger( ).setLevel(Configuration.logging_level) + logging.fatal(msg) + + + def warn(self,msg): + if self.__stdout and self.__logLevel<=2: + print ('WARN: '+msg) + #BUG logger does not remember its level; it remembers its file path though + #logging.getLogger( ).setLevel(Configuration.logging_level) + logging.warn(msg) + + + def warning(self,msg): + if self.__stdout and self.__logLevel<=2: + print ('WARNING: '+msg) + #BUG logger does not remember its level; it remembers its file path though + #logging.getLogger( ).setLevel(Configuration.logging_level) + logging.warning(msg) + +class DateHelper(): + def __init__(self, stringdate): + try: + #sometimes dates a wrongly written: "1967-7-10" instead of "1967-07-10"; DateHelper normalizes them + yyyy = stringdate[0:4] + mmdd = stringdate[stringdate.index("-")+1:len(stringdate)] + mm = mmdd[0:mmdd.index("-")] + dd = mmdd[mmdd.index("-")+1:len(mmdd)] + self.date = date(int(yyyy), int(mm), int(dd)) + except: + self.date = date(datetime.MINYEAR, 1, 1) + def load_from_timestamp(self, unix_timestamp): + tmpDate = datetime.datetime.fromtimestamp(int(unix_timestamp)) + yyyy = tmpDate.strftime('%Y') + mm = tmpDate.strftime('%m') + dd = tmpDate.strftime('%d') + self.date = date(int(yyyy), int(mm), int(dd)) + def load_from_jquery_format(self, jquery_format): + # Sep 15 2014 + tmpDate = datetime.datetime.strptime(jquery_format, "%b %d %Y") + yyyy = tmpDate.strftime('%Y') + mm = tmpDate.strftime('%m') + dd = tmpDate.strftime('%d') + self.date = date(int(yyyy), int(mm), int(dd)) + def load_from_SF_format(self, SF_format): + # 2012-05-21T00:54:08.304Z + self.date = datetime.datetime.strptime(SF_format[:19], "%Y-%m-%dT%H:%M:%S") + +class StringHelper(): + @staticmethod + def removeNonAscii(thisString): + return "".join(filter(lambda x: ord(x)<128, thisString)) + 
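+    # Illustrative behaviour of the two helpers on hypothetical inputs:
+    #   StringHelper.removeNonAscii("Caff\xe8 latte")  -> "Caff latte"   (bytes outside ASCII are dropped)
+    #   StringHelper.makeUnicodeSafe("Caff\xe8 latte") -> u"Caff latte"  (undecodable bytes are removed one by one)
+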
@staticmethod + def makeUnicodeSafe(thisString): + ''' + It is probably equivalent to the above method + ''' + while True: + try: + return unicode(thisString) + except UnicodeDecodeError as ex: #UnicodeDecodeError + thisString = thisString[0:ex.start] + thisString[ex.end:] + + + +class StringList(): + separator = " " + + def __init__(self): + self.plain = [] + self.base64_encoded = "" + + def load_plain(self, strings): + self.plain = strings + separator = "" + for s in self.plain: + self.base64_encoded += separator + base64.b64encode(s) + separator = StringList.separator + return self + + def load_base64(self, base64_encoded): + self.base64_encoded = base64_encoded + for s in self.base64_encoded.split(StringList.separator): + self.plain.append(base64.b64decode(s)) + return self + + def remove_empty_strings(self): + # self.load_plain(filter(lambda text: text.strip(), self.plain)) + self.load_plain([text for text in self.plain if text.strip()]) + +class Configuration(): + + #CAaddress = '' + #CAport = '' + + KAaddress = '' + KAport = '' + + MySQLhost = '' + MySQLuser = '' + MySQLpasswd = '' + MySQLdatabase = '' + MySQLcharset = '' + + sleep_time = '' + path_html = '' + notifier_sleep_time = 0 + repository_crawler_sleep_time = 0 + + max_batch_size = 0 + github_archive_months = 0 + github_top_projects_event_count = 0 + sf_updated_days = 0 + + #flossmole_file_path = '' + github_file_path = '' + + apache_every_n_days = 0 + github_every_n_days = 0 + jqueryplugin_every_n_days = 0 + exit_now = False + sf_file_path = '' + stdout = False + + def __init__(self): + Config = ConfigParser.ConfigParser() + + Config.read(os.path.dirname(os.path.realpath(__file__)) + "/config") #file 'config' is in the same folder + + #Configuration.CAaddress = Config.get("CodeAnalyser", "ip_address") + #Configuration.CAport = Config.get("CodeAnalyser", "port") + + Configuration.KAaddress = Config.get("Knowledgebase","ip_address") + Configuration.KAport = Config.get("Knowledgebase","port") + + Configuration.MySQLhost = Config.get("Database", "MySQLhost") + Configuration.MySQLuser = Config.get("Database", "MySQLuser") + Configuration.MySQLpasswd = Config.get("Database", "MySQLpasswd") + Configuration.MySQLdatabase = Config.get("Database", "MySQLdatabase") + Configuration.MySQLcharset = Config.get("Database", "MySQLcharset") + + # If True the Orchestrator will stop at the end of the loop or at the end of the sleep + Configuration.exit_now = Config.getboolean("General", "exit_now") + + # How many seconds the orchestrator will sleep at the end of each iteration + Configuration.sleep_time = Config.getfloat("General", "sleep_time") + + # How many seconds the notifier will sleep at the end of each iteration + Configuration.notifier_sleep_time = Config.getfloat("General", "notifier_sleep_time") + + # How many seconds the repository crawler will sleep at the end of each iteration + Configuration.repository_crawler_sleep_time = Config.getfloat("General", "repository_crawler_sleep_time") + + #Configuration.sf_file_path = Config.get("General", "sf_file_path") + # path to sourceforge files to be imported + + #Configuration.flossmole_file_path = Config.get("General", "flossmole_file_path") + # path to flossmole files to be imported + + #Ferbuary 2021 Now we have github_file_path + Configuration.github_file_path = Config.get("General", "github_file_path") + + Configuration.temporary_directory = Config.get("General", "temporary_directory") + # path to system temporary directory + + #Configuration.max_batch_size = 
Config.getint("CodeAnalyser", "max_batch_size") + # maximum l""ength of batch to be sent to the code analyser + + Configuration.logging_level = Config.get("Logging", "level") + Configuration.log_path = Config.get("Logging", "path") + Configuration.stdout = Config.getboolean("Logging", "stdout") + Configuration.path_html = Config.get("Logging", "path_html") + # path to html files where monitoring information is written + + Configuration.apache_every_n_days = Config.getint("Fetchers", "apache_every_n_days") + Configuration.github_every_n_days = Config.getint("Fetchers","github_every_n_days") + #Configuration.codeplex_every_n_days = Config.getint("Fetchers", "codeplex_every_n_days") + #Configuration.sourceforge_every_n_days = Config.getint("Fetchers", "sourceforge_every_n_days") + #Configuration.flossmole_every_n_days = Config.getint("Fetchers", "flossmole_every_n_days") + Configuration.jqueryplugin_every_n_days = Config.getint("Fetchers", "jqueryplugin_every_n_days") + #Configuration.eclipse_every_n_days = Config.getint("Fetchers", "eclipse_every_n_days") + + #how many months of statistics should the crawler download and import from githubarchive.org + Configuration.github_archive_months = Config.getint("RepositoryCrawler", "github_archive_months") + + #how many top Github projects counting events (commit, issues change, ...) + Configuration.github_top_projects_event_count = Config.getint("RepositoryCrawler", "github_top_projects_event_count") + + #filter for SourceForge projects; integrated only if they have been updated in the last N days + Configuration.sf_updated_days = Config.getint("RepositoryCrawler", "sf_updated_days") + + #[KnowledgeBase] configuration: + Configuration.web_path_html = Config.get("Knowledgebase","web_path_html") + +class TrivialJSONEncoder(JSONEncoder): + def default(self, o): + return o.__dict__ + +class States(object): + Initial = 1 + Completed = 2 + Integrated = 3 + Cancelled = 4 + Notified = 5 + + + +class Sources(object): + + Github = 1 + Apache = 2 + #SourceForge = 2 + #CodePlex = 3 + #da 4 diventa 2 + + #da 5 diventa 3 + JQueryPlugin = 3 + #Eclipse = 6 + # I also store the number of sources so far implemented in "count" + count = 3 + every_n_days = [0,0,0,0] + names = ["", "", "",""] + def __init__(self): + Sources.every_n_days[Sources.Apache] = Configuration.apache_every_n_days + #Sources.every_n_days[Sources.SourceForge] = Configuration.sourceforge_every_n_days + #Sources.every_n_days[Sources.CodePlex] = Configuration.codeplex_every_n_days + Sources.every_n_days[Sources.Github] = Configuration.github_every_n_days + Sources.every_n_days[Sources.JQueryPlugin] = Configuration.jqueryplugin_every_n_days + #Sources.every_n_days[Sources.Eclipse] = Configuration.eclipse_every_n_days + Sources.names[Sources.Apache] = "Apache" + #Sources.names[Sources.SourceForge] = "SourceForge" + #Sources.names[Sources.CodePlex] = "CodePlex" + Sources.names[Sources.Github] = "Github" + Sources.names[Sources.JQueryPlugin] = "JQueryPlugin" + #Sources.names[Sources.Eclipse] = "Eclipse" + +class UrllibHelper(): + @staticmethod + def urlopen(url_string, max_attempts = 3): + ''' + Sometimes the download just fails for no apparent reason; retrying right after the + failure solves the issue; so this method retries max_attempts times with a default of 3 + ''' + success = False + n_attempts = 1 + ret = "" + while not (success or n_attempts>max_attempts): + try: + response = urllib2.urlopen(url_string) + ret = response.read() + success = True + except Exception as ex: + Logger.error('Error 
downloading ' + url_string + " - Attempt n.:" + str(n_attempts) + " - " + str(ex)) + n_attempts = n_attempts + 1 + return ret + +class InformationSource: + def __init__(self, name): + self.name = name + +#initialize unique instance of configuration +Configuration() + +# I initialize an instance so that the configuration is read into the static variables (e.g. Sources.every_n_days) +Sources() + +#initialize the logger unique instance +Logger() diff --git a/web-crawler/apache_data_fetcher.py b/web-crawler/apache_data_fetcher.py new file mode 100755 index 00000000..ad66dd5a --- /dev/null +++ b/web-crawler/apache_data_fetcher.py @@ -0,0 +1,59 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL +# Author: Davide Galletti + +from doapfiend.doaplib import load_graph +from generic_data_fetcher import GenericDataFetcher +import urllib2 +from xml.dom import minidom +from CrawlerDatabase import CrawlerDatabase +import base64 +from Utils import Logger, Sources + +class ApacheDataFetcher(GenericDataFetcher): + def __init__(self): + super(ApacheDataFetcher, self).__init__(Sources.Apache) + def run(self): + logger = Logger.getInstance() + #reading the list of doap files + #Maria: files.xml does not exist any more; the old call was response = urllib2.urlopen('https://svn.apache.org/repos/asf/infrastructure/site-tools/trunk/projects/files.xml') + response = urllib2.urlopen('https://svn.apache.org/repos/asf/comdev/projects.apache.org/trunk/data/projects.xml') + + + #print "after reading list of doap files " + xml = response.read() + xmldoc = minidom.parseString(xml) + itemlist = xmldoc.getElementsByTagName('location') + + # reading each doap location + for s in itemlist : + print s.firstChild.data + try: + # getting the rdf-xml from the url + doapurl = urllib2.urlopen(s.firstChild.data) + xml = doapurl.read() + # parsing the rdf + doap = load_graph(xml) + # creating ApacheDoap also saves it to the RAW database + ApacheDoap(doap, xml, self.batch.id_batch) + logger.info("Read " + doap.name + " from " + s.firstChild.data) + except Exception as e: + logger.error('Error loading doap: ' + s.firstChild.data) + logger.error(str(e)) + + +class ApacheDoap(): + def __init__(self, doap, xml, idBatch): + #Maria: the original statement raised an error and has been rewritten; it was CrawlerDatabase.execute("INSERT into RAW_Apache_Project (name, homepage, doapfile, iddwbatch) VALUES ('" + doap.name + "', '" + str(doap.homepage) + "', '" + base64.b64encode(xml) + "', " + str(idBatch) + ")", True) +#'rdfSubject('')' + #dates in the xml files are sometimes loaded incorrectly or incompletely; for this reason + #for the time being an IGNORE could be added to the insert statement + #this will have to be revisited and handled properly + #Maria: str(doap.homepage) caused a MySQL error, so single quotes are replaced in the string + #CrawlerDatabase.execute("INSERT into RAW_Apache_Project (Name, Homepage, DoapFile, idDWBatch) VALUES ('" + doap.name + "', '"+ str(doap.homepage).replace("\'","\"") + "', '" + base64.b64encode(xml) + "', " + str(idBatch) + ")", True) + CrawlerDatabase.execute("INSERT into RAW_Apache_Project (Name, Homepage, DoapFile, idDWBatch) VALUES ('" + doap.name + "', '"+ str(doap.homepage).replace("\'","\"") + "', '" + base64.b64encode(xml) + "', " + str(idBatch) + ")", True) + \ No newline at end of file diff --git a/web-crawler/api/README.rst b/web-crawler/api/README.rst new file mode 100755
index 00000000..fa00fc4d --- /dev/null +++ b/web-crawler/api/README.rst @@ -0,0 +1,4 @@ +Documentation +============= + +Put a brief description of 'api'. diff --git a/web-crawler/api/api.egg-info/PKG-INFO b/web-crawler/api/api.egg-info/PKG-INFO new file mode 100755 index 00000000..104242a0 --- /dev/null +++ b/web-crawler/api/api.egg-info/PKG-INFO @@ -0,0 +1,19 @@ +Metadata-Version: 1.1 +Name: api +Version: 0.1 +Summary: api +Home-page: UNKNOWN +Author: UNKNOWN +Author-email: UNKNOWN +License: UNKNOWN +Description: Documentation + ============= + + Put a brief description of 'api'. + +Keywords: web services +Platform: UNKNOWN +Classifier: Programming Language :: Python +Classifier: Framework :: Pylons +Classifier: Topic :: Internet :: WWW/HTTP +Classifier: Topic :: Internet :: WWW/HTTP :: WSGI :: Application diff --git a/web-crawler/api/api.egg-info/SOURCES.txt b/web-crawler/api/api.egg-info/SOURCES.txt new file mode 100755 index 00000000..433354c2 --- /dev/null +++ b/web-crawler/api/api.egg-info/SOURCES.txt @@ -0,0 +1,15 @@ +README.rst +setup.py +api/__init__.py +api/crawler.py +api/views.py +api/views19Marzo2021.py +api/viewsOldVersion.py +api.egg-info/PKG-INFO +api.egg-info/SOURCES.txt +api.egg-info/dependency_links.txt +api.egg-info/entry_points.txt +api.egg-info/not-zip-safe +api.egg-info/paster_plugins.txt +api.egg-info/requires.txt +api.egg-info/top_level.txt \ No newline at end of file diff --git a/web-crawler/api/api.egg-info/dependency_links.txt b/web-crawler/api/api.egg-info/dependency_links.txt new file mode 100755 index 00000000..8b137891 --- /dev/null +++ b/web-crawler/api/api.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/web-crawler/api/api.egg-info/entry_points.txt b/web-crawler/api/api.egg-info/entry_points.txt new file mode 100755 index 00000000..afafbf93 --- /dev/null +++ b/web-crawler/api/api.egg-info/entry_points.txt @@ -0,0 +1,3 @@ + [paste.app_factory] + main = api:main + \ No newline at end of file diff --git a/web-crawler/api/api.egg-info/not-zip-safe b/web-crawler/api/api.egg-info/not-zip-safe new file mode 100755 index 00000000..8b137891 --- /dev/null +++ b/web-crawler/api/api.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/web-crawler/api/api.egg-info/paster_plugins.txt b/web-crawler/api/api.egg-info/paster_plugins.txt new file mode 100755 index 00000000..d94f7c98 --- /dev/null +++ b/web-crawler/api/api.egg-info/paster_plugins.txt @@ -0,0 +1 @@ +pyramid diff --git a/web-crawler/api/api.egg-info/requires.txt b/web-crawler/api/api.egg-info/requires.txt new file mode 100755 index 00000000..8b1b5c4a --- /dev/null +++ b/web-crawler/api/api.egg-info/requires.txt @@ -0,0 +1,3 @@ +cornice +PasteScript +waitress diff --git a/web-crawler/api/api.egg-info/top_level.txt b/web-crawler/api/api.egg-info/top_level.txt new file mode 100755 index 00000000..eedd89b4 --- /dev/null +++ b/web-crawler/api/api.egg-info/top_level.txt @@ -0,0 +1 @@ +api diff --git a/web-crawler/api/api.ini b/web-crawler/api/api.ini new file mode 100755 index 00000000..f0d5dc54 --- /dev/null +++ b/web-crawler/api/api.ini @@ -0,0 +1,46 @@ +[app:main] +use = egg:api + +pyramid.reload_templates = true +pyramid.debug_authorization = false +pyramid.debug_notfound = false +pyramid.debug_routematch = false +pyramid.debug_templates = true +pyramid.default_locale_name = en + +[server:main] +use = egg:waitress#main +host = 0.0.0.0 +port = 6543 + + +# Begin logging configuration + +[loggers] +keys = root, api + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = INFO 
+handlers = console + +[logger_api] +level = DEBUG +handlers = +qualname = api + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(asctime)s %(levelname)-5.5s [%(name)s][%(threadName)s] %(message)s + +# End logging configuration diff --git a/web-crawler/api/api/__init__.py b/web-crawler/api/api/__init__.py new file mode 100755 index 00000000..203933f2 --- /dev/null +++ b/web-crawler/api/api/__init__.py @@ -0,0 +1,10 @@ +"""Main entry point +""" +from pyramid.config import Configurator + + +def main(global_config, **settings): + config = Configurator(settings=settings) + config.include("cornice") + config.scan("api.views") + return config.make_wsgi_app() diff --git a/web-crawler/api/api/views.py b/web-crawler/api/api/views.py new file mode 100755 index 00000000..28b0d927 --- /dev/null +++ b/web-crawler/api/api/views.py @@ -0,0 +1,353 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL +from webob import Response, exc, response, request +import json +from cornice import Service +import uuid +import sys +from datetime import datetime +import time +from webob.request import Request + +#from NotifyMessage import NotifyMessage +#import notify +#import os + +import urllib + +sys.path.append('..') + +from dw_batch import DwBatch +from CrawlerDatabase import CrawlerDatabase +from Utils import Sources, Configuration, TrivialJSONEncoder, Logger + + +import traceback + +import urllib2 + +#Create Service for the KnowledgeBase +projects = Service(name='retrieveSendNewMetadata', path='/projects', description="get/post projects metadata with updated=0") + +project = Service(name='updateproject', path='/project', description="update project ") + +#At the moment this service is not used +newRepositoryDeployed = Service(name='newRepositoryDeployed', path='/newRepositoryDeployed', description="receives the notification of a new repository deployed") + +_VALUES = {} + + + +class _401(exc.HTTPError): + def __init__(self, msg='Unauthorized'): + body = {'status': 401, 'message': msg} + Response.__init__(self, json.dumps(body)) + self.status = 401 + self.content_type = 'application/json' + +@projects.get() +def get_value(request): + if request.remote_addr != Configuration.KAaddress: + raise _401() + + listOfProjects = [] + IdBatch = 0 + logger = Logger.getInstance() + try: + IdBatch = request.GET['batchId'] + except Exception: + pass + #print IdBatch + dwb = DwBatch() + #I search the batch and get the source out of it + dwb.load(IdBatch) #load from db + #I create the information source + parameters = { 'idDWBatch': str(IdBatch) } + #print str(parameters) + #info_source = InformationSource(Sources.names[dwb.id_source]) + #print "ENTER INTO TRY ADN EXCUTE SELECT on METADATAPROJECT" + try: + cursor = CrawlerDatabase.execute_cursor("SELECT idProject, project, description, versionUrl,version, deploymentUrl, developer, url, idDWBatch FROM MetadataProject WHERE idDWBatch = %(idDWBatch)s", parameters) + #print ("SELECT idProject, project, description, versionUrl,version, deploymentUrl, developer, url, idDWBatch FROM MetadataProject WHERE idDWBatch = %(IdBatch)s", parameters) + results = cursor.fetchall() + for record in results: + logger.info("processing this record to generate fetcher: " + str(record)) ## DEBUG: !!! 
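+ # Each MetadataProject row fetched above is mapped to a plain dict (built just below) and the
+ # whole list is JSON-encoded with TrivialJSONEncoder before being returned to the Knowledge Base.
+ # A client on the configured KAaddress would call this service roughly like (hypothetical values):
+ #   GET http://<crawler-host>:6543/projects?batchId=42
+ # where 6543 is the port set in api.ini and batchId selects the DwBatch whose metadata is exported.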
+ #id_batch = self.batch.id_batch + meta_data = { + 'idProject' : record[0], + 'name' : record[1], + 'description' : record[2], + 'versionUrl' : record[3], + 'version' : record[4], + 'deploymentUrl' : record[5], + 'developer' : record[6], + 'url' : record[7], + 'idDWBatch' : record[8] + } + listOfProjects.append(meta_data) + + metaprojects = TrivialJSONEncoder().encode(listOfProjects) + return metaprojects + except: + # printing stack trace + traceback.print_exc() + +''' +Aggiorno la risorsa MetadataProject in modo parziale quindi uso patch dopo che ho spedito al knowledgebase, +updated ritorna a 0 , al successivo giro tutti i metadataproject con updated a 0 non dovrebbero essere spediti +''' + +@project.patch() +def updated_data(response): + #updata data after the data has been sent to KnowledgeBase + if request.remote_addr != Configuration.KAaddress: + raise _401() + + logger = Logger.getInstance() + logger.info("UPDATE MetadataProject, SET updated=0 with a specific idProject: " ) + data = response.json_body + if (data['message'] != '401'): + print data['message'] + idProject = data['message'] + try: + cursor = CrawlerDatabase.execute_cursor("SELECT updated FROM MetadataProject WHERE updated=0 and idProject= '" + str(idProject) + "'") + results = cursor.fetchall() + #print results + for record in results: + CrawlerDatabase.execute("UPDATE MetadataProject SET updated=1 WHERE idProject ='"+str(idProject)+"'", True) + #logger.info("UPDATED") + return 200 + except Exception as ex: + print(str(ex)) + print(traceback.print_exc()) + else: + return 401 + +@project.put() +def message(response): + if request.remote_addr != Configuration.KAaddress: + raise _401() + + logger = Logger.getInstance() + #logger.info("MESSAGE: " + str(response.json_body)) + data = response.json_body + #try: + if (data['message']!= 'ERROR'): + #logger.info("Status of the create data is :" + data['message']) + idProject = data['message'] + #print idProject + try: + cursor = CrawlerDatabase.execute_cursor("SELECT idProject FROM MetadataProject WHERE updated=0 ") + results = cursor.fetchall() + #print results + for record in results: + idProject = record[0] + CrawlerDatabase.execute("UPDATE MetadataProject SET updated=1 WHERE updated=0 and idProject='" + str(record[0])+"'", True) + #logger.info("UPDATE MetadataProject SET updated=1 WHERE updated=0 and idProject='" + str(record[0])+"'") + #logger.info("UPDATED") + return response.json_body + except Exception as ex: + print(str(ex)) + print(traceback.print_exc()) + #else: + +''' +def valid_unique(request, *args, **kwargs): + + _VALUES['metadata'] = [] + + #print "estraggo i miei metadata" + + try: + cursor = CrawlerDatabase.execute_cursor("SELECT idProject, project, description, versionUrl,version, deploymentUrl, url, idDWBatch,updated FROM MetadataProject where updated=0") + results = cursor.fetchall() + for record in results: + metadata = { + 'idProject' : record[0], + 'name' : record[1], + 'description' : record[2], + 'versionUrl' : record[3], + 'version' : record[4], + 'deploymentUrl' : record[5], + 'url' : record[6], + 'idDWBatch' : record[7], + 'updated' : record[8] + } + _VALUES['metadata'].append(metadata) + + #CrawlerDatabase.execute("UPDATE MetadataProject SET updated=1 WHERE idProject ='"+str(record[0])+"'", True) + + + print len(_VALUES['metadata']) + + l = len (_VALUES['metadata']) + soglia_max = 100 + num = l%soglia_max + + i = 0 + num = num+1 + int_max =0 + + while (i", xml_position) + while end_of_project > 1: + project_doap = header + 
codeplex_xml[xml_position:end_of_project + len("")] + footer
+ xml_position = end_of_project + len("")
+ end_of_project = codeplex_xml.find("", xml_position)
+ try:
+ doap = load_graph(project_doap)
+ # creating CodePlexDoap also saves it to the RAW database
+ CodePlexDoap(doap, project_doap, self.batch.id_batch)
+ logger.info("Read " + doap.name)
+ except Exception:
+ logger.error('Error loading doap: ' + project_doap[0:100])
+
+class CodePlexDoap():
+ def __init__(self, doap, xml, idBatch):
+ CrawlerDatabase.execute("INSERT into RAW_Codeplex_Project (name, homepage, doapfile, iddwbatch) VALUES ('" + doap.name + "', '" + str(doap.homepage) + "', '" + base64.b64encode(xml) + "', " + str(idBatch) + ")", True)
+
+
diff --git a/web-crawler/config b/web-crawler/config
new file mode 100755
index 00000000..6e2efa77
--- /dev/null
+++ b/web-crawler/config
@@ -0,0 +1,70 @@
+[General]
+#The main thread waits sleep_time seconds after each loop.
+sleep_time=60
+#The notifier thread waits notifier_sleep_time seconds after each loop.
+notifier_sleep_time=30
+
+#ENG March 2020 repository_crawler_sleep_time=30060
+repository_crawler_sleep_time = 60
+
+
+
+#uncomment before committing: sf_file_path=/home/ubuntu/markos/markos02/data-for-doap-work
+github_file_path = /home/maria/markos/markos02/github
+temporary_directory=/home/maria/markos/markos02/tmp
+exit_now=False
+
+
+[Fetchers]
+
+# a negative number (e.g. -1) disables the source
+# AAAA_every_n_days < 0 ==> AAAA data won't be crawled
+# For every forge there is a configuration line that tells how many days
+# the crawler will wait before fetching data again from that forge;
+# a negative number (e.g. -1) disables the source.
+github_every_n_days=1
+apache_every_n_days=-1
+jqueryplugin_every_n_days=-1
+
+
+[RepositoryCrawler]
+# minimum number of months of githubarchive stats to crawl before integrating github projects
+github_archive_months=3
+# how many events make a project interesting for the code analysis
+github_top_projects_event_count=100
+# filter for SourceForge projects; integrated only if they have been updated in the last N days
+sf_updated_days=376
+
+[Logging]
+# POSSIBLE LEVELS
+# CRITICAL
+# FATAL (= CRITICAL)
+# ERROR
+# WARNING
+# INFO
+# DEBUG
+level = INFO
+stdout = True
+#path=/home/ubuntu/markos/markos02/logcrawler.txt
+#path_html=/home/maria/markos/markos02/crawler/www
+path_html=/var/www/html/crawler/www
+path=/home/maria/markos/markos02/logcrawler.txt
+
+#Configuration of the KnowledgeBase
+[Knowledgebase]
+web_path_html = /var/www/html/knowledgebase
+# ip address and port of the knowledgebase, format it like this: 192.168.178.39:8080
+ip_address=127.0.0.1
+port=6543
+
+
+
+[Database]
+MySQLhost=127.0.0.1
+MySQLuser=markos
+MySQLpasswd=r3i-3R!bL0
+MySQLdatabase=markosN2_db2
+MySQLcharset=utf8
+#MySQLuser=markos-n2_2
+#MySQLpasswd=W31i-R3!bL0
+
diff --git a/web-crawler/database/alter_rc_gh_archive.sql b/web-crawler/database/alter_rc_gh_archive.sql
new file mode 100644
index 00000000..735b1f1f
--- /dev/null
+++ b/web-crawler/database/alter_rc_gh_archive.sql
@@ -0,0 +1,27 @@
+ALTER TABLE rc_gh_archive
+ ADD idDWBatch int(11);
+
+ALTER TABLE rc_gh_archive
+ ADD idProject int(11);
+
+
+
+
+ -- -----------------------------------------------------
+-- Table `markosN2_db2`.`DoapRepository`
+-- -----------------------------------------------------
+DROP TABLE IF EXISTS `markosN2_db2`.`Repository` ;
+
+CREATE TABLE IF NOT EXISTS `markosN2_db2`.`Repository` (
+ `idRepository` INT NOT NULL AUTO_INCREMENT ,
+ `Browse`
VARCHAR(255) NULL , + `Anon_root` VARCHAR(255) NULL , + `Location` VARCHAR(255) NULL , + `Type` VARCHAR(45) NOT NULL , + `idSource` INT NOT NULL , + PRIMARY KEY (`idRepository`) ) + +ENGINE = InnoDB; + +alter table MetadataProject +add language VARCHAR(400) \ No newline at end of file diff --git a/web-crawler/database/alter_rc_gh_archive_release.sql b/web-crawler/database/alter_rc_gh_archive_release.sql new file mode 100644 index 00000000..c64d3c45 --- /dev/null +++ b/web-crawler/database/alter_rc_gh_archive_release.sql @@ -0,0 +1,38 @@ +ALTER TABLE rc_gh_archive_release +ADD COLUMN `developer_name` VARCHAR(40) not NULL + 'project_name' : record[0], + 'developer_name' : record[1], + 'description' : record[2], + 'url' : record[4], + 'release_url' : release_url, + 'release' : revision, + 'deployment_url' : deployment_url, + 'idDWBatch' : str(idDWBatch) + + + CREATE TABLE IF NOT EXISTS `MetadataProject` ( + `idProject` INT NOT NULL AUTO_INCREMENT , + `project_name` VARCHAR(255) NULL , + `description` VARCHAR(4000) NULL , + `url` VARCHAR(255) NULL , + `created` DATE NULL , + `release_url` VARCHAR(255) NULL , + `release` VARCHAR(255) NULL , + `deployment_url` VARCHAR(255) NULL , + `idDWBatch` VARCHAR(255) NULL , + `developer_name` VARCHAR(40) NULL, + `programming_language` TEXT NULL , + `os` TEXT NULL , + `language` TEXT NULL , + PRIMARY KEY (`idProject`) ) +ENGINE = InnoDB; +DEFAULT CHARACTER SET = utf8 +COLLATE = utf8_swedish_ci +COMMENT = 'utf8_swedish_ci due to the fact that this collation is used in Flossmole table'; + + +ALTER TABLE MetadataProject +ADD COLUMN `developer_name` VARCHAR(40) not NULL + +ALTER TABLE MetadataProject +ADD COLUMN `updated` BOOLEAN diff --git a/web-crawler/database/crawlerModel.mwb b/web-crawler/database/crawlerModel.mwb new file mode 100755 index 0000000000000000000000000000000000000000..ac6ee4af4bf6897db7967f5adbeca6a06f153feb GIT binary patch literal 46296 zcmZ_$Wl$Vlv<3onxSBeWi{I_GK?|LP}xU`m`mwSQJCU`kdmYO3{qvp+WX`~5Pi#5A*J z=jN>E!R_v#saC~wi+j}c2Cx2oY<^z!{ae0?wk)r!;qkjZ&N-fJ=J~RAqs+;=e&F>1 z9ur&Y<`+L;3;p}i2w4m_1F{L#MUR9~%vby~V)umusY)rw>i6qqz3U>z;O-uQ=S;kZ zRLZ!+R=_IDua(Hfqlw~i71N)=CFha&=bc3+X zFv)}A{iCVL7GZy5&|+JConW6{eM0WP9~wzLaju>Azfbo$+S>HK)mzRLKJV;J)yxHE zS|#V#HZ^a`r1AC}_dQHmvY3Ijw*sBTJMBcypL$1@kNo{IAQU%u-4Pc%Z_VXCCU04F z!8JPVWC7lY24A^FW3nwh+XZ8gUC-Tsc)ieAvoNt0o zNd(YVzjuLC-xQHqUB5bSf}G-Ljr#ptt0yC)X9WAXj?YbBFioA-sYgcb7EW%D9#7aR z`+FYioIimv2>&Q%6a=3}dfnb06_m?Xbox8~_OEszq{jkLF?LEDJ%_a5;Om#Q>Mj9j zw}&-b)%2FaQa*3oEP=_NUk|r$4!$fuPCN}4=)foD=;eJiX5Hr58*zMj$^9)H>mN6` zn^~c2dt$ob{yKFOF8m|jv0bPBV|7b$}EzRKTztyPe@TvHfYk8~OI)~~pd89cT6sz{*?=ei0&#_mu3 z(}P5v-h}K811pYB?N^w6y90|pU!=9aF$Z?>8Dq!Z&#ikw*aytCZFMWozq-El{BeQq72~ z%=~i)Vr?X+>-_W02!uWM_&l=y@xp~^>P?*x9{0T(g}S}iYB)r@dtM)>*aHa+y?C|G zcxYx_grt8r24LZ_P9ExJ*2RA5y<}FMYTs76j~6x!y{SSAYBCSf54DGd$%Hj0Gir`c zA-}|yuA;qu?~(76=b1$6G1NIEXs~dorT>Gg9~z3|VLtMFv|L>|Dkd$ccuNUa1jtj1 zE84{uH^Ptg%rS$oXNu4lP;bp-VW+W^OZ#X%^!Enf{d{wOnaU*_em1>G6#b1&l7^4K zNgJ7PzhB#BWNVAp4KL>@wcf=Tbz4$aG%Y0%?QCjE@(tUQB^&ze3skDhd5@06}6ydbVd-RsSD zucOa#?puNm6;}maG7gOo6&Jjz2`tR39Yti{*v!G>pw3t;7m(Iqi0ep4Bf$GQRF#58 zl;K-r8;2spC^ia78l?Yh1;z0+g1_#kp8qWY)3Sw|o6U?q8J1K}Y*G!(ca7L&qEF55 z!O*%Q=X#8SQU-;u`6-fd58;zae|JIfdrn{C48hSucUhHRBW0O^0wHFpL=JgEom9d> zNS;b-1?586K7VsDWB^>%Se%A#q;%QQ{lrC#`**)sfGtr(tLDrO7_7-f9PsPcLDZR0 z8{gAa$;gqqC-Mp1^|tJLv%lZX4SmepKhNxRZNIeqZ=}Yx88f^+LX-6?vTRN!Jr6sw zY$?^j*ZuItzXChIvqo7~rf2(6j7L!q#$3~1r%HodoUeLa> zVJUK!uF^dBwNB){%b>P~6erJ8Yq5MvyOobWvF>5Nt*?(=g672deD#NIB)>RTOsRr( zoFG;_7cYz-X6-PcA_Hl-$gHQa+lxgs=c5PQb#?VY*G*65WUlT3aT4$%fG9~mhqCyA 
zX<>oh3NdxmaWwzn-R?-^sz~xn?SYf2{j;2HkBctp{WS0StWw>^Sx-xpU+d(}$f?Dv z-#8OC z8naG>rFh`Cl1~+V_oMpWr26efL07%~1S}}N9wp&7i;woXbo0f(uT*B{3-A7Pa8ap@ zyP81$<>rVSNlgC246=h5W}#o7uVd5Ta#CUzg{74!&HOIdY0r3na3;Sm(A347@v|7? zU$#KrrFX5?&*l5*UL)LCT9QM%UZKeZ)~IFi%hEiDTHhyCUv6j*i#%}|nUh%()Nwdj z3<@6hQ7!V|RbTBD2Ry6PG0gW}>L&>P*;s2x8DoNPDrWZIteK;$>y5J)FVCKgkk!?% zrB$)iW{3`uKWav!T+sutRayWEiRf20sCvT@$5vI}k&TK;i$u9%Z4^OlS;8nj4I?>} zl?@GwwkX{uS6>pjIm5b`1L^EHfrUmV-c+Ik1^^Etk_SX+5$Hk<%daBLVc>QGr)aF; zP4eBv>ItbU;9HvjIeu@(17}?mIb{n<&#cw+Gg%B5!YEarj)6VSMD3iT?S5M9Ju?^nS4=`Du+LB9=W1{D zvB$}OOEg}F^I2neezLuF6E@90b?AMK{{Gl>+H|R5_riVPRq>NJ_5I~)Ch_m~*E`3# z{hO2JeP*K3$6RQUN)p|Z*!M2NJJ0^&4r@W$0uL{|`Z|@O(P2k@RgC&7gzI(Ka7-La z)mg0A+9Z^bvGNWii2X5Cz2$)ysQJ|RqjH)IoNHY_9Xn&biQs?tE%lhjHxAz5H}B@M z(aCc5BIG8tpGNOphi(p8 zSN|gPFRZ=SfSc)0HHa`zs=RcI0Ah_W?jW9=D2EZ_qx6W;o@nhtOR&Q@kv7b*kJty# zCY_@Yu4U1Y@HM?*>?#NL&)?6#5u(<+;azLi;dbHvxS;j8ru6?4h~GwV{rF!zaQgo$ zmOZ3@d^y5M0=TTaJStM9RGZ|PzTXAi_Px$fakr>?&DF*KIknI> zHu?MYhbhbU!@xHlRyI*qT~&{&AdcN_?6_E>_Ki{|G?A^K7v$O|=~Gdo{4d!zrp`ua z?CiS~wHaiKj@!{9n#VVczX6VjC<;ky!}w4>F%dZf~vk zpEf&5cUU&vMF*=04_wxl_YV)x%l@3_?riWCcGu+pJFn>(M(_N$GM9ONR(q2BwQ80% zqsDesUsWiC4gM$IRICZkO#Ast^~-7XKjzL(HEOhpH5f3WWERzQLW5s~{TR||@m1>7 zv+trx^Hh1{%gdq3t(H+93C4}Wk1LWD4xF~5$R+e42##HIEen}ZWLwuz&ljT{3Lj!y z6V=sQZk4XLr{$yhroIdWfytYr<)@mnx%U8LKHs}W5%4=OF2+A-<=?YYBB9?4o4<*| zb~$+vZl(qkMFo(-tHyo%(L|vR32j6A(t+o%Ot%~kfm~gi{$;rLyc8QDN%dwEM!Mi+ zS*ih|l$Ft#dl`4r2IC=(N;=+35@c5INF^0h4gk=wn=F?^0s{&ui}(LIEl;l+9~5Wn z46DzNPWG@?C#h5^qALt6NoUVa=u_rXTZ3} z;S5A4B&`vU&|%f+7e+*lnJp!Yj_u<>-Ls(aL*t{TnGxrSzz;u+6wr06WRk0)A0h6) zBOn@L@o&F;oxL&|cYK|>yP@iDA?G_k`ySi-Bl7V8Tuy?d3mI%6nuCj$cBd&PZ^=T? zUZEoX(m+6Q5y)JV2IZ5bDZP0JW+X!Bz3Cv!%u|pCC5r^(Ng+Bv zon0uGnC})TGWYj;YuMft&Dgr1U(1^si;k>x5J#=cUN$Ldx6|Na8qypKlD$W7qJt-G z-VNZF&UCto1pf$moB8iHt7F7W-AP50RFl;CI5II_ ztPpH?%aL-5!ygHv0$Q{=Zqf9N##jC%OO3l8@Mf%;4f=VZw`#wy4buaek_V(z=s~ z5H|frR-ZA2;S%NO>|inwtxSnXfktyI&=y_WV2|Yh>(r}Ftf^>=97lP8`H8)}l?kUH z8`cbp-5>fOYSW=}ugA{O-lU_UN;PK*0I9@_9x-jOQcB!|J?8{vpi3aP6`Q4tKi%O)pC8SG8CBXtq*f|E}q==Ilt~b=q z-{&Xuw}K2J2+IR^T3!Cl>l}E29KHihj1?D=J z2!g1BW9NUON&X0E_gsw$Z+wBv8jHPIoVDLQuDtVx-ZF+|XKB#q@IuF70fl{V%b$ zNyOYbw9+DYYJ+ke33=3CViLebY*` z09Be?ZYD7@U;kRDpxx6!3so)s9DDBBqu8hj#9B;6KZj(ejUoLU$`k&Wcoca#62&qd zkCm%x6f+BptjYq0w@oG`^f-W<#aF)Tw_*2I7fkM`;o{aRKaAn-kZ{+lGuoMKVim3h zfst`62d(I>q~O{8f%tDU=$0Cg|GE(kz=v*GADt}qrQg6PjtYHiaBVt)wURPHBawL& z_j(ET(8&(-6^qSDp%K}8cBv9U&C+4165B}@n#*7O;GKD{I`+Z}#8TCkaU6kMggL@x zGhWKo28R4SZRy-svm|N&ipQ{98lkJhu+l~%QxfEPs>b2QhirdW->YAoo7yY2v$r!T zOyU4Q#^7n}sj6tTsjeb$+!Z*^tR-TsJ>aDgPw-MdN$X1*K?9}P^j@-9EIz3uDmRCR002&E$)9+jh+b#ei+M$i zZH}(_0yOfcXOM~Q(c_}H#5E9OeeISqG!oUTCC!AiV-_az2Gxi7M-K-+Dtt%@8Q<3O z1aPrjsGB&H=;H^GiBt@or74q3dDkwwTV z1=j&(bjLJVsngE}KI;%*W6M}wJ_&c^6fnp$R4>3?(ul?OfCiSqF67b5Q4b-c+YOD^ zmc+-wh?GYrDt!`xBJ2HQUOy@A0T^;~YH8(SGx|Y&w80r?C$WI<{39BfP7gK;07h^Q z+P8^|jbj`bfw(2lW4b|i{PHOWb_o&eF_%Q7XEP{Z6l11T;%mz#@@1eA6rrZrD`3^u zLELB6r87dGb`RN;!=HAS=o|6Q!bR&dX4;7)gBr@Jvh#h2MnB2JVV0|tagCCv>P4pA zX`D$_56r~rCvze&_%~3#)4iw)9!bwC{NO6UXS*4h^#issGa*6OLh0ODk6c#^)X7X#UpfG7hBR1}Qxzxe3rmAM z=!@JgtuUiJeiJGOv9<)AK=ebqhl~s~^w?pN`j9w0d5Itg*vU9wC`i&((dfUYsijv( z@0C}aCbyYqNpB@wHV#LN=pn=>c(PQfA?69-)=GmZMi!Iy(SpRN#TnOb%;^W@@V zb7RMItJW6_-R?6_J-AA#x`(T8-{jQcHaZCbBdXLr`v>pU1{VTx0iI>@2Nq za@8;$>uY70hoFGC7)30+^m~y6ztiON^EfFPqgiO9q6saK#iV?sjA}FJfVg?aH7T|y zbh;(PUjG^enm2?~IqHC7eCqa6(Y$h+@#@voiq;ZVZXVyaYjC6pG$rNg$S0vdcjGBt z#1y<|Hv`yWDEC|N;n;Yjz-!qj$fuXjZpB9vtvI1hEDdWPDLgn> zrgeJKr|)BTwttSthG#0Q0`%%M`NkdG%SknLoNWEL@7ee!WJQ2iVcPorr!lwc>!qsf z%KOc6MLQMw`HY`_(9_WEd4cIW1KB7nCBg&1W|s 
zidh=T(R$azpLf#NzVU<=vg%yZ#Gk~k-hD5=ch|#|Z%xtfM^87u-2GY!3l|@y^XVR! zxyb;L0;)usmJwTDVb~Y-_~h+~O~j?(OVRYF5ELp=nB}d0#H<5Cur6C~hl;f5P|F*{ zk|%K06U#Ke*-hCcKAL0-es*Kx?{0)vV*dp7Zr($eL6iu>iWlJ@9hWCXr$Vuua+Ax3 zfpWzffXxhmHN&*pZ)~P-%ox=sR83aI3>6vw(^WUCSx%^f#9_6|W7m``#+`Tr-6%-aXwmIZfg z=LOAbCUP7Ge=yqbPAnga>Mjz0u40J;7H<&aV*Qne2($v9z~u2H*u=Ts z{yMhHUBBPb^__jb_exkj&+BK+k=;m?y#C_T#T*>a|M1&ky!h(C;&t!D$!pzn-Mf?X z4d%W}(=~x0IQZYO?DLiAQ>!iT@zw#DflBp!$?YS#0rcv7sIL)M{kKWucew$0v_FZyT*}c8ZCke4_AMmQyJ)2P;eTn@K%K(fuqMx7KF*L?2tkk4bp2EWFDA%i zC%ZPZ!~Ic_KL3ER8{7Eu6M4nk`A4mrBIp3%T_^`G*P!d5eGD-xjYc9JlX)^59cG!T zX4uat1$%*T_`}T9y!o%?VT~x_HxV#=CuyR z1~Mp3MvhX>L(XxCniGEmu{vj_VGd#3dE>T@$6g1Vaxtsdd!LzZNmF-#dSAB*{kEtqA zW+qu?O-0O7U+|R+#o$Q1q%-3SdP*F*9ed1AnWqWDfEvLcI#xA2{?Y&q5f7YVU&Sl1&yPo(5eo*XJ zj(mVbgQ-qzYAV9dP5AAXl~L$z-eds{I zpFTa4yA{Ld+&9G ziM)b9M^afp2GxIXJ$x^=fb>o! z#JJ|KlVS5H>l(zR0+{gC4b-~Sxh0wuxg{tRQ5`^+)X0Bj;?**iahDMgD=`uZW$+DK z9RWkq>kU^s2VkDRXY|rYmoxX

yiZmpjQ6d0%Fh1D1IH{jQ=Wr|OXgACSjbd;<|6 z=ONPo*exxl)ihLO2Q!eMe?OlTYUX)y1O${6<% zv%QIRO$;1*sb*pE@6(T-g2U)aN~l-_k!BVSX?8A5>)ezsDKk@hX&*d-OdPtsIl))Z zF*SeU?#{)wHB8PPUtDGqU2<3~Sza=%QYp(*zGUahCy;;3=BgMRzw>zHR_ETLc`ss&#`|(Z*+J|= z#^W*8iT-_1kWRA$9c>dGRu>}?NXa7}D5ip+7>B#HZYFf(E@Zt!+|0oET%JQzm0GHQ zUP}L0$MXfq654~TKv-WkLSDL)jWg#!-dfeM>1w^B(~PW@v@5~af$k!bd-(6F${ve1 zE^0MQ1+!i%2ULdWE)JoyrIzJc286V4e4f;v)IjkiA*!dkpuHfhr;<7fS*_ala%BZM zLSFwK@^oYo~f)}@OYvKrB&Fv=`N0~LSQR^Qn z-^@a9-RPJ$bNbeiL9kShuvs;|d+RS+$ny}lvFl;X*Y&Hggjx-Eto;uEdI`d+9Z50j z`Ld3{%#a)P%S=5wCK?S~XuOZVPhl^^&pnQjT!wfX5Jx(CShdDHN`mtX}x(~xa)VA6w4N^J5oo}o3)LRK05_e>#x%7oSd z0iJbQE3Ac5JiZqozZeCrEFeIuOP5V2bG6TckHa802aipnx@9?mvVaF8QXbB|5ikz0 zUBq{oHj=8=KsQ&4$4fsRK;D%mGvP8UKiDmb-bI?);SFctMQRm~0rULzI`8kqE1!7BeWv zunQ-$^(u}YT-`gTw{`8S7FBpR4~J9brARUkgUQ7E9HxdL6Z;t z?1UyX$2MT!5fS|SvtKkg^U>W8hs26hc4d|na?13I|4s4MW&XdxAN2yJN5X64xO(v? z^;gli3`CEu^A1m{mdnis{XJfIdjQV?p|mTmp9B_|xr|{!!||X8pcAi4%!;_838~NI zk^9ZAF$#p#$tC0P6v8fIN0|wGs1b_@1RIvYUGN5fD7*t~yFQK&mmW_&s81@7%tVu; zMNU1p{7UKK>klRw@n5!*Bb$KqL|n&MjrKpQVuZF_7=N&bC(LLX$q&jvXb+Pb<)! zIwne>DO_7H+L>w48QAhSk2+%Z`erUwni6fPl$0Ju!BA>RxC!be$c$GUMKT+OWDWAc z*+6l2aauY^P(90|6!_Z4|D$i+BA=BLQz4AXVKLgHQq)8Dpp}7EA3cm#w{m-CLN75* zyLRz2h{v&LX>3hw2?-leM~0!H3G~+;0E^Wor%_r+i5WRY!K{bzaQ?i!XACWfuaFow zMf^QPxRS}t^`FgH?9Xlj5~id#EKL#*L=~rnH1q)M^>lJaOZI$NCK6TcJ5Jv*C6YYI z;~#8NU(0QLCFjAbJ!85wGv8cHTONLsC735uCsxZ6d;cN6B*)zf z_1`$XGp0PO4al>gB`iB%vt`+DU=16m79RYz=bM5qL!fZ*C7O36C1-(JrkrprQ)3bq z9_*4@9F{9wTq(zlLFok`)t*@}Rw%XP1Ix*Q8V%4L$T{*Ct_KQPo}P(*9AR7SQc^VH zGLC+ujQ@m>9ExI#D^ClgtW*J&9N@}qQ zsAW}p*|YpO}6J4e?mU`oZuX}2*M|^VB?KdRK;-zI*03lS!}A3r`7N`EfX&> zct+98ox%}d<%q)|d!$DUKaB<21X@X)Pw8bl^*qlNWlKuAH{`8Ax6zy=1;pmWGzw1; z8Xfo}sQL!am&G(P3Obnokwv3M7e3iO{EsYZzS(Tt9Ohbcusy$9!&N{TSeKUIHie;V z9oBKrC8IASJ_;8bQ51>%><7+kj6kN|PMY|aYODPp<6afl-O#U$yOGp zs{BmWm77BpCtf_Bvk68L9(Y*%UE2b>u~syVYQZDOU$2=T`@1SHri~dt0jpl>D`J zfPPAHUin-2m)p3te(=7OXg+djOAw1oR*N_DpG9mkRv5)u8Wv`Ng{Od}^SBa2;RwVo zxyoIjWf#T{!~Qy;&7u7Kd(8a3!dR4k;{-B3uRP)p5V0=S!XN*|oTRVzo7hDaY9!2! z)Jv8Tm=!|%F@*r0dw4ikjX^oL*|@B~Da()8izY4dRqj0EQdJ=xyzto%Fim_%pha{% zYr+MSQI!A!aX<4_d$#{J;a)K~8ZEV1x}1+qm@CFJ$=*pT)qFOnI;jj%T*5345tYY} z*}UnC$*oW>rAgHtgZEZ(+UVF=UEBCx5~XaNq6DKPrdXkwXHVOP?S^YlTaIzas^9*- ztLx6o=FZUUq5@GH$rt-6yrC3eo47%nHBUE1lA8Yy#4?%VB(<(g#Ig_1N{&7Pw7@`@ zMkr5S2QOVT7sLJ4fwF=?wlbNhDSB$?82mpGP6u>)YsZAYtHz>iT&9 zIW;+o&Y(p<9A4FjS>!mGUo_7XvrRC8gP0 z1yZ5|vizn>y_V+q(pzd7$;X!vG;U9gI91>dQzTeV4*}w3>|8kVhhezU9umpJCKQ== zHT0+^qTZ6@%#hyzk=N-qUR`d-QTaUtOUWaL;72SFS#T7Pkiu7sKc z!XOo9A%j>>DSaFlOIQr9+AAK#0yCPKp5@@4pDoVvmgiVzc>-j(Jq#6ZCE#?X;ZhK! 
zG*_+uVn)Vck@8Pzo%vD?Iorg64J54q{93giE;4m|T>?jFJFm)YI-x+1D=^s2mZT#(2O+5lH^l zl*{Aym!sq5)vtE{uQ@#9lbR`<(Ia2oOuScCX4lqZ!h@lk0Mop?6ooB`5xKlz?UR5x zLzJf9jJF4K_3!9}bU;1^D`$^%rAhiGYW&i?QE_S^R)-N$fuu7P1Cu$X-QW87e~!KN zL!F=hR|5JN1Q>LO|D-+hE6V>jtcT(qE$bSMf`Q9WQam@vkSs!C?GXI_(%1Sj_U+T@ z&%Ca#Xr8Xws(7}+x4cWB(0+Ao`@MK<*|TD(KN6S{c9FaRG1{CUzi=tJRyhJ+S4S=7 zkC-r7-J!m@a=G3Oai%1GcSy1YHtz@kZM~47IHoo43BBP!?MBFZ!lNc`L%6^F`^-gH zime(pv~=?0Mb*xTV-3C~R}W?aU5swA?`|kLnMOi~eepBb_qG^H6ZSdSRX5e`U%n*f zC+4fZBotwvdsJ}hpMk2|nxV%gTSmMsq0RhlP^Zb^8Kex1NDEW}48k1ja`2}ZNu=3L zcXH|uAJ@7>FDlk@*BmiP%fIMD{??43mmCt?)wCFFYwZ0hYUVpV<7?E`BO;zWA?h znAGk{au62n$#Pm6j{FGXnBqKT$pl!(0>)Tj${3w!%aP z?tIcxrzlseM7LBYq8=PaD96B)Aa|^i`Kh0x5pFFp)}5;i6rNC0aMYvJ+)uivg{Uad zH!bl~htpA;6{Dgi$@BZuTIcrueVr|J;_fC-#YDm>R5K4g=NgX_I-Hi1E{mKV@12e# zma`s@15J-3+RBWMw}efPKibGNQ5BnDECQ8w0|m2_d1tVkY_pT+Dk#MS4zXe%PP$w# zh{vS@E_>YQml5sH@?jUa^rVC{RQ!rra>vr06n@1Vij9slUa#q=L9{iA_k^TbSZB4t4--3>uJSxq;oS;-%PTZghm|7ai8v*f8o4ThrU!4y>u z5?hX8wRf0}AhTSAumfEJwnufGRHa}P!kVQ@nWZpa2-ZZh^&o{S7hWX#s07kPq9vVZ z+lJN7@58YVewRW<@raQ^c7nCCGhEZ0WoDr^DM2N6f#RMuRv?8yoy;fmU?bGypsk8o zuT!H%2+qnN1QL?;Qs_~pQdbSq-oG?%Qb5Bean7;dn+98|sijNz4zb|-Q{=@k!yWWBwmmZ)7;34UjJ#5+oiDyi~cp=19 zRPhD=N?0<6EbT$L6QYVwf1T?ZvomuEQ)?Mhu=}XPCR!-SbRACg1wQ@3`86k;3m{+2 zrJ&)8q02}6A(qQY`QiXa9XW?D=q9i>stN)vBqJ$5|o%9D!&T<(0U*!);3vYv`tP()CP336Z| z1gPVGAXHJPs^R-v75Eyaqbfo6t?5lWRsRDh`ot(cW4lkg=EF0ve$L-dkw;`;4anZ- zNSaLAhhbpVQ~^#%K{K#&Dg!YP7O93z)aQXpX3g=~nXEILYGB2R1DbJfKMUcVBDyh^ zz@nyk`50kEuGQ#fE^@3M32{Cu&!JkcK++gCY3K6f2aE3|^-`2uC_tB>Fy;(mf+6cO zC-jUM0!PD40%Ajz%eAIyF6ut9eZ_x}OrO82mSmY!jq?Qwve;c$mhHd2^_f5}!jIAn z6Y<~+@+SeVF%i~mjaU!@ZRD@5YtS^`V_9*M2tPMi3q|#pf=?1>#6{+*C8aWfJuF>P z=>qw;4t#xN#bmT;MptV>4MAI_Lv$x4G^|c|B`8{da|?m$)#n)GRKCy{xgAV}^kzK4^F$Oc;1J?O{y{dil(qk9NZ#2Zyd{jXd7kD=Uv2=3^59 zhRN|&K&f~hOZ6JP^1x6VWM|`0L^HtA5b}cPza*mLNMWW@Nx{XaHk>Yf!J2;z)cYRk zQJkM21PezGiLXJ|wdk^s=oswzaJi0yC1kO=Da1qPv9FIw=~C^&)6_T0H87QjspJv) z7QF_qV<~12O6mg+ns|ETrqf6$2m{Qseo4}*&P!=N&`t-oDriIlU!ipd&8 z1O9iEU~QC5rXXCCGDu)@>E-P)$nh~z`13+9zn2$=vuD00k!hf?R=mHE#mYF;b2zTj zzsygM*jffyMJ%=y|HB?BDRO!_U7YO{?1^dS z?}OUA3&&MqlZP8R4PeLHN)8fmgQb>#rq_L9?Iu3I(;>+h9 zI+Y^Ecmdz$BUboe)4`qICB(kg#Z=IR>Q!SVcup$EjY)?&p4(olz{n(zg#9kLnVq> z4V}Y993fRUcB-WLVsT#i3mMC7;@j`#)02|3#S!;bQn}2U+mVWc)a9GA{f9to`}+iV z+fm#y?4waCMQ5kjWwcJNHw-PpnDE9MhtW)+9RoRIHlbl@W#QT%we_J>+I0?E9kg_M z=AsOJOAMa!59LmzZH_OEaX8K!CCYPl9$ZUqkWH-4$WKvhhOSJE!vJg9CSPe39L`mUsiRDq)uz>q&<`*zkodf5fBG0GfzL#OUok|lt<#z)Pyhm zpma-fzhuanDJ%KE!xXZa%l1xZ>Q7e{WQp&8|IX}u^}DrvD0r93vY0O;D0@HsiVB*k z7KkaSF(BbB;3ww#FFwK%g8;&kO=<&CeX|Qi(S(J=$vwzFT>Hf@Ssuw}p`7?UZBaI;8ixkRFI|L?LStMg<23i?#Jso!U zv9u;(KwwF_z>qLVlNzpaFtHlG-mko(0^T*h6l53Gz=a8vIDl$4DB_ZHVA%uNiz;$I zpsWh&hP&eAC)U9iU;ya2A0*SQ$dn>w;bl=%ERT-KwV5CxuZDo*9=*j^Th$QlgktpDKpyEs;B zyiCJyYDt~EFc(2Yyt&OVNhYFJr(QG2Xba~7480=keDJ4UNhSpicU{MR@C)ah=_b0X z8^<>|B_ATD7Ez`N9)0AB9tp&0!=6} z7L{xj68Yz(5?=nmxCqq@%QO!kp3cOJ5X?BBB0G|aay_i1n{zZen)EU6DHLTwUD^%b zQsptDHeg^$kaciPsmV`2Q;==Y4yQNKI85Cfo(6xBZ4m!|-f^ca4L(&Vj3lDWCUFLv zV)ix61qB(q&h#_guHTu)q!MA2=0Nl&`n)Y>4E4XEYmglCc0wrLegiI&W?HFy-bMnS zRalF%cWG>BSIg*qC4VjU4*wqmXMs30p>*g!P`-&MXfyv1eZ%EFbySq>h229PP zOMOlI_KM+4VYSvFc`7>m^``~Vn zWJlxwbalpfy@13$*1Zc-wWZ>Mw>Qx^r!j5cI*qNGk(w>g#NRN{Udz zls9hbznOpPMZHp&1HVU>mQDQ3)s|dD2U9AO{;?PEq;ZVgtl6B~Z9Y5k5KoYES@$|L= z_c2OBxSWyjn&{R_sDv~86kDRfzMl!Cy@S(qWlydfiI=CA=l%VFT3dM|Y}HePW5htP z)=5b;Q(H3^TY_0W?5M>baX=zQv>fq54kM(qT#4U2{mA234s~#Bi4T(w8I~-HFpzoW z-SfZvx3T77%Tup_a2Gy&2vLHRM=VBLgF*JXY*qCmL=Q@kF3-JZFub0?SICQSA;}kDf%Bj0iA`^N9_04JSIE zR^mtW&Jqy>tAMK&7an6)9;Ls*ivbQjk@TdEA7#T8{X;&7vVgkw7T#Az$^rH`vnzyO^!sJR 
z5LZ#;A)Q``XpThF)yX-lw_xPds5_mN#>9#u)7xfrlg~%vKgY zFPA13pRgJS0*&W!x z3%-Rg$Vk)|d54aXu!rju3s2VaEbLS@=QK~)`6XnCzUIrIC%8wKmS)WnhnadLE+2c* zfyuf*)xzN8-~>2y{|o(F){X$Gh6$R$k+eEA16x4$&3}yX28Mx6Q^il6Iw~#mvAidX z_9!C7#Zm&-pNN6YG-vGkyQaxEVaqU3<_VU`w^K;tqH)Rqv$_%$b6R4)0+v+-j$9L$ zGS+W;#0jAVW2NsE$yX_aQRyp~_O=#3(#;Z4G>OeAl>-fy0D`uNmr6s8M@tDToK@u8 za0|x%72R7u9Rw`;6cuBGct;J)JBo$ABXJ0T+vQMU@D&;B zK&EevpSS782^btGOd82q65bee2uH1ukV|im)O0E}F}U_0FPe-4(I+>Bq}UWBBg-T! z*2Pm$rN&{6X8B)CqI@L&cOOR3Gu>S!pSnw1b9Hn1#hA6_8fIEEJb8|^8aJ&Era6ff_81(-8HzoySuv&1b4S!NpKkkcMZa+N&FrH0RiT-@kQLs3Gq2UkB_H!wuoNlYx5&) z$dhdx1;2&H7vS+7X%oAP9Rt!@L?A%KAluAO6a?7-yVWew%zY${b0Uvf>H7IT6Pr6H zh_xyhzEBLTMGhD+21N|4icKuGcX>Ov*okc{G*sU)v6Htf>&H-H+oa3wtg+e^g()Db z!1c2W{^0zMpC11?`<5mb3EHe2xcb`eeApoq`%k-eKwg~GG~ih^{bRl!aJY`wFcpC; zeHRQSbN~f}$&+}Ieyyr!FYHelf=#D7dxcEwc2AJ%(G0(R|26(er{g23T}m+uwH6{9 zj?Dy)Y|+D=S+j-68YK=qwU!P+3^yHY61XE)_68Eh_G^H$4W_ZO+wb#34VNez!0CBH zt@gUv+*P8ej4dIfGxumh3P>oSoxx&y$hLP_ag}$TBt=>wjHX9)FByb#AL94E zTk_P7Tkv`snlF*Dl=I@VN0Rf?L$_10H=I@;a-JIHg6|7=kU0)xT zj9-dfJbS8c;LoM};W8adj-0*~Ag9IIqeRq-A~cLd&ExxV(D_!+foq2dmir%l`RoJw`lDLVDWpXst%S^ zfFYa%M5gmDLq7Q6dw&e&cK)r4v{QZT>3n#?Sy6uIH8#UL#7Nv!?osN8yyydL6l@%v z%Ti=y@;X)a;+8DSEWjS)aJEv3;s#P-DVz`)v6_rxrYatNu1wa}nvG{oa`{7-YVGnx zD7O$+6tqX?uM6yUw<-S(a*rJG=ZHfN98RVd$PoA*3u9j-KcU`MZyE zcYOVEy%VGW>4G&YMEOVqeUZQX<TEfv3BGbJY^CC9iAR` zQQTyi%gZ6p4-r!ZmRQUPnB|uBe+!QLkeV`K#Rzw0w!Iioji&>!UJ3thE#UkZa8FeU z^KVgk3+qt;poHYBf3?57zMOof$YWiB>;N}2;!<8=MWci26JOf^z*HnLtQqhj>SU|c9#S$YG*)bMmX!zHNtMEZ0>2HgO-P ze_rOuH*74BtW6H5l!!48w2^AIW6K33o zKIe1Aye-`7!XL<}dF(sp<4Rn|xzyoOFoRw>RWRY)sb@>fL8qHF-8OO235v~$x7wSb zzaOcZ(ygLyS2M^OmZMtAM6c!wti&9x<-8jp`VlT!5l24x6?;spUn{f*gI7-$hbIAr z$JA~S%(>Xiv_^=$r_CSpC7ik{TmjiepLWUOeht23@g!h}jfI|1Bjs=chYxstPorL3@(r!_C4rz(hym?jYxMX2Ca8DY;) zsK+=jUXy!nVL^Nk=2mia8kR3pjRSLuL=JaU$69*=tBwDS);qMBQ(IAuZNxR*zraZS zLq6gLvpIzbCT|yw^v8Dp0*|#=JQH9lHO$RiiIb*0R!c734AvAEfaRy+yAyLpF;R9@ z6#oLm48-471luFO#_xq4Vb=>xrxVg~9o@Mia_972-J5ggawn9I!j)C{AU(_Q@OF7% z6`fO_T&Rj_N13)cs!*ZTad>>a;D(R-2~sTD7&7EzaY??j+%kc!?)-r)i4*~IzX~Il z5-CuA75=+CZifcH%o!a=RT^Y|P*v(`GH8u^(QG+?nG;cw8N(q@rQ0ZDP?*85X_#Qv z(J7e((MU5#@{XeqWHN_3Rl4u*+9om?Yx2aX&L4#=HCGBtBqlX}98+)l2uzl4ogHfY z2(+uU3>DQH!~Mvq!3&vdSXoE4HY|G4RHDCY%flwwOtq3etVL?r44TunmW+;!Py>qIUU6gC)KBOya7)^I%AS- zVp_%;%)4&tQKG$=7%6ITo@fTj{1ZgrChyL^9e0_@LQ!l>2NjLZdI+_Q41i0>nrJK( z#`>pdQ*{0lgwHX$e<9<;Ex==6zm_@}jzEQF2Pyl)C3B(YP7u*Q_;_wLGtIIqn9CD7 zdC5dB4Tn$6MsT!8o2s54s> z>v;cM1dgw|yS4N;)evugC@LP#Dh%T(#>68hmL*P+QNYkbC@7{7zR>HihbxVuDXuB) zD5Sq`fG}Tx5Bc4Vh+f+$Ys}%7wodPu(6F!6cw3^V@#Lfy)6UrFo;PbYN zU3Tms!HA7VI7UFPISi354j;4hoC8xfLfLyqjr=nUmHb^QevB9`a2m?dOl6UXOjKo1 zqTZu}!;bzqcKz!u`8vLzJozRXd?~7U2BXTNveD^SqS@)A5=e?r`xFwK!IRjY1?-sU z=)w|nW715~tYwi1rI2sn{VBNcjx9UW74Bx~z1R&DgnB}=Z~i~jlX3|4r1dZL#6tWm z^?y-M%FCZ&3F+`)oo-yqlsW-=u_bJT>VTEv{>gt=6KW1IWaSVB<_LOGvb zme&FMD<>I;k>k3LvQGcUyFn$#HhPxKp;E??8s7^L-$=d8yL$yrr8YmSK_x3kdg+VK z*D93vPaFK+O*fvEawCg+J*e(xt-NcrQ89+P_;zQa=hNcoST?ztQD^F^R#jOy>7r1; z)ib$2<>zZA_6VOpX!PEdJsbONRqjskxyE{r;?mpb<>AxK&T?Y(F2tfiRU6KcgKwH(u!d;<2yy|(jV;{^DV ze+~lt>i1@1ZB(~&jR^)c)gGVT7NSck2_d|ek2~FNpYMKV|Lni)Xu%dAZhyFK{1f)( zcsg=--|XT4%cCP8Ti#^_@3R@#`>pl`TPlgfu>^t7X>7`WcF%&w!v8q#dj^Lj;3I1+MVpL$GV-IC1x`D?9sHB{^uXq{?9+~x@Efa|Mm}<)>Z$Go4qH5y^1}QtDBD7Y;n|k^Ii^2$6GbH z8KxpOk>s*KQgADaj}97EYvW20rP*jc9g|Jw57z&%zV@U|taUW7 z8m1f^Yf?kriZRWzEgU(Qb*G+l5tKnTkycgWIwzSzZ?ws~Dh z1;TKZu#bekXv@1E!je9fnji1@kv~&_bh{gZ@BOqOfEtG=J~#SNY)THFv0Lp9DoT!) 
zj9cwNY0Jb682<|7Nfo&^xLa*Xj{CeyHOPR3smaN|Vg&`0lJ~xQPIq#K%*3;@mPfC` z`5{?{Pk%8n7lQ&ef%K^q4>}fgSh8 z@sWg)6lFlgkSqi<)mXxtEImykjKfc_o|JnRl^In{*o{QSfAc7cKN!qN*_=tnjDq|R z9x>c?_Bbq9C}*PLB_kJysWZ?@Ne0pB6^34;u-oJlF+J>RA9;Hilh;))GLy;-wZ^t& z?GDWCX?kc$Llk*{REPsPir^K4?ZH&j`aLq$eF#n{3KC7?nZ>wBdOX$cRNJyId7`NVxBEId|I|(KfLu zs$;T3pUiVhi1n8FhY(Tqk96=ToI9f**Y=_EO5TK(*qF~AOoHl%yJ7NZu_)kf`a7Is zcS0O`av2{#Rt(KNX5B}*AoqlD1OYbYb~^=R zN@l;gqEH`N!e|uVrUr7YeyKwCXp~U!_f$D}CuJ#J_m!g1-}sk|%!c^)yD=!@9Q&}q z&>wj4U(Uyq4&j^zD@E(7Iwp|`22xWJah=~O_9KZa3iV5SA}h7vs#KO(ywvcOb5XNAJ?5K#DltVGo@aSamQ)uMiwcrzrD45b2 zVzK!PAWd=!c}depu+MhH*g@HO=bwd6rh7QYx!P5d(7T$Y0tY3AO!m*)azGuh)TUWy zP>$8wSM>}xo;pg?caC!f2rlf{FOQnjQ@#a`j9JXOs4>u$CXr`6<;%~Q%XUZjlK75u z{03nUf9ExAsu}xc3Pq$A&O4FQXq|kMt+3lD8hD-fR8ZS4GLKnSe_o?5dG1gOQ0KNu zyq|N~+`>8*#VR6`-5HUuf)=zj8IgBJldmmAW;Jmz=yq}0kvU{*3u%LT=yrccUqh^C zWVqVSEws5;owVJ0Yy`UD67R9U711cLBPGx%&2^^cwfrj6$I91p#)03RhPBZCu$q!9 zC*@a*syLEo$nEONh!-v+<@XqRE&m6?U~DE)p=gM!$ z{Ag?7@Tc(iR5uKG2DkEa;PcVW8-Z`W7h2;OXA-5NX;`LnShd#*uCS#qF{N)yYm&)~v@s7VIIsRK&ON(^5G^3Y5QpIe zJy}x@9O2h1Nk^PO&4cAA8$p!`ChUv7?9N#= z%ICKRo~8>?ZP#@nk>`K8=7aU>|KysnOfnDVPhVCye6;yz%F6Wk`qqC2`0i}HTFpv% zyYc^P>Dnz4q?mz7Q}PchxeRW3Q7r?YhOT6p3<4_jbgHRfGy6@o#4=rX^v6U<&l$+CD|TKkiq z`MZiXp-y#hf~jswvNZe)0n2wpx3Q~En4aNo5<_T-0kEyu!?3w7Cdu+(>(G%zrPM0) zP_iVegPSML6)XN`?s9yqt~G3F#D^wbkSQ%6SAisw%J75EM?Va>zgcfPVUqtU&3gNr za|ffu53&ZaO+Jc)R-p-U%nrI7N>3|7hf?&58A-CuPnDP&Nl*}r?I?5dChm%_(_9ec z=26K9-Z4)Z2Bh^hIp)Elvn@HOb&Evu)L#u&V;HjqP-8IGqZMph#qO;xUbfmrNXmVP zHE@6mlGNZwZDhYWEp|reFr4euv|tuGNo2oE<-YFy%mVKm<-Xht$ooHbm8))X65(AL zP`7#@o_!>CZet>5W@`|mTULBTZl1h56DTuWz8=fpD7iWsQ}&D<)0%+UELpE$Z@ISG z!A-ZD_IPnVXo*O-TfY0<%UL6H!pdav9{~GILU!DEaSG-rp#WzWSv&p*r=zE!RFR=B z!}E@WmB`Xff{3M}gV2UDwMJ3VTs$0iZoXW+VQ*O~p%JK5t`P1-;^~<4|D?ZT5b3Y_ z&=@j1ici6iCK*iPyP-Z2xkwx`-D3s>kk}k{?WY9&_}HU98$niKq8w~i`-q4N=69PV z*Sq~i;n$XqyAjN&=J54JEG+C+I`I+)YT-Poc#vISG%hGQ7fYXvo}oFoz|t{y4#yJZ zyR3wI?r_qr1+8XtW6S4~waG4}+Le}fH=mM6wr&nbD*=E{g6sq)J+&pEkkzs}mR~BH zW!8=`13uCRX;Y5hgj$L;meXs?3|sKKz_#7h!XL18UG8f)98+$ZR`pC=j+WIU&ZF6x zbyo4xGIKT&&VYU3Z{NYLO*}}&0drvfW^m5CNUg6eTHCf1`*R9>x@)0iG-F(s)BG2C zP>d{|QttC&EyWs8sWD8BGB$cjo>U&k>@eVmNXHyd@mI*zA6^rF9XLI%pBp6Tio;3R zYcML-WXvzU_GvmB)7>Z}wvk{vLF^eyfFWfmh8e&3u;uDvMGk6fa61PQNqsv723h3s zm^eX>YdP!XTIiFChVrc@^8Cn-+qlI1UxCg$m02aS!2}yrPK9aIvy!MT6fJX+dBwp5 zB=&`BhNx_vbZEx*UQ5zl1f(=a(e(zIK4iE>Tag6WlHnBCufXd(hr$}*zMIOzAil&i z`|CZ}`R(~|{U`TuVH*EDDxI7&Tl_>jF5fSC0mf8SO5k!EA-=MM<~!IdxjZc;yd`3_>KQOC z9k?9wYOgWmO3#o8ab9lLs^XV?hZ87A@JT0s$EAuslu5$H%|&jhNd-mx90sadK6Te_ z=H#Z4JZNY4z$djh1CWzZGNVupFF|R|BS+woJ#o@gOjH7DSJ$<&^77vnvD`in#_m)t zA2J$ev!VZ>5Re=*-M-08JCR#) zX$ps&`4yLQ)4Au$2gWwXzh$v`Pg-dz7DfJ^K*1;H=?bzAO7LaI1yX~qOo?E|2NCl` z0YnJngU^|wqM)%vQMB-3m9KC&k)sv_~o^q5@!PkwrN`s;MG*jH9K zH%*v!7gd>Q^s36BiK33gm(G$gyu*v+7#7enZF0A{za+k@T#Sf`-x z2cORw{@i3KwKtnvGW-d2cjzw)vimId*89izMa7ppD4`~{rN?B0O|#>k#q~_uPFhzj zX9&Z-VNtXJ$cf&ZNyLU!MVUl|6B9^fm7AS{zLM>Xg&mlM!*40PmVVdEta_b)@Oii_ z6B%;t(^b*8$%9>d_v=0Wcr5bK=V5kkG=psc9ZH?D4`r%3ugPC$QvarV9RzM0N*!Bv z=$GxyT~8~N>dw)tj|Ixhcqx@~Oe)l8kSD77qt0>D(!`JkwF^3BLGWe3xODp5E`fI`kZ7MdO0f<HZ4V!JO^V( zkjM}=f1Th}db%;;efom-H1=*C(zcPYV^wNg)^QQWB)Q9c7!3pHO!U|&Z8387S1ztR z>6K_ixE4(#(DK`B$P~3jYMAcQfp+jyYDp2Lh4&SX{O2C*IC3tB=F;JO6APEbjvqr> zI(ErNdMzxD7yKTnb_J3mLXHQx+6DMTvXhUD9=scdAH~f$R?2A(5_~!NY0MAkCw-jG4(#wvT@#U0k6+>tk!g~(k#?NJTW&aA21+~-}8-tr-Ur|_#82;j?JQhLN z(y{DE511qnU0N*~!Xjxef&=ICm9g6sN38b2*4(8B_5VTonsj7f|1y0&n69e(Gsmk* zkEEdM8QcBsO(e+wo>i<-0aEv;Y%8hC($nvf*)@kJZ5dk^N>wU-fo5LJOf1l zs$7)Encn%?w%*;;&YQROD~H>s01=puo)pg*iQe><5@ko>f_)05!ya zSqCnGmzT|C$nuy8N9$P6pbz 
z>ztV>YontArIJx-E6P}vhS9cCg`Jd`L*H|)c&NH}h$%7O02@tJw8c1(pc{2(C$+^^ zV`)@$(js&bNJRr3n2TeT6m^zW6M;*^7U@v7s***QLpVM>>0!M1c;4O|Cz)VgJyL7e zuI}QaG;8Gv*iv~j7``(3xKL=mV3MyDTxq2B0*a{MbE3A|K_ae>P7LX|y~y`0BY1gz zy``P;aV5#ekTRpPSS?rFS5M(W34BJ$S5ZmLkxz1?xqPFkH>!yzCQ1*#^QAI=w_{jy zab8@Co;`a6^;@6`1xxN*esyXzEAHW2vd@W&2d2ic=#G$)$%PF_eI*pi$bqST#u3GO z8?%w%*IS~`Q`(K>imNrbO;~Zp;rtnGTm`)*12K)zgep^;HiOYzKqwu#8evvvpC&`c z*W~zJQZy0vQ+u8|gmiB-(&e?KNE|tlx@!yFM~dYz!pA#`Wlb>LaoHy+Y9u=;cXM0j(KPYFSkw{-TOlfU?}1BfPaXd%F{lo{Jg2>8z80u!}N@ zEG_7ZT~iu7MnJs@*(*NXU`i5EcE@-nzQVgtj!e^jz#9_4Fb*0#fEyBc&q_jL+Rfmy>8z+&OE)nPbnF zIz4A_$oTkE62E`X)Xbf%h6MPE%lP8P|Z8hx4- z-8O*@ufS?_!M0BnA4Zf%)@Ifkbg1G zz=7RBRXtj`oeuYLz0I!5;vt=-^s3Vy2_M7W`E&BkR3RVr53yUSrf+v4-J1V?FSls< zXkB+_zA?oz78Zhn3BJzUJVU!R!4U2(EyF)(yRiB+$w@8q3H6{&RS5DH*{z9f{?35N zQJXH^y7niP^CK7j-qK|YoP##eFFOm6Ds{#U8<<|ke6n?C;;5adbkL^A#FTO$lCPzW zY?rDdQVf#8xiz8eE&cFma;Y}cA#BRUu5}DR0KoZJumq-J@$~$A#Q}TR8zqriBbIc7 zvTt!*U7m*(A{qnT7ztmOlpvBjVDqyG7o;K&FF9iU?bY~;Lv zL{x_PyOxUC@FSS%+y53TXiPl8%&hn(8uz>95>U$((mon`H-Cbp8@|B&dfgDBp>126 zdxt)re5fPqX+)~GikAp6+XT{LiZjB{!X^uRwrqUVyNa3S}IAVylqIJ zrWo|ClRAKv6{lyt)k?I_se+v@%8TjKWpy*`4UPHO7U@%N&fk8l9%yq zu1&;D<@#}Rrm;G8c_|h*d&w>A*AaRicVVHH#c@{L#6=h(wy$2sWxjE0kl#h5ymU<2 z#WMyog+wSqb*w^*raTcDc}Xx=bqrk_b^$P==EdbB)`9A*9G_L~?_)@~c3{-vWbw*> z^LuUJcLR%*;g{&_^pb9ZpbAOD-mv;NNDSqsXKC*{;*7X=BUC5t{#rK4;CyzL&Fz^C zB)ej>h$=f16|6yJ_Elda2qd+_Z7mSOYNaMJn6hv0wBy9P5ZHYa4Z8IMqEBJ3Q8tCXM+tGz- zeVx`4^AmA-D<)A+Dd0s`PI;im+@H~IP#Y56&~BI-hsyvq#DZ=0{>9Dn3h^KYlmcNr z@MQ)6+Xqtm=L+CXanfCgM-J@Xyg7hLRElVn9S4vh2dR9BC|tuuL~mfs(Kx?ZBMf{R z^t)>t`C(Ga6zxmaY_A~iiS=VmVLsj>aG7!gx` z{47YDaU|06;@j}rpLZGE4OR-QBY)Q*hr!m z$h$eJsy#VyR9JX|jIgcZjJNcKk7?^L!%4!WtGW0bvG@xO!_3jv*1k+m$;a|waCBpJ znl~TZN=hZk_;Cqo9^j&I2m%5+6mso>zoTS|Ac2<#4h$V%gFF5UJrj=O^Voff zTm|i@YSiOYQ^cRszVC=XTsMtukB^*$Ee(R>>#H@EgM_KE%qc1KYt9y+tIRoiCA9Ca z@cCw-BO85z-cVXeb?PvIGxXyUrvpK#4fWK{I=7O6*pkIfk_| zbq9KsihQwp4B`5Gl-&3l-cW0uw%C0FX>RPlak?f&wXJfx^3*|%Kh8mV@aRmrQIhoZe@Gj|*2fZ{%`o4CB{^_>JE`E)e}2{~Zd1e6!8$mlBWc zy;aNS)ql$tQRt9+aW>#S2CK+*xo>z*}qM`0Rt@VO;2;Xjz zKjn>)w;yFwqEZU%>Z4{-=POXxff=hi%TVs@8`3x3;nTgnoR>d_l>+3`57VDrp=VuK zNKUw|Y;;zdXg*!&&r52dy<`$e;H8-yAif*HWR_eoPoPE5mWbkkq*h`o>Pc1gk|9K3 zG@3MMoQk@tEDT9gVUtCcb%HAt2sIFj3pScub5C&oc)stXb=PuI)npNk5E>J zEs6%*qj}0AQ3k1IpmcxA3v6AaG_#)aXA!op_|>E5O`9gQysgsw9Am&)OPh4@FX5!aA3Ag`)O$U(ctZn=Bp#L6e4q zggI37&tSKK6Ux!t^id!j?jk7S*F=w`nxd@z*al_x&h;E@e$Hdv{oCL5CS+v%p2$ff zOZ7=A(Y#QkVwhc3FP;sx^1@fHZqDj`V^+A_KHxquY}n2I=_g zjGL*!F*TjDGWq*~!4NPj8M9+FOPM_ID^{HHe>??wwGS#f&!aK<+-b+h4M`rnJ-z-N zU(61FygZt|Jlw4q2Tmz`U2s@I()xGe$>m1kJyGS}siz*B`V6Vig-P4}eFQcy^_6k2 zI>^?T;EZ1QA~NXz2zm`XH57h!i72}M55?0Gm98z?i6XHThNi0d0Vq6D@~prT<@soo z=9n38w;JpAwnYpXBDO+?h7kGt@E^qc?teqPyOi%%Gs0xWnT+w% z4q%1fu&ECIwRV4rp9K;iba&)ocx~o)iLfbz&eK?V4plt8lFOSi98#=cjXVG4Zc?dd z%0(i@rfSV^eos~W@@55c`1QK@ymCtsh*2DPcX~#>aU%SMzvsI1tLwdp>9iI1hFtA7 zD<*{AjaxTtUCpxG^Q%%$D^BtyaJ9?Q)MMRLuW**l%VxUF@2gXYn*Pb9(I59oz?I3- zhJD)Aulw5BS(bJi*Zj7r|Irqp_1MLu(ViFhg8b?LlfJ4!t1lS3o3gmYQY{mwaD~Q< z@AeN)Iuf>E!6-Z74_*g>f-?1{DN@sUkDU zD9>Oj2Q4rwpWiKgU8CWJMqB;Ap%U@olO z@>%xF(OVnS+Yfg{(A&@2%gq)gox4Fa<`c)0O8|{~ey$veGT@Y+_S@ur*&OUbUjnr? 
zi5gG)nSzm*Kmu6|MoOIuuVNFMNHybdlbe==X^|}39kB)7H6Q#Bu1+1;FY>I%Bi z9Y+dngV+O4KNM`xrxT%9vX1_%dX+p7G`FdBqpsL4h$;uIyn~sSKyK8A+oRbx>Tx;a zTyRSu3UA1_*=q9iB=GGI4p@MRqq1+9Rza(Dk=Zv!tDrjrU2ihn4>Q?qnmp?FW=kN6 z_u24#`)p1lU(P3zoJQc;(T_p1Uu|=*bJdD$0SD9>WupyCtd1iQjFj&`SpMpi)`r|N zXxoBszv`@cg1u@I0_@t{_|c)Yg79Ut_9>BjUx*FkF5iQi83QhhM5+=^8K#i7o=|=z z_Wl$Jg8G`8Lc(Fr;MviKOR1qfN5f8 z@nm|kNQ0jUG1!jNBv`Y>=l3-#s76nPKT53#O~;#n=}L#kked&)(zZab35hZ!c(Muf zh4l$xQ%lezQuL#u*Cbja;~;1sJ#zm-fwLA{+f@wzBKz`CB zU2EjOUhfGy;CL7K>zS)kaK(zu^7Xmsx8JkarvwS{?+pP5aHcg*uRjTivvGOHVw)c* z(RKw@PO*pU+E1>n4ehSxm%WB;NF=C#rr3BJG|pLI^1WPBpCJ;)>gDdyHSrYCc7a{kDmF zx{tUL9gtB2PZ6PlZP7mv4=$cv7G3_1AW$1S3sKtJ0j$AxobOyUOQZsC-2+^#4jYYo zrwAGL3aK`?wT6okXQ02oDVxZY5+F$E@F|6=lwaFN^ z^J!8NT$OeWMf@I70_5&xijL) z9UgyZqkg+1pF3k&t>m|&I+*mbGdLF}MJiC@>dT8rBBV8EFF?#?Bj26dA5LEd*4~{BSgu)YcOACj6h2wyFnVT6 z@8I4iyf?d`82MO}4EpwTN0XK;R&APZQ(nHGB~mppyu9xBFw%rKxN`$qE}wK}H95Rx zmZ%xw*2j;1l2vGY$h4Hf)X_MR_!*EnNF3PaP`u$pf71Dzcjbv_erfuGZ2tS{?ThtP z9Z4X&gz1r0T=I?T_RpU~Je^R3G1@szL__XWMo+}Sa)E7E@Y72FXvZqJ^0f3rDk-tCk6WDcZPu5pZ)30_iqfp-is}g zY-vcma#H45Y^g>@dBO1u7gS!5jVvLI=+?Vo`WP<9YUp)*4m*o+$!0^VqQ*r0f0 z=>J3dNN#YN(;IbQuAtZSI{N5lDAvi=5h0cKcAfHCdS&kTZ4xtp_f*VVJHb`eZBXI$ zr@*5O5qJL2!uaxbW2*Z206+5K=UeukEVAq1qGy4EeBq9*f#?7V15F8D8D$J2?VrI> zc$>Xc_Xnr<1S3lcRQ`8u6gg5h2x!c{ePNE6gojZ_!P)_sb5h@#04?ow}lD3b&T zDS>x&(|!=NfS>RIoxhK1Hu{VM?=jvRU$@B8&60IOkebQHs?UjElJU;j&ilu*SZIOCVd3xr6Ma=pm@lYT}462z1)yR2z)Gb5qA_9q_|gqcXo&?SCeo* z)H~W{iVEhZF+P^X8Kj#qk|H721bQYPv(=_gHi8TeGc2B-Yvvis)&-<0UmQJ*d%g-! z=0shO$z>*d>$(3jxt!4kRKEh`J_a4vRGai7ydN3=;PWfH3$a zmUTHtikxrb#7vLwmz@`1iJMpd-n-Q85zJR?B9&%K91AwyJ#7|iQ>2@QGg~W$q!~KS7NW6U&1Xmd%B;ISGP|^8`K$!u;C%b zTD4p{rM@wN3@4uHaG%@VOJ08hh5ABFz{$-}$vC^K?L<*fZ?`di!u#bQ^RD z&l*14ev=u?f5JT8)BKs=R#b498zs@q%lHK1xSgJvyVCw>T*v+A@W$J1EUyjoMfn43 z%ZoPK%8&+Nh}5Vb%`221M=4Et$fGDJen>JO?!10P8|4@H;lk?ufo;?IN1p{y(owAL z`L{eA1H`I(#_hmW?F25bQHUfd5lBO7x~Kl@$)qvI*`clqLJA zkm55%&U<~18qQTm*-wF%y&UX>tE-$T@7{LDRYEt0oW5eqEl73%6W;TXcS#GzU---t z$=NV7sW~(f%hL6BI-3ST$wL?W&N6C*-{uF(R_{i$942Zx0BIkVvfAxce1tKIv7i-w zLYDX$Z{SLB%C()q^8AXC?MGz9-uhOoO=rwAF4SP?%#}#H-2T z_DPogAS%+(CGM^#h;C&Ay#Pk@$2(nWS?OULVru!Jih^PT_lZ%uZe^K?vvo)5 ziPQxF6cr3?JT~BT`bjAM&=bKYW($<;;c%G`<11T6(ZnOiBuKpN$}u;r^7eEV%2aq~ zElU_wO(G^NKO^YvQR%1g@{Q;#TyiASIjOP&Q z!B61|iQQ#%Xt>|}4@ zpjs>>OJjVgN@0fIeQ03Gm6V5%lHSuEfZJbjrHyyI>}(}8;`Iv(_Vm~z8p2m^@Kvey zX>C2)}S#qotRZWbS?i^SbM|lg`omo z8xOXMuTK1DLO7|Um7t=L-)iK5Ur*8O>Qx%<-tgZy6x@{*_MR}2Q9xzbfB`B9xM;6= zjz2Bgo&*XI^}*Z5|3!g2tfUp2LhX0+pgI*34s-Y27MCLO)|0QfVu=<^amjk&F@G&h zE-oC?Ve_q$qDQhT3gAjdl7P~Qqlqo|{~}6qSRYR47APxP!g0$KlyPMDN`dbqp_r*`{zWagN!$zj}F%_LGN|CtW z_B~POZr3}7jcUbVn}Ur33oQFvsHhPPHc|wcO0LJsQb$jv_zdPH+{>w^HsnIC2@-GM z8i+~-sc7XUM*90r%{?Nn_;9uso`f@YD+W9$R}9k(1T|Tc%GO3|`w$!m5n+6?OGYMT zOI>4!V7#F_AC#C9J5#w+_`g*M-D6@~mSMNK@T2-m)23t0wZM8a(SsF(0A>tviev`! 
zA;`x$CUoK2spY))!L?baoc`r77zFDjQU;FuGJS@{lzGb39oiTC+Pt}M_fH}LuZDb& zx{^3l1x^XpSvW;|!r}$A4(dDb#bc!_)XALVf({GYLIvm7MeLS>4fO1H>wKMtb7MsK ziMJ>$d(_>c@ZK&&rV9>A*~zTaIa6dDNR4I=L-s&eC7hbDP1eqD+gtT;Q!utaFq^6PsP^Q8pDRIHZC%5Ftgw>UptF%ylnZ&Hz`G$98NMLO9f?PJqYyKwtBOT)JI zRja*!XNhi?jV0Rx+|?f<2W6rhs-%5KiIfX;PpB6KsmUMY-d3|;dOtUt6y%Q%I7 z$NC!~FYzP|tRRni*==5-BK2zziifKp+mB+RnUTf;^5@Ugpe`m;pohcHK-1?-PL3IL zuF44$_=7nHf3N{d4qCFrR+LPKBhI*sTPle8oETlFtKE+l2D5LPh&oEgpJYaJ*etcR zPGrZWk!V*@q@6Q_iXIroBe9~axQ8C|N|8OjK4i2~jk7o@kzkr$AuqIRoMt&wE2`WhrBSLNX6GeBqKZmeTf-x3U;TB+p_ckV386& zZ?F$J9BDZ986g`LJTFV#f0aT z_p*hDjQW(khGmQGY<1x>(L6lJlXXcEHi!@vtH)^VR6%_)$Z#HHuwvza(CEfYWVVhB zP0MZzOC{|}GD4zL?$PK(W|-Im1PfLCem*_J?{P>%+en>Zxww>0m`Qrb%pPS9x%fu+ zW7SaRF64%xjQNCuffGh9Xa;fRx~%1-FYdw1JoU-c2D11f;rQQ?ubTr!mwjxR$DuWX z%j^7rJxTVF&>CHXI%%iIB%MB@eCD0oH({HRA=-h`OwLHPc&?(_`mm+J+UTW4u~Y8s zA=)CvsYM+GH(btn+Na!Ae{b_~AccviZ_#YG!b1aehH~wl>1SyS1nzu~aTSRUXcLVv zA%Rv8gej*ETXUHGuDc|?B7`VNPM_y6NM)^XIjV!~dfM zEAZ^4QMl*zPcWIM$>$0dX@RHR;A*?X zG=r?AE`^l@ql@t;4n;VamWOC#W)7YS6y&814o4+zD^r`ULVfS7YFcVGbod==KLxK=lOoG&CRzrFKJ+YHgk z_rWt7{|~$&bhR%Ai?VEb3=T&91FC-JQsfcwH*;@TM8iN(Wzn2bIw5>f8rL+2Nm(oV+$-%Nf*KZISx`YsD6fpl*s!)OkZ;)4>bcf5Owze?_1^5`f2j6;z>dA+jx3Sc!0!6aYQpx` zrA&}PSZ?IK9E~>6cOTZl*VFR`AxC5*Ou)r_d;fpsJj~+Ue-EMZ$#eJc`;x0$b@}yi z`}dwVvFX5{S^Et7R;Ob<)8|(V2JxZMKUBYcACl*y5rvnA|0I5lpZPKJaC-k29vq6S zR$R=(9iv12N$x*S2rs`~Cesa)^lw)4mr*M~tzf6KfqW$1!#s_7$2%kRF8fJ)vj7K^X+5uAuW22f)W z3d*VbfxOMP5GM2R5Q2WY26cjZwl;%RaR5J0wW-!Bsjqd?(Xgchrk5a!3Ou>5;VALaCdiinVszacHi4`cINcyp6R|l zeeP7<`fgR-`U=(YZVtET9M3;Udmc7b$)CgKg?|??McSWUXRp$bY`_C$0g`pTe5w_% zJ>UaK_cMi9bqXX0!5>a9wz{=}*o0|O_O+NK;AOG-P-*a@BzkeuaH}vHp!IoLyh_K4 z^jhjtC8}cLLDuN8UfuDtQZ$J_H_#REES}h>`))K2i`K`NHNz`sHd&`AI*i=w&Q^RF z>Q!NhgM_zI5X?B$B8)2{60&MGI>G0?VBd7r&1Oj|0$k^0(82zN43c%kVV^r0X;6X+G2 zE&x`e>pd&#wVF{~QaSCU7DgZy#dBX$rOgOdlx%z~2tQt8%E+;0U`JWrRQ*%z7;79) zvbWqIB(v_Xdw+2erMl#ZyyV5iOeUd zph1~+vHV}0D>V6T1?)JfI4(_W{=t|A?donnG1m@^F|)|`97gd7O&sQmkJZs~^YEYq z6C$z-V&g91R^4G#TrK!tyNb^(r2AEHt@0PBgdNDEB{hhD5KUiN(o`3mIQa>H5?A7l zrNs8=NxpB z#qp%I<{c(;8Rl{7f7yqnzMF)xMIIfol}UeWrD z9zWw8^r050n32dp5oEK4K8e!cTtfqPdcB^zrEOeX&k zz*a1;Cy}%a&Il8fN-+i&SU87mLD70(!1@?@YxEjzFH-RI0}_Kc^+38Fv3zoEsVp$m zbjEtOdipqY0C+l30y5LmP7rFtzl*(ncK^LL_P!N^r$l|TU#0gl;|kxJ7zF$eLWk!v z_1KxhJKcs=0VExT^2mDxK}%x&7ZVy|(+%*n?*N57GXeD7$Rg@5(*BHRvtpyr8%-Jz z?Yc>8|0l~%wVKS&P}FlFPxQOSP5w6D`2s_MEI>)Tqs525S`-YrC2)+`bhL+h+M z^m!@jxn02<;N>KSkz>KqA7`@8tX9b*3YjZO+=uf+IH|>Xd-r>i2MqW_ zl0R*1F*MAw8JGz7anA(#a{izw2nonOn`vNyB3B`)v3Tn!(d4Me_xsx>S*V4KO4dhR z4d+rClx*+Ay7B3(UBKkqTSxjjW*i=5c-GOb%HaY`^OZ$uw}2b$Z$H1dpNi;eGPE*F zm?v$8#^E!u=p|D1#JL*e6D9pa*j?Bcr>&Q#!-Z>JKwOLT&+4*UwO~o<5HK<<+GtW3 zb0Lbi$J10C6WdaFv&R$e2%KTV`CK1f5Af3hrN`-(_WuRDd5QPR_4U8N#1=pk6647k zWVCcjmU6L#LhE!r5D%SGo&Xa=m7$`Fq(z(?yPx;L+v~v5$0`lM%QGmr?Q5Wvllh4= zKTbVh8UUPOw6}+X+l^Jvt<~uy?OkoLQVby<&-Gr7&>YGVv<0jYW2+J;*CdN_LRGSm7f48=_v|3iT^>29cEc3S zC!Sf2Pn^vwy_Tsa4K1+Hy*3(3Cg=_9!@sw95UY9Dg*jVy?leY@#7q!V5TV_lcVA6L ze99_UTH`G2s44{yxchL3k$S)%7Gdl0KJ7SK>{8n~$>6-u$eAnQ^)ov3Lz?WP=bEg9bL`8r@+V^dCS5M*n5pmOA&x(RmPw_* z*VL>IqEc|VemULVnCCm5w=J(7p5YNmjS{^w!n|{!`(1i`W!29`Q|fOca5LrrN-UTA z&@>`4&Z?E@a<~}%<7;z1OA0MhvI##zY`Z1-A6#{Hb0*SQgCOa3Rh{QOp_cDV#h~}5 z21h}=a^Xi4o&h}xNz@GK#uq2gdyB^cYRSW$b&hHio~~K?9lbU`ZEx8~da-_rOnvb< z6U4tw{#L}DDSvU|ySG?F)ilHJDCCQg{x zefy$)c=EG{3pwBZ^kUtz%ySw}KB7f$M2JD-`H`Z!91#o8UUX5X{h4HzB?919U+|VX zKP)%zCze!FBPJpmilSfsJ&UnwX&y5NvyFnIk1%%&Syx3O+ZMFe?=#{Dujl^FpEb-7WdfL*;S^G{}_r?@oOHVsL zNHKf&@5gDAB!0VW|F$J?iP#6zuWBhI$p~+?%si4P?SA|1t3+kA@k+u(3n6KW`U&4X zL`_7KOs!N`xmpnJvahXVBr4G*Oe7MfP30 
z-^BM;<$9iP;$4js=6pYlxQ14VI4`8={5ID71~RzZ6PtBAxBS-0zH7^vS9eu2ag!od zF*L1?8w>|E=0X?mo_Clrv^t&qL%xd*kdn@ta}}=EsZd$(S0e!6ju_zKB?5lZb_w1YhLtUp9uH-1%dz-nof5R#5#N3o4jX z-&lghl;plkMWck|<*D*jVB%__Px3p5xyp(Vk!daJi%%UZ4N?>SV1iFolJEcc(si~@7|oLZ5d`OW?(4(vQmhzT zBAu=%d4kB*X53l5=)0QFb$!tPg?&R@*NfUZi@<_lli(<#5`-eGrRMb`G`AMo^G(8R z!|T5cCOylI6o%!R1M`@Ai2~D;&Fe&V4px57>K`ZOXCFyc1IP>w?773)45MJnbX(^= zn*<_(MzA#OYLNgXK<$#1nJP|Zu66e)jb~Fr!UgsA#OEV%B+yn;LGAz%Si7(2k=zipe3;sPL$A)Ya4|iCKt@8o2iiLB^(RR?Z%P|_aOs)vTay4R^ zI1xWL0b!Hr|39>AS^64kmi`PN!nGw~kIo0()hr66N~H7Pq_{U6x|d~|5~)W~_C|&H6&oC)g>Nf6IV*CsUQN zkWE62liGXgkr>LEUgq?&#MA2mHU6^vasUrr|DYtH$!xF`fZ&VtLq}zbCSfDoOTpvg zRi9x?71hj$zh=xz4jJFTOVcu8Rli#UNb?bkZ|#o#gY#PDVQkk*+fI1#yL)<(goP#d zZw^$SThT;tN{g*euxJtitlK`M%`?cf_3o1sbm=wGf$SJm4;8`egsljM%=LF`TKKE( zC)OEdMD}#i*NwqmT?7i>Eu|GIxU@#BxN#T}37A~EO$*?*c_q+gj0NDroJ9VhFT0SfP6?0s> zqv{`LwarLY%k1wiGNDh~vkt=U=)>^x-X}VWd7AE2;wfk(nmr%gfmm8s<0Ebp!0pTd zx=+!@bV{M(QYb;V@m$^80EXYL3;C%z)WEZLYjt#;WCinx*HAXGA3Jt+cPrA-%i^Ow!cCA*^^Br~0Wb6n!c11qB#S z{Let=7h{*IEG^OgNc{av!Ojcaehc(XaUgD@uW6#12*EprnYlr>?JqdH)lI!=A$zoz z=nR*%NB=|qa1se=M+i{K(5-=ZefR&^z~9qMAj&+G8J7B}(KC|t-|@f`tD3M4ap_3b zVY^msyTrHknrPM?-xlcM-zDf_*`k1s9hLO_m`R3PRm^+c0wTBig)KeXD0ePA8$({6 z(Li%9Z(_lsvBlm$=WLh$ACnG8f;2-CE!&^TVi{{=m-T5Hn@ue-GakxS1@)=W^1KW- z$|v-tVwGc(skQyQ&+fy(!nO9-iB9244D{)}WzhuLpG;IgNXkn! zQU~@;c`T2H_^WBE@B5xyLr+JKe9Om>ls2;+Ac-0Ldf1O)kYJHBh~S%M7bEapW!*itOnCqxY2<_|}X2_5|3 z{J)AmR3Mt!_1?907?k^^f#y3_Ac|Pu&4ws+ybb+D*!>MykNd1|+rn)A<~Qwsom&LP za^IUZ>j*)*LOjlYx!+xZPIi{}J$#?AaPpgUDz=dPylWT)$e)Ueybh=cG(pF^+6fhP z`##L1Z5pqfy03Tciyj`YBXtw|ibV(0;`)|r!p~mq6q<%cSz3v;8=tzzf5~J&gadPp z&|!rYL*?AuPS%CveJT7Qp!0TaR)mTJZl$ANi{V4J+xz#ByDtd?3%5u0KDSlh>^w5D zL|U%hZ}-~DMPAb^z#r*;nWJ;R)n6T$ZI3;gH|StBT#58ISuWc9BKj3~=xe~SzNA-- zZ|58@$2E>DgwF0lqLm)^XSePTc$CW8fBQq}+#Oqrh(G@M+6J+Aj(@xLslRdPZ-{pM z)V8kcZAC7``JpZ{5;Rl`HIi$)W8QzDQCW*r=NNiMz4Lx6x;Wcfc!dN3aFIl@hG-i@ z!hYHmVBYXJG=B`KV_n!WLBB$j=b2LW?m4eK=IxW9aK+qlpW3Ty04ZJTDyp{^%PG(= z$pSNAa9k;UmNA;jPEUiXJSM`MPJ)kRK5L^N^-Jo2`3*Fm2;k_r3ls(qth+nY*j#Ph z#@?+OLf3LV`E{(z=<8==|Ms7bo{v3_jXbb?A5b>1l68O1IT|poSav^FVzV+d#&O^8 zm0ZB&!W>P(!t-Sg8<;BNXM(O+!HI?C`t7}Qa^(yt+-d$UWYx=Zk%G#v7T`H@+oJuJ z2+EZIeow(#f*)#QgG|R+or~_;<#kQU%f~sE`1YzqaocnH`sc(yhl*cQU4BkNk^YT9 z1JK@^lybGqu&J1Rb0+_%YftnDl+kiDDSEKx<*3oRZ@85^ebUwzl~r^kaO5aun90@A z3)wr4c)WTB=j@oiT|M3Q&y7U+HLiFU2%02THeecfJXv5>uHPBIiL4G+iN0MM=1q?o zKw=@H%JOjV_%JXq$S`7A3{otOFkDE`PYBQr9=h2%n%dU!aPXPJ!2I`xlF$n&Bx=#i z$)Hz|y@Nr6?!?SY*v*XHjM>agPW^17%m&=oz=wX$rMnS%1-C!*7V0H_%GZ?mh$e;W zE6LS-RMku1jHet~Zgg~INCz*DqRYw-5pA$B!Se>c$XwB20kB~Kq9zM(iIf*uq=+V+ zK`>Ne!My<|lqi`mKpa{zGD=`j<`MDF=|<0#*&ioySxwd{1sY;OB&*H^E%l<_8K`^742_+qa+P6ySdIkCr5>o zf_U1yJtk|;IY7c*J!)qm-&pAy*QCroyG8V5%gE=)(S0SUBKC}-NRkELvP(RhkabwC zYHmNwausS0D7E{%&R&w9I(X)<`jN~qf)s7&u;Wu{D(j^*bi>+uWxb6tQ0|Pxzrgo! 
zZlidEaku();GaP`MX&>Fu07knB%13_O9$V1yL27N(*mW2;Wr~b`{sg0sO=G?-eaB; zxJSu)D$~1LSOw!2n&XL4(A9}kTX1N_a2#O&V@?x8V}MeV%;hcz_@BP)hEo{$?w_=M z5xsr6-8b|ar+BSv|67ocH2`9x=JCJm-&^yl-ZJc-PXlrg+ObiS||6o1e&L~l_RgkCro^xhcl$bDdV zDgIFKVyz%$#Oei(g}GMq^R*;7?o4*rZxrX{Z7IQL=?MlDUS5ohc{p~guO2r(IRw1S zc>Qj5b#1%X{JjC4^IZIjyncoR@Ls5?s5p~2IlGykUfdauyio+d{rve;EkZtj9r68z zB3$N6c62-6lW)GK6eL%tFmEds2Ma8`EYx0ex!?Zg)A3xf-)InE@2U$_0`o7&A@OCc zu5?^J?d*>6+Mk}BuC6Q|5$|DDWZR28Z}c~S=RI3RPEXnqDW+m$#(86fnl?R)Uaxgu z`G7`?4WdqGcGnmEQYXK89ZzrN40HsS^%fVZ8<)Hp^AHCxZN+Ko=~ND)j0b;;Dc1^_q?u4le6*4ISfe07Rf;dQA{QPNvhJihW!w|o z=%dm@NBkig$2+Ujh|RkoKS##r)apt@o69O0m+ESenkfA9uL!I`AM^N~Q97H7__dEV zv}J__oRkD=Vr05NvH=`^aD6eX8>7L@1t&aNlysLo=A6P`R}0qe1=ZX?rm!QX{9^U~ zXSIG_6^kobck4pb1*gZ)g4Z2Y6os#qiTQ?7pUkyVxiYsO3EQs{@4__uMR5#?Avs+G z=`Ux}n(X>*KGs_M;$5%%<2wNX5&9comRo_Xu;zWl#bG!Eqs6rPpxz1Wc@?3h?rwGT1m_WTb_5 zw$$jK$2it@C4>>B7Q2OJVk!08U`u$C<)&E_X-=|vSx@KbrS1cIX&tki-V~KX`8&vu#=d-L@9>^P95z; znH$mViqVK?T7KyDS41EzyagthcCD>I42fPFEN*=B*NI<$O;$zuTi-NFi=t7etE<0E z?ya$9T9Yp>*fbjY{_(4r-=D~~CSO@tRMYI-jEvT86|53p32=3NZa6+49(?Y64KUdr z8*{Y#85^<<>F#&iA3Lo8d-%NW?VX*Cvq6w0V6YtR>^)xZ3?|qr_G#QYEe)0zU*G#- zw?Vc^1$C#7`8*u%8jZpW_8=_GikI9aYY+m0;-@!5!w^VEqX+YI!2Sr~9{vT`@jEs* z>Ge#|1ugcLGKbbXZ+eo&0^%tgT}Og}nJG!s!C*6SOB|v3h|W)}6OSPWVZ0&;e1mH& zd$0=CmiA0vR*pkIhKa&qNF~yuQ^YWNZQ=qc_f694)Bq%W?+!eanbS5@i3%9dF*so{ zq~nb-RtMnuDVVC&5Z1MdF(?Ye?5?PazhUPaP-5B#lFPZ=^a#|S9Z}Mp%yMwAxo0gC(9hBF5Fdn+ZM-v2 zP%)GbDRzjTx>6(Dzo{#H9|Vfr@JfQwl!U_-C&~?`k3#>kaJ`+jH|d_{IiU^9x6dE6 z-H)AUDr@s8l^G`1qKZ=p?1#&*i=}iIh>$moXGtAIgP}Uh)9Jd(9c4v{;w+c@O)>EL zTR$Up{jKqa3$)CL_2`B-X2m`{dal~tDamp~bfg@*US(1jb_BVyO#Ik3`T>6^yrQgA zDVMUzB1e#-DLK2CocK#8@dGJZ5K11e(=n$~!eOqiX76OS-(Gw3`Kw@=0Rlz(FPJlZ zjn@FlKBj%h*nJSBXRl{sufq?rg5kH{OvB4=6}3cU)>IH7m|)rNjfDC4Kd~Ta4I&DmqzkPAiOP1wRYWMvL_&kNB^dJCIXjD`K@LY%>B)$V=Xg zeo!{$?&L|XAyO92*c8wi4-V%9qN)*|7#S@Y2#HtzyY-?DxU~FOgwI)5oFDu@|;RobQ`DFv@mJW zWL8`rsl$`hIPuitvMzuhisT>BabmG0yk%ka2rI83P-9vY1No+&dG{PJQ?D>x`MUQ+ z!4q&D?jN{39Xpy|+bHUbpCBklZH|2-lg;4%8DN`i9KjipQI4af>Y+@3TVpI^kmuGo z8nY)KzBTNUqNf;xQ%V~;f^DwLM3nFuHl&;P!uaAi1gQ?TP9`NIvps517ETWs*e84v zDOtk-xep3^><~ZhEeOtXCdE>VB=Lb$X~$0G{>YzM$W^L@9;H#>2EzG*peN@Wk}xZUnG?r;{F?gGUl#60ehGbx1#N>6_Z+!j}0vB;z zN$b%*nF*c?TgsC6m%Th(~4d_9u>;dq_GxVqbGm7UZ_9-l){?G2^A^;V3D>@uogB?u8$+h$7b6p#Rq%i_hy zr4U8M7b6j~0XRb*#otTsrYgu-bFl3*Q>>&2@tmjuf zhQPg*lsrIULE~Us-Zh__+WKwT*7a4Q^VOxL6$(d(SjXSX>jE$3>!eU$)WZ$pi>s?; zBgOf;Rb3}v)kH@}d*kD&|94x@efIm+x3z{gxPUvmy30M9t<^v6-lmpS6anVG9Rc=@ ztLrO^S?9J>_siR-;McK53(*hf=biq~HI0UXPw?46V4-_P!poR9V|+Bu$`_?!N2xZaxteR``Z0jyp^!Y0{y^4BR|*A7IZ z{cUglOjgCd@qbr_f~lbHJ~Yep5z?;qjj9g>XpdKQN8QcbV_%x(O4=YNX?w#zY&%we zw{Ep_H!DYkQDz$wnWI3iK6&^HvovSt5YAmm&l|jv@b5r67DuHaEF$$y3KV*j9pZ47 z_uaV#a6_eVK|Im_C$@Y5g(b;_xhJrNt?1^`#6lkL5i*J~kh9{@#3?7LH@b0t&QcBp za%vdt=)sR2#xlCRenTfUDZE8Ri$`?hmtbfs#`oTr^=H?l-fk05p?Cb;s~FygYek`3 z4oMD15x2%qXng#T8^MDIj}z{PP47L?3;n_3AH*ThSyjGd*18K~cWm?D2vdtVQ+1e( z0if(CoNJ{*dptIcP|ohU9D#=g{k>iCW02dK75Uavi@hoAVV9mV$n7_t;3 z#-*Y;Fbx(YY9d2;YHI*GA95%%EduEHx$yD4-b|qBrob{|puK$qfjBkFDa-)e+ptju zHeNF>00o#D^aC^`4JNzk2}GQSk9ntL7vw68ya$D1P;$#iz>}VE@XnzVz>34Jdl1h? 
zot9co)G1trYe9|eR5p4L?|{RFDF`lFRyM6xlgz4sk!XUTtq^v7$dnBwK?rf+1I_j$ z0l6@vcO{BS5PTSZBDqfTNc_I64_DWxz=mWkOndVUZ1{AluOcDz9XJFZGnEeu;XQv* zQ=sz_S-A(+#b|Lon$v8Fhm;86D3{pb$uCqdAoGepAn%QypCJN3ytaU{P)q^T2M;X- zHI*ZPBv3|hB84Vn8ni&QNtL3OB&=dHK zEv?-9agQo4qCA?+SiB7v(E(dDHA@ELcBPxOpb6#}7l|(sPM7mEk#okS7*W{LDVY|- z_dRU~!B^ILIkEeE_#VUs!-9~Ji8*Ge z3(bqll6IsXVQhFnz1`Tkg=X>CoJuGgm`z6;s~T`IOig-t!W~8E?=WEROO;XAV6+Rh zn%^~ka>c}OCRR*aTW*U(8Q&z^+$gihPr(?NFopm+(l-%?W~)EcGaCUZ`;F1_bmay+ zrHgw5sqj#13@g8pXG&^_!IaR{4KKGvjF^0v9aUF`CHGj`WTGwzryOxpLzbaZ?c*-1 z?i|>4uVS#FH%~c7-BDl=gw3@r{f@S;Xu3!^qzN%pQF(-lX&=)cNbDYD?y`e=hc7-owl^94P z=(vBi_E8%F^Shf)lsHiSvpoU%bgtddZz=3i5Pbt8NaCD#7=~Nl$YWjt3UM|%7wQ5q zKc3_()^LG{;CJg5i9@!&?BpEpC@ci$gC~hAQ ztjDSAV|843DtyAnLj1jNqutP9WdDb&t9(+5by2d2%@*v1oMh0LS{8^f?@@SGfSGN^FDK%9Wkmv?D}hTQb$-}(97ybRZAPZy&7^x8n0OT^%=Ohd6v*?}<|tJ^ zHAC=Etuh~5q0n1DwVZbh24NP}KC-xqGLn4`v(Z1G^w4P~Dd(v>&5vl0EY-b>A7M(* z9wZSb$6Tg|zKAi8AakfsxZ1r`hs%5I9nglQ>yl8+VjgIiiYdsFuR#rZS#TLs}nJCobov&O+3$E{?RdI7>(> zt_Gy==~7`hbFx%pQ>{94MC`8agkZ}~>jb2x%+G(NGTSV8)58$hGQtUe{A%Nn5`0$! zHg6VDtYr9#ApOpV)LK;n*A?1bJ5xvW2y!D9b7Jp>#zWk>lv)}7H@ip>7Yi%`Y#>W2#OY2K%8Ad<8EoTr*`Yc`L zH#?Y_yx(g{9u#U=c*RXF9JeeaK7ICO!Pr^0ZVY~buXJusXnW`3oF}x>T=E13$7)#+ zTmJ@YfA?2toTC%GNaN~x0cIax6&KBDDATm}-??QFqLw$yGH^qpW0pUoWU=NwJ+I)2 z<7?BEZBP6)G{IpYx%SuLlXt0RqT_Xf0m7B0W5-L{BhcS>`|mmFoDzMVFG-`sURuyP zyR!D(bIuH>>dIwv+nf*Gqr+SUwJ2TF*YAZO={uSiaJ5*Qj5z@r6#2rU+nfn zGr@6I?$naJEvZ zDrqitY)vPhcmpb%gTFT}-R2+K+)etZ{)Qa=o0aoJs_vn}bNB}$J+`fI{jg*?mn|%Q zP{BtIwls7Q<(p_X>P7%7?Bh(Fgd*ol>_tW{f}mY_BCio_)z;BCCDtxDIybr|dV16A zwRR~O{wQgOoOOiKVL5Q7EyHhI=C9$pOsms?x}tFpFYZ}3#q<&}h8iY8eV?RvTc-DA70W*jJ0@j&iss{Rv1 z6Y5IZ{x_9LRQYv1chuG(aE??haK8CVTg%t{lSwt--#}BV5e(E7j%_g45NnFUVReBP zbCXe8QTS!YWa5zU7DVM*b-JzoPB8ykv;b)s>?-O5(A{=Qv6!F2JZmH;rdg|qiwM)R zQUQ(@6D}G7hjB>@gx?pPZ&I1!i>Mq5drS3(V$UTHqj9Dw%4X~qvc=7t3?rB}UP}Z0*ulU!huQ;!{@jdyi5#1+hx4u znB;~`Wz(XI(WZUzwZ!a=Mf`h8BMZD+|0~UPpKaFxZapwGHH5$Fa7d{nSDTs!Nz*Q9 zI3ahTe7XYr+9xY7=WV|}zbxd}(Hc_eOGjQ_-akjnEfhhP0KoTAe$5jey2qb3{ z`oD^fpA~`C5i|HPGBP{kcU&OvSl^s5Pm1xi(7ep3YOsEa1Q|GS}J7fzB1K@NuhpKK- zfl#;xQG!%(B`!-?FTZn~Ou#mbl0;I@jLQoEu)=vT>C96nV>SVLvj!Y;iBMwal=&DA z?^+2Nlp!N9Ww*_Hq=7(s?;iLs!@GxQ2~q{QbTX#&ezj0KWkyL_=bxEkx@4Z-hkHWK zG!kLeg$9i0M8ud|wj_Zl3vFab+ukn7QgbQ?IG!=qc1=@XF#bbW%}O+96RsDmppJ4E z5~Re$RgQ^|G7<7%Bmv`UpV=iqzejH*SPeez3+r z)jN`M1uqdo60oW2dp5{oEK1@J3D?-yfUJUr_anmSB ziOR&xhm<#Nn6d`9UML(FlQ`$*UJRkgehlitP@j2*3cAT7)S39)XK_j>8g(0>M7rj) zjW%FirtD_(6tfn2lsx$IK&`;4qMcWYd%&MqXOhm$P`jOQz?JUvyjvVQ4>8wYR>|KR zewBS`6YIx#@D3X05(T|Rk&XEu$v-@rEnT$<_?EV%86BWc5N8iPn}2u7lkkjEZ2p)~ z^-s_Gp=$PDdglHDDb8rfHh_4}k_4jW~G73Bo;V3jcTPqL6$;!#2* zWP^sUdc%RKgPCTGCrwGL*?$#NhJOJwIUTIha?7A70I>QNQ1NJ}@WwCsFy72Q{@OD^ zrop&<8me$rE+BB)f|r#>$7MY!npE7Kot#C#b3{ zkAV2^Bpmcz3f(W)!N7>XD0r)>e5KTMbg?xtcQCc0RB$vix1%(6F}1SxFsI~X<3P3l e_YJG7sg=3Cv8w>3y|Ig}qpKp9p_vI6>i+_63( 0): + temp = "'" + self.svn_repository.location + "'" + if hasattr(self, "hg_repository") and (not (self.hg_repository is None)) and (len(self.hg_repository.location) > 0): + temp = temp + (", " if len(temp)>0 else "") + "'" + self.hg_repository.location + "'" + if hasattr(self, "darcs_repository") and (not (self.darcs_repository is None)) and (len(self.darcs_repository.location) > 0): + temp = temp + (", " if len(temp)>0 else "") + "'" + self.darcs_repository.location + "'" + if hasattr(self, "bzr_repository") and (not (self.bzr_repository is None)) and (len(self.bzr_repository.location) > 0): + temp = temp + (", " if len(temp)>0 else "") + "'" + self.bzr_repository.location + "'" + if hasattr(self, "arch_repository") and (not 
(self.arch_repository is None)) and (len(self.arch_repository.location) > 0): + temp = temp + (", " if len(temp)>0 else "") + "'" + self.arch_repository.location + "'" + if hasattr(self, "bk_repository") and (not (self.bk_repository is None)) and (len(self.bk_repository.location) > 0): + temp = temp + (", " if len(temp)>0 else "") + "'" + self.bk_repository.location + "'" + if hasattr(self, "cvs_repository") and (not (self.cvs_repository is None)) and (len(self.cvs_repository.location) > 0): + temp = temp + (", " if len(temp)>0 else "") + "'" + self.cvs_repository.location + "'" + if hasattr(self, "git_repository") and (not (self.git_repository is None)) and (len(self.git_repository.location) > 0): + temp = temp + (", " if len(temp)>0 else "") + "'" + self.git_repository.location + "'" + return "(" + temp + ")" + + + def load_from_doap(self, par_doap, doap_xml = None): + self.idDoap = None + self.name = par_doap.name + self.shortdesc = ("" if (len(par_doap.shortdesc)==0) else par_doap.shortdesc[0]) + self.description = ("" if (len(par_doap.description)==0) else par_doap.description[0]) + self.homepage = "" if (par_doap.homepage is None) else (str(par_doap.homepage.resUri) if hasattr(par_doap.homepage,"resUri") else par_doap.homepage) + self.old_homepage = [] + if len(par_doap.old_homepage) > 0: + for ohp in par_doap.old_homepage: + self.old_homepage.append(ohp) + #if par_doap.created is None DateHelper sets to minvalue + self.created = str(DateHelper(par_doap.created).date) #sometimes dates a wrongly written: "1967-7-10" instead of "1967-07-10"; DateHelper normalizes them + #svn + if hasattr(par_doap, "svn_repository") and (not (par_doap.svn_repository is None)): + r = par_doap.svn_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "svn_browse")) or r.svn_browse is None) else (str(r.svn_browse.resUri) if hasattr(r.svn_browse,"resUri") else r.svn_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "svn" + self.svn_repository = repo + #bk BitKeeper + if hasattr(par_doap, "bk_repository") and (not (par_doap.bk_repository is None)): + r = par_doap.bk_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "bk_browse")) or r.bk_browse is None) else (str(r.bk_browse.resUri) if hasattr(r.bk_browse,"resUri") else r.bk_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "bk" + self.bk_repository = repo + #cvs_repository + if hasattr(par_doap, "cvs_repository") and (not (par_doap.cvs_repository is None)): + r = par_doap.cvs_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "cvs_browse")) or r.cvs_browse is None) else (str(r.cvs_browse.resUri) if hasattr(r.cvs_browse,"resUri") else r.cvs_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if 
hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "cvs" + self.cvs_repository = repo + #arch + if hasattr(par_doap, "arch_repository") and (not (par_doap.arch_repository is None)): + r = par_doap.arch_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "arch_browse")) or r.arch_browse is None) else (str(r.arch_browse.resUri) if hasattr(r.arch_browse,"resUri") else r.arch_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "arch" + self.arch_repository = repo + #bzr Bazaar + if hasattr(par_doap, "bzr_repository") and (not (par_doap.bzr_repository is None)): + r = par_doap.bzr_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "bzr_browse")) or r.bzr_browse is None) else (str(r.bzr_browse.resUri) if hasattr(r.bzr_browse,"resUri") else r.bzr_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "bzr" + self.bzr_repository = repo + #git + if hasattr(par_doap, "git_repository") and (not (par_doap.git_repository is None)): + r = par_doap.git_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "git_browse")) or r.git_browse is None) else (str(r.git_browse.resUri) if hasattr(r.git_browse,"resUri") else r.git_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "git" + self.git_repository = repo + #hg Mercurial + if hasattr(par_doap, "hg_repository") and (not (par_doap.hg_repository is None)): + r = par_doap.hg_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "hg_browse")) or r.hg_browse is None) else (str(r.hg_browse.resUri) if hasattr(r.hg_browse,"resUri") else r.hg_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "hg" + self.hg_repository = repo + #darcs + if hasattr(par_doap, "darcs_repository") and (not (par_doap.darcs_repository is None)): + r = par_doap.darcs_repository + repo = DoapRepository(self) + repo.location = "" if ((not hasattr(r, "location")) or r.location is None) else (str(r.location.resUri) if hasattr(r.location,"resUri") else r.location) + repo.browse = "" if ((not hasattr(r, "darcs_browse")) or r.darcs_browse is None) else (str(r.darcs_browse.resUri) if hasattr(r.darcs_browse,"resUri") else r.darcs_browse) + repo.anon_root = "" if ((not hasattr(r, "anon_root")) or r.anon_root is None) else (str(r.anon_root.resUri) if hasattr(r.anon_root,"resUri") else r.anon_root) + repo.type = "darcs" + self.darcs_repository = repo + 
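+ # The eight blocks above (svn, bk, cvs, arch, bzr, git, hg and darcs) all follow the same pattern:
+ # copy location, <type>_browse and anon_root from the doapfiend object into a DoapRepository and
+ # tag it with its type; only the name of the browse attribute differs between repository kinds.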
self.mailing_list = "" #TODO str(par_doap.mailing_list.resUri) + self.category = [] + for ct in par_doap.category: + self.category.append(str(ct.resUri)) + self.release = [] + if not (par_doap.releases is None): + for dr in par_doap.releases: + dv = DoapVersion() + dv.name = dr.name + dv.created = dr.created + dv.revision = dr.revision + if not (dr.file_releases is None): + dv.file_release = [] + for fr in dr.file_releases: + dv.file_release.append(fr) + self.release.append(dv) + self.license = [] + for l in par_doap.license: + self.license.append(str(l.resUri)) + self.download_page = "" if (par_doap.download_page is None) else (str(par_doap.download_page.resUri) if hasattr(par_doap.download_page,"resUri") else par_doap.download_page) + self.download_mirror = [] + if len(par_doap.download_mirror) > 0: + for dl in par_doap.download_mirror: + self.download_mirror.append(str(dl.resUri)) + self.wiki = [] + if len(par_doap.wiki) > 0: + for w in par_doap.wiki: + self.wiki.append(str(w.resUri)) + self.bug_database = "" if (par_doap.bug_database is None) else str(par_doap.bug_database.resUri) + self.developer = [] + #doapfiend has a bug: it does not load foafperson; we must parse it directly + if (not doap_xml is None): + xmldoc = minidom.parseString(doap_xml) + + self.maintainer = [] + maintainers = xmldoc.getElementsByTagName('maintainer') + for maintainer in maintainers : + if len(maintainer.getElementsByTagName('foaf:name')) > 0: + name = maintainer.getElementsByTagName('foaf:name')[0].firstChild.data + try: + mbox = maintainer.getElementsByTagName('foaf:mbox')[0].attributes["rdf:resource"].value + except: + pass + fp = FoafPerson(name, "") + self.maintainer.append(fp) + + self.developer = [] + developers = xmldoc.getElementsByTagName('developer') + for developer in developers : + if len(developer.getElementsByTagName('foaf:name')) > 0: + name = developer.getElementsByTagName('foaf:name')[0].firstChild.data + try: + mbox = developer.getElementsByTagName('foaf:mbox')[0].firstChild.data + except: + pass + fp = FoafPerson(name, "") + self.developer.append(fp) + + self.documenter = [] + documenters = xmldoc.getElementsByTagName('documenter') + for documenter in documenters : + if len(documenter.getElementsByTagName('foaf:name')) > 0: + name = documenter.getElementsByTagName('foaf:name')[0].firstChild.data + try: + mbox = documenter.getElementsByTagName('foaf:mbox')[0].firstChild.data + except: + pass + fp = FoafPerson(name, "") + self.documenter.append(fp) + + self.helper = [] + helpers = xmldoc.getElementsByTagName('helper') + for helper in helpers : + if len(helper.getElementsByTagName('foaf:name')) > 0: + name = helper.getElementsByTagName('foaf:name')[0].firstChild.data + try: + mbox = helper.getElementsByTagName('foaf:mbox')[0].firstChild.data + except: + pass + fp = FoafPerson(name, "") + self.helper.append(fp) + + self.tester = [] + testers = xmldoc.getElementsByTagName('tester') + for tester in testers : + if len(tester.getElementsByTagName('foaf:name')) > 0: + name = tester.getElementsByTagName('foaf:name')[0].firstChild.data + try: + mbox = tester.getElementsByTagName('foaf:mbox')[0].firstChild.data + except: + pass + fp = FoafPerson(name, "") + self.tester.append(fp) + + self.translator = [] + translators = xmldoc.getElementsByTagName('translator') + for translator in translators : + if len(translator.getElementsByTagName('foaf:name')) > 0: + name = translator.getElementsByTagName('foaf:name')[0].firstChild.data + try: + mbox = 
translator.getElementsByTagName('foaf:mbox')[0].firstChild.data + except: + pass + fp = FoafPerson(name, "") + self.translator.append(fp) + + self.programming_language = [] + for pl in par_doap.programming_language: + self.programming_language.append(pl) + self.os = [] + self.platform = "" + self.service_endpoint = "" + self.language = [] + self.audience = "" + self.blog = "" + + def load_from_db(self, idDoap): + logger = Logger.getInstance() + self.idDoap = idDoap + try: + # expression for modified, modified_release is due to a bug in mysqldb for BIT data type + cursor = CrawlerDatabase.execute_cursor("SELECT name, shortdesc, description, homepage, created, mailing_list, download_page, bug_database, platform, service_endpoint, audience, blog, IF(modified=1,1,0) as modified, old_homepage, category, license, download_mirror, wiki, programming_language, os, language, idDWBatch, idProject, IF(modified_release=1,1,0) as modified_release FROM Doap WHERE idDoap="+str(idDoap)) + result = cursor.fetchone() + #for each project in the batch + if (result is None): + #throw + pass + else: + self.name = result[0] + self.shortdesc = result[1] + self.description = result[2] + self.homepage = result[3] + self.created = str(result[4]) + self.mailing_list = result[5] + self.download_page = result[6] + self.bug_database = result[7] + self.platform = result[8] + self.service_endpoint = result[9] + self.audience = result[10] + self.blog = result[11] + self.modified = True if result[12] == b'\x00' else False + self.modified_release = True if result[23] == b'\x00' else False + self.old_homepage = StringList().load_base64(result[13]).plain + self.category = StringList().load_base64(result[14]).plain + self.license = StringList().load_base64(result[15]).plain + self.download_mirror = StringList().load_base64(result[16]).plain + self.wiki = StringList().load_base64(result[17]).plain + self.programming_language = StringList().load_base64(result[18]).plain + self.os = StringList().load_base64(result[19]).plain + self.language = StringList().load_base64(result[20]).plain + self.idDWBatch = result[21] + self.idProject = result[22] + #DoapVersion Table + self.release = [] + cur = CrawlerDatabase.execute_cursor("SELECT platform, revision, file_release, created, name FROM DoapVersion WHERE idDoap=" + str(self.idDoap)) + results = cur.fetchall() + for record in results: + dv = DoapVersion() + dv.name = record[4] + dv.created = str(record[3]) + dv.revision = record[1] + dv.platform = record[0] + dv.file_release = StringList().load_base64(record[2]).plain + self.release.append(dv) + #DoapRepository Table + cur = CrawlerDatabase.execute_cursor("SELECT browse, anon_root, location, type FROM DoapRepository WHERE idDoap=" + str(self.idDoap)) + results = cur.fetchall() + for record in results: + dr = DoapRepository(self) + dr.browse = record[0] + dr.anon_root = record[1] + dr.location = record[2] + dr.type = record[3] + if dr.type == 'svn': + self.svn_repository = dr + if dr.type == 'bk': + self.bk_repository = dr + if dr.type == 'cvs': + pass + #PATCH doapfiend adds a cvs even if it is not there self.cvs_repository = dr + if dr.type == 'arch': + self.arch_repository = dr + if dr.type == 'bzr': + self.bzr_repository = dr + if dr.type == 'git': + self.git_repository = dr + if dr.type == 'hg': + self.hg_repository = dr + if dr.type == 'darcs': + self.darcs_repository = dr + + self.maintainer = [] + self.developer = [] + self.documenter = [] + self.helper = [] + self.tester = [] + self.translator = [] + parameters = { + 
                    'idDoapProject': idDoap
+                }
+                cur = CrawlerDatabase.execute_cursor("SELECT fp.firstName, fp.lastName, fp.login, dpfp.idDoapRole FROM FoafPerson fp JOIN DoapProjectFoafPerson dpfp on fp.idFoafPerson=dpfp.idFoafPerson WHERE idDoapProject=%(idDoapProject)s", parameters)
+                results = cur.fetchall()
+                for record in results:
+                    fp = FoafPerson(record[0], record[1], record[2])
+                    idDoapRole = record[3]
+                    if idDoapRole == 1:
+                        self.maintainer.append(fp)
+                    elif idDoapRole == 2:
+                        self.developer.append(fp)
+                    elif idDoapRole == 3:
+                        self.documenter.append(fp)
+                    elif idDoapRole == 4:
+                        self.tester.append(fp)
+                    elif idDoapRole == 5:
+                        self.translator.append(fp)
+                    elif idDoapRole == 6:
+                        self.helper.append(fp)
+        except Exception as e:
+            logger.error(str(e))
+
+    def save_to_db(self):
+        logger = Logger.getInstance()
+        try:
+            # self.description.encode('utf-8') might give UnicodeDecodeError: 'charmap' codec can't decode byte 0x8d in position 118: character maps to
+            try:
+                description = self.description.encode('utf-8')
+            except:
+                description = self.description
+            try:
+                name = self.name.encode('utf-8')
+            except:
+                name = self.name
+            try:
+                download_page = self.download_page.encode('utf-8')
+            except:
+                download_page = self.download_page
+            parameters = {
+                'idDWBatch': self.id_batch,
+                'idProject': self.id_project,
+                'name': name,
+                'shortdesc': self.shortdesc.encode('utf-8'),
+                'description': description,
+                'homepage': self.homepage.encode('utf-8'),
+                'created': self.created, # format: 2006-03-27
+                'mailing_list': self.mailing_list.encode('utf-8'),
+                'download_page': download_page,
+                'bug_database': self.bug_database.encode('utf-8'),
+                'platform': self.platform.encode('utf-8'),
+                'service_endpoint': self.service_endpoint.encode('utf-8'),
+                'audience': self.audience.encode('utf-8'),
+                'blog': self.blog.encode('utf-8'),
+                'old_homepage': StringList().load_plain(self.old_homepage).base64_encoded,
+                'category': StringList().load_plain(self.category).base64_encoded,
+                'license': StringList().load_plain(self.license).base64_encoded,
+                'download_mirror': StringList().load_plain(self.download_mirror).base64_encoded,
+                'wiki': StringList().load_plain(self.wiki).base64_encoded,
+                'programming_language': StringList().load_plain(self.programming_language).base64_encoded,
+                'os': StringList().load_plain(self.os).base64_encoded,
+                'language': StringList().load_plain(self.language).base64_encoded,
+                'releaseUrl': self.releaseUrl.encode('utf-8')
+            }
+        except Exception as ex:
+            logger.error(' 1 - saving to db (parameters): ' + self.name)
+
+        CrawlerDatabase.execute("START TRANSACTION", False)
+        try:
+            # If idDoap is None insert, else update
+            if ((not hasattr(self, 'idDoap')) or self.idDoap is None): #insert
+                sqlString = "INSERT INTO Doap (idDWBatch, idProject, Name, Shortdesc, Description, Homepage, Created, Mailing_list, Download_page, Bug_database, Platform, Service_endpoint, Audience, Blog, modified, modified_release, old_homepage, category, license, download_mirror, wiki, programming_language, os, language, releaseUrl) "
+                sqlString += "VALUES (%(idDWBatch)s, %(idProject)s, %(name)s, %(shortdesc)s, %(description)s, %(homepage)s, %(created)s, %(mailing_list)s, %(download_page)s, %(bug_database)s, %(platform)s, %(service_endpoint)s, %(audience)s, %(blog)s, b'" + ("1" if self.modified else "0") + "', b'" + ("1" if self.modified_release else "0") + "', %(old_homepage)s, %(category)s, %(license)s, %(download_mirror)s, %(wiki)s, %(programming_language)s, %(os)s, %(language)s, %(releaseUrl)s)"
+                self.idDoap = CrawlerDatabase.execute(sqlString, False, parameters)
+            else: #update
+                sqlString = "UPDATE Doap SET idDWBatch=%(idDWBatch)s, idProject=%(idProject)s, name=%(name)s, shortdesc=%(shortdesc)s, description=%(description)s, homepage=%(homepage)s, created=%(created)s, mailing_list=%(mailing_list)s, download_page=%(download_page)s, bug_database=%(bug_database)s, platform=%(platform)s, service_endpoint=%(service_endpoint)s, audience=%(audience)s, blog=%(blog)s, modified=b'" + ("1" if self.modified else "0") + "', modified_release=b'" + ("1" if self.modified_release else "0") + "', old_homepage=%(old_homepage)s, category=%(category)s, license=%(license)s, download_mirror=%(download_mirror)s, wiki=%(wiki)s, programming_language=%(programming_language)s, os=%(os)s, language=%(language)s "
+                sqlString += "WHERE idDoap="+str(self.idDoap)
+                CrawlerDatabase.execute(sqlString, False, parameters)
+        except Exception as ex:
+            logger.error(' 2 - saving to db (Doap table): ' + self.name)
+
+        try:
+            #delete all releases (there is no way to tell whether this is an update of an existing record)
+            CrawlerDatabase.execute("DELETE FROM DoapVersion WHERE idDoap=" + str(self.idDoap), False)
+            #DoapVersion / releases table
+            #insert all releases
+            #created string "2008-04-11"
+            #platform string
+            for dv in self.release: #dv = doap version
+                #normalize the release date: empty or malformed values become NULL,
+                #well-formed "YYYY-MM-DD" values are normalized through DateHelper
+                if dv.created == "" or len(dv.created) != 10:
+                    dv.created = None
+                else:
+                    dv.created = str(DateHelper(dv.created).date)
+
+                parameters = {
+                    'platform': "", #TODO: ADD dv.platform,
+                    'revision': dv.revision,
+                    'file_release': StringList().load_plain(dv.file_release).base64_encoded,
+                    'idDoap': self.idDoap,
+                    'created': dv.created, #non standard; found in Apache
+                    'name': dv.name #non standard; found in Apache
+                }
+                CrawlerDatabase.execute("INSERT INTO DoapVersion (Platform, Revision, File_release, idDoap, created, name) VALUES (%(platform)s, %(revision)s, %(file_release)s, %(idDoap)s, %(created)s, %(name)s)", False, parameters)
+
+        except Exception as ex:
+            logger.error(' 3 - saving DoapVersion for: ' + self.name +
+                         ' with File_release ' + str(self.release) +
+                         ' with idDoap ' + str(self.idDoap) +
+                         ' with created ' + str(self.created) +
+                         ' platform ' + str(self.platform))
+
+        try:
+            parameters = { 'idDoap': self.idDoap }
+            #DoapRepository Table; each project can have just one repository of a specific type
+            #insert or update all repositories
+            if hasattr(self, "svn_repository") and (not (self.svn_repository is None)):
+                self.svn_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='svn'", True, parameters)
+            if hasattr(self, "hg_repository") and (not (self.hg_repository is None)):
+                self.hg_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='hg'", True, parameters)
+            if hasattr(self, "darcs_repository") and (not (self.darcs_repository is None)):
+                self.darcs_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='darcs'", True, parameters)
+            if hasattr(self, "bzr_repository") and (not (self.bzr_repository is None)):
+                self.bzr_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='bzr'", True, parameters)
+            if hasattr(self, "arch_repository") and (not (self.arch_repository is None)):
+                self.arch_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='arch'", True, parameters)
+            if hasattr(self, "bk_repository") and (not (self.bk_repository is None)):
+                self.bk_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='bk'", True, parameters)
+            if hasattr(self, "cvs_repository") and (not (self.cvs_repository is None)):
+                self.cvs_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='cvs'", True, parameters)
+            if hasattr(self, "git_repository") and (not (self.git_repository is None)):
+                self.git_repository.insert_or_update(False)
+            else:
+                CrawlerDatabase.execute("DELETE FROM DoapRepository WHERE idDoap=%(idDoap)s and Type='git'", True, parameters)
+        except Exception as ex:
+            logger.error(' 4 - saving DoapRepository for: ' + self.name)
+
+        parameters = {
+            'idDoapProject': self.idDoap
+        }
+        tmp_sql = "INSERT INTO DoapProjectFoafPerson (idDoapProject, idDoapRole, idFoafPerson) VALUES (%(idDoapProject)s, %(idDoapRole)s, %(idFoafPerson)s)"
+        for person in self.maintainer:
+            parameters['idDoapRole'] = 1
+            person.save()
+            parameters['idFoafPerson'] = person.id
+            CrawlerDatabase.execute(tmp_sql, False, parameters)
+        for person in self.developer:
+            parameters['idDoapRole'] = 2
+            person.save()
+            parameters['idFoafPerson'] = person.id
+            CrawlerDatabase.execute(tmp_sql, False, parameters)
+        for person in self.documenter:
+            parameters['idDoapRole'] = 3
+            person.save()
+            parameters['idFoafPerson'] = person.id
+            CrawlerDatabase.execute(tmp_sql, False, parameters)
+        for person in self.tester:
+            parameters['idDoapRole'] = 4
+            person.save()
+            parameters['idFoafPerson'] = person.id
+            CrawlerDatabase.execute(tmp_sql, False, parameters)
+        for person in self.translator:
+            parameters['idDoapRole'] = 5
+            person.save()
+            parameters['idFoafPerson'] = person.id
+            CrawlerDatabase.execute(tmp_sql, False, parameters)
+        for person in self.helper:
+            parameters['idDoapRole'] = 6
+            person.save()
+            parameters['idFoafPerson'] = person.id
+            CrawlerDatabase.execute(tmp_sql, False, parameters)
+
+        CrawlerDatabase.execute("COMMIT", False)
+
+    @staticmethod
+    def modified(dp_old, dp_new, attributes):
+        logger = Logger.getInstance()
+        #if there are important differences flag the project as changed
+        for attribute in attributes.keys():
+            if (hasattr(dp_old, attribute) and hasattr(dp_new, attribute)):
+                old = getattr(dp_old, attribute)
+                new = getattr(dp_new, attribute)
+                #if they are lists sort them; if they are objects metadata_attributes[attribute] tells by which key(s) they must be sorted
+                #e.g. 
Version: sometimes the name is always the same and the revision is different; other times it's the other way around; that's why + # we need to have a list of keys + if type(old)==type([]) and type(new)==type([]): + if (len(old) == len(new)) and len(old)>0: + for sort_key in attributes[attribute]: + old.sort(key=sort_key) + new.sort(key=sort_key) + for iter in range(0, len(old)): + #I added logging but I want to run __eq__ only once so I store the comparison in modified + modified = old[iter] != new[iter] + dp_new.modified = dp_new.modified or (modified) + if modified: + logger.info("Old idDoap=" + str(dp_new.idDoap) + "; name=" + dp_old.name + "; attribute modified=" + attribute + "; old value:" + TrivialJSONEncoder().encode(old[iter]) + "; new value:" + TrivialJSONEncoder().encode(new[iter])) + else: + dp_new.modified = True #arrays of different lengths + else: #not an array + modified = old != new #I added logging but I want to run __eq__ only once + dp_new.modified = dp_new.modified or (modified) #it could be URIRef + if modified: + logger.info("Old idDoap=" + str(dp_new.idDoap) + "; name=" + dp_old.name + "; attribute modified=" + attribute + "; old value:" + TrivialJSONEncoder().encode(old) + "; new value:" + TrivialJSONEncoder().encode(new)) + if (hasattr(dp_old, attribute) != hasattr(dp_new, attribute)): + # attribute added or removed + logger.info("Doap.idProject=" + str(dp_new.id_project) + "; name=" + dp_old.name + "; attribute added or removed=" + attribute) + dp_new.modified = True + return dp_new.modified + + def search_other_doaps(self, id_old_doap, id_other_doaps): + """ + I search other doaps to compare with old ones and set modified* flags and to + set id_project to an existing one if other doaps exist otherwise to a new one + """ + parameters = { + 'name': self.name, + 'homepage': self.homepage + } + #have I found it in the same source? + if not (id_old_doap is None): + dp_old = DoapProject() + dp_old.load_from_db(id_old_doap) + #The comparison between projects is incapsulated in a DoapProject's method + #if the project exists already integrated compare doap and return true if modified + self.modified_release = DoapProject.modified(dp_old, self, DoapProject.release_attributes) + self.modified = DoapProject.modified(dp_old, self, DoapProject.metadata_attributes) + #have I found it in other sources? + if len(id_other_doaps) > 0: + # there could be more than one, but I expect them to be already unified within the same CrawlerProject (e.g. a project with doaps from different sources) + # should we check and raise an exception or log something? 
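+            # The first matching DOAP is used below: its id_project links this DOAP to the
+            # Project row that already exists for the same project in another source.
+            # If no other-source DOAP matches, a new Project row is inserted instead and
+            # CrawlerDatabase.execute (called with its second argument True, which elsewhere
+            # in this patch returns the new auto-increment id) supplies self.id_project.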
+            dp_other = DoapProject()
+            dp_other.load_from_db(id_other_doaps[0])
+            self.id_project = dp_other.id_project
+        else:
+            self.id_project = CrawlerDatabase.execute("INSERT INTO Project (Name, Homepage) VALUES (%(name)s, %(homepage)s)", True, parameters)
+
+    def load(self, idProject):
+        logger = Logger.getInstance()
+        #logger.info("Loading batch " + str(id_batch))
+        #cur = CrawlerDatabase.execute_cursor("SELECT idDWBatch, idSource, created, completed, integrated, cancelled, idWFState FROM DWBatch WHERE idDWBatch="+str(id_batch))
+        #row = cur.fetchone()
+        cursor = CrawlerDatabase.execute_cursor("SELECT name, shortdesc, description, homepage, created, mailing_list, download_page, bug_database, platform, service_endpoint, audience, blog, IF(modified=1,1,0) as modified, old_homepage, category, license, download_mirror, wiki, programming_language, os, language, idDWBatch, idDoap, IF(modified_release=1,1,0) as modified_release FROM Doap WHERE idProject="+str(idProject))
+        result = cursor.fetchone()
+
+        if not (result is None):
+            self.name = result[0]
+            self.shortdesc = result[1]
+            self.description = result[2]
+            self.homepage = result[3]
+            self.created = result[4]
+
\ No newline at end of file
diff --git a/web-crawler/dw_batch.py b/web-crawler/dw_batch.py
new file mode 100755
index 00000000..86401784
--- /dev/null
+++ b/web-crawler/dw_batch.py
@@ -0,0 +1,99 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Copyright 2014 Bitergium SLL
+
+from CrawlerDatabase import CrawlerDatabase
+from Utils import Logger, Sources
+
+class DwBatch():
+    def __init__(self):
+        self.id_batch = 0
+
+    def load(self, id_batch):
+        logger = Logger.getInstance()
+        #logger.info("Loading batch " + str(id_batch))
+        cur = CrawlerDatabase.execute_cursor("SELECT idDWBatch, idSource, created, completed, integrated, cancelled, idWFState FROM DWBatch WHERE idDWBatch="+str(id_batch))
+        row = cur.fetchone()
+        if not (row is None):
+            self.id_batch = id_batch
+            self.id_source = row[1]
+            self.created = row[2]
+            self.completed = row[3]
+            self.integrated = row[4]
+            self.cancelled = row[5]
+            self.id_wf_state = row[6]
+
+    def initialize(self, id_source):
+        logger = Logger.getInstance()
+        self.id_source = id_source
+        self.id_batch = CrawlerDatabase.execute("INSERT into DWBatch (idsource, idWFState) VALUES (" + str(self.id_source) + ", 1)", True)
+        logger.info("Loading batch " + str(self.id_batch) + " of type " + str(self.id_source))
+
+    def writeToDB(self):
+        logger = Logger.getInstance()
+        logger.info("Writing to DB batch " + str(self.id_batch))
+        parameters = {
+            'idDWBatch': self.id_batch,
+            'id_source': self.id_source,
+            'completed': self.completed,
+            'integrated': self.integrated,
+            'cancelled': self.cancelled,
+            'id_wf_state': self.id_wf_state,
+            'id_parent': self.id_parent
+        }
+        sql = "UPDATE DWBatch SET idSource=%(id_source)s, "
+        sql += "completed=%(completed)s, integrated=%(integrated)s, cancelled=%(cancelled)s, "
+        sql += "idWFState=%(id_wf_state)s, idParent=%(id_parent)s "
+        sql += "WHERE iddwbatch=%(idDWBatch)s"
+        CrawlerDatabase.execute(sql, True, parameters)
+
+    def complete(self):
+        logger = Logger.getInstance()
+        logger.info("Batch " + str(self.id_batch) + " completed")
+        CrawlerDatabase.execute("UPDATE DWBatch SET idWFState=2, completed=SYSDATE() WHERE iddwbatch=" + str(self.id_batch), True)
+
+    def cancel(self):
+        logger = Logger.getInstance()
+        logger.info("Batch 
" + str(self.id_batch) + " canceled") + CrawlerDatabase.execute("UPDATE DWBatch SET idWFState=4, cancelled=SYSDATE() WHERE iddwbatch=" + str(self.id_batch), True) + + def integrate(self): + logger = Logger.getInstance() + logger.info("Batch " + str(self.id_batch) + " integrated") + CrawlerDatabase.execute("UPDATE DWBatch SET idWFState=3, integrated=SYSDATE() WHERE iddwbatch=" + str(self.id_batch), True) + + def notify(self): + logger = Logger.getInstance() + logger.info("Batch " + str(self.id_batch) + " notified") + CrawlerDatabase.execute("UPDATE DWBatch SET idWFState=5 WHERE idDWBatch=" + str(self.id_batch), True) + + def latest_batches(self, only_other_sources): + """ + When integrating a batch or looking for other DOAPs belonging to the same project I + need a list of the latest batches coming from a source different from the batch or the main doap I have + This method, when only_other_sources=true, provides the list of IDs of such batches + when only_other_sources=false includes also a batch coming from self.idSource + """ + old_batch_ids = [[] for i in range(Sources.count + 1)] + for iter in range(1, Sources.count + 1): + if not (self.id_source == iter and only_other_sources): + parameters = { 'id_source': iter } + # Most recent integrated or notified + cursor = CrawlerDatabase.execute_cursor("SELECT IDDwBatch, idParent FROM DWBatch JOIN (SELECT max(IDDwBatch) id FROM DWBatch WHERE idSource=%(id_source)s AND (idWFState=3 OR idWFState=4)) max ON max.id=DWBatch.IDDwBatch", parameters) + results = cursor.fetchall() + #just zero or one record + if len(results) > 0: + record = results[0] + # if it has no parent it was not split and there's just one batch to consider + if record[1] == None: + if record[0] != None: + old_batch_ids[iter].append(str(record[0])) + else: + parameters = { 'idParent': record[1] } + cursor = CrawlerDatabase.execute_cursor("SELECT IDDwBatch FROM DWBatch WHERE idParent=%(idParent)s AND (idWFState=3 OR idWFState=4)", parameters) + results = cursor.fetchall() + for record in results: + old_batch_ids[iter].append(str(record[0])) + return old_batch_ids diff --git a/web-crawler/eclipse_data_fetcher.py b/web-crawler/eclipse_data_fetcher.py new file mode 100755 index 00000000..8958a38f --- /dev/null +++ b/web-crawler/eclipse_data_fetcher.py @@ -0,0 +1,236 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. +# +# Copyright 2014 Bitergium SLL + +from generic_data_fetcher import GenericDataFetcher +from CrawlerDatabase import CrawlerDatabase +import urllib2, urllib +import json +from Utils import Logger, Configuration, Sources, UrllibHelper +import base64 +from lxml import etree +from lxml import html + +class EclipseDataFetcher(GenericDataFetcher): + ''' + Crawls the site http://projects.eclipse.org/ using the JSON information found here: http://projects.eclipse.org/json/projects/all + Maria Di Girolamo 2020 as reported in the wiki documentation: + The PMI provides a small number of web APIs: NO LONGER SUPPORTED json/projects/all . + Now you can export only a specific project: json/project/% + Export project data for a specific project. + e.g. http://projects.eclipse.org/json/project/technology.egit + If you want export git project : + 1) json/repos/git + Export all Git repos with project association + e.g. 
http://projects.eclipse.org/json/repos/git + Maria 2020 bisogna attivare queste classi per eclipse: + + 2) json/release/% + Export project simultaneous release participation data + e.g. http://projects.eclipse.org/json/release/kepler + + 3) json/technology/% + Export technology type information + e.g. http://projects.eclipse.org/json/technology/modeling + e.g. http://projects.eclipse.org/json/technology/internet%20of%20things + + 4) jsonp/technology/% + Export technology type information (in JSONP format) + e.g. http://projects.eclipse.org/jsonP/technology/modeling + Iniziamo con json technology IOT + ''' + ''' + MDG luglio 2020 + carichiamo tutti i progetti di qualsiasi tipo di technologia + ''' + def __init__(self, source): + super(EclipseDataFetcher, self).__init__(source) + + def run(self): + ''' + Loads JSON information: http://projects.eclipse.org/json/projects/all + ''' + #response = urllib2.urlopen('https://projects.eclipse.org/list-of-projects/json/projects/all') + req = urllib2.Request("https://projects.eclipse.org/list-of-projects?combine=&field_project_techology_types_tid=All&field_state_value_2=All") + eclipse_html = urllib2.urlopen(req) + self.tree = html.fromstring(eclipse_html.read()) + eclipse_name = self.tree.xpath('//div[@typeof="sioc:Item foaf:Document"]/a/text()') + print eclipse_name +''' +start comment MDG 2020 + decoded = json.loads(eclipse_json) + for key in decoded['projects'].keys(): + p = decoded['projects'][key] + + ep = EclipseProject() + ep.idDWBatch = self.batch.id_batch + ep.name = key + ep.id = p['id'][0]['safe_value'] + ep.documentation_url = "" + if len(p['documentation_url']) > 0: + ep.documentation_url = p['documentation_url'][0]['url'] + ep.wiki_url = "" + if len(p['wiki_url'])>0: + ep.wiki_url = p['wiki_url'][0]['url'] + ep.title = p['title'] + ep.download_url = "" + if len(p['download_url'])>0: + ep.download_url = p['download_url'][0]['url'] + ep.parent_project = "" + if len(p['parent_project'])>0: + ep.parent_project = p['parent_project'][0]['id'] + ep.website_url = "" + if len(p['website_url'])>0: + ep.website_url = p['website_url'][0]['url'] + ep.description = "" + if len(p['description'])>0: + ep.description = p['description'][0]['safe_value'] + ep.bugzilla = "" + if len(p['bugzilla'])>0: + ep.bugzilla = p['bugzilla'][0]['query_url'] + ep.github_repos = "" + if len(p['github_repos'])>0: + ep.github_repos = p['github_repos'][0]['url'] +end comment MDG 2020 +''' +# I take just the first as in a doap file I can have just one repository for github +# "github_repos": [ +# { +# "url": "https:\/\/github.com\/eclipse\/californium", +# "title": null, +# "attributes": [ +# +# ] +# }, +# { +# "url": "https:\/\/github.com\/eclipse\/californium.actinium", +# "title": null, +# "attributes": [ +# +# ] +# }, +''' +start comment MDG 2020 + ep.licenses = [] + if 'licenses' in p.keys(): + for license in p['licenses']: + el = EclipseLicense() + el.name = license['name'] + el.url = license['url']; + ep.licenses.append(el) +end comment MDG 2020 +''' +# "licenses": [ +# { +# "name": "Apache License, Version 2.0", +# "url": "http:\/\/opensource.org\/licenses\/Apache-2.0" +# }, +# { +# "name": "Eclipse Public License 1.0", +# "url": "http:\/\/www.eclipse.org\/org\/documents\/epl-v10.php" +# } +# ], + # releases in Eclipse see more than one project involved +''' +start comment MDG 2020 + ep.releases = [] + if 'releases' in p.keys(): + for release in p['releases']: + er = EclipseRelease() + er.title = release['title'] + ep.releases.append(er) +end commentato oda MDG 2020 
+''' + # ep.tags = p['tags'] #mostly empty otherwise IDs of tags (where can I take them from ? I do not see them on project page) + # ep.documentation = p['documentation'] #always empty + # ep.downloads = p['downloads'] + # ep.logo = p['logo'] + # ep.mailing_lists = p['mailing_lists'] + # 'marketplace' + # 'build_technologies' + # 'dev_list' + # 'build_url' + # 'source_repo' + # 'related' + # 'other_links' + # 'forums' + # 'working_group' + # 'state' + # 'build_doc' + # 'scope' + # 'plan_url' + # 'proposal_url' + # 'techology_types' + # 'build_description' + # 'gettingstarted_url' + # 'downloads_message' + # 'contrib_message' + # 'team_project_sets' + # 'update_sites' + #MDG 2020 ep.save() + +class EclipseProject(): + ''' + ''' + def save(self): + ''' + Save the project and related entities associated to current batch + ''' + logger = Logger.getInstance() + try: + parameters = { + 'idDWBatch': self.idDWBatch, + 'name': self.name, + 'id': self.id, + 'description': self.description, + 'download_url': self.download_url, + 'documentation_url': self.documentation_url, + 'wiki_url': self.wiki_url, + 'title': self.title, + 'parent_project': self.parent_project, + 'website_url': self.website_url, + 'bugzilla': self.bugzilla, + 'github_repos': self.github_repos, + } + self.id = CrawlerDatabase.execute("INSERT into RAW_eclipse_Project (name, id, description, download_url, documentation_url, wiki_url, title, parent_project, website_url, bugzilla, github_repos, idDWBatch) VALUES (%(name)s, %(id)s, %(description)s, %(download_url)s, %(documentation_url)s, %(wiki_url)s, %(title)s, %(parent_project)s, %(website_url)s, %(bugzilla)s, %(github_repos)s, %(idDWBatch)s)", True, parameters) + if not self.id is None: + for release in self.releases: + release.id_project = self.id + release.save() + for oss_license in self.licenses: + oss_license.id_project = self.id + oss_license.save() + except Exception as ex: + logger.error("EclipseProject.save() " + self.name + " - "+ str(ex)) + + +class EclipseLicense(): + def save(self): + logger = Logger.getInstance() + try: + parameters = { + 'name': self.name, + 'url': self.url, + 'id_RAW_eclipse_project': self.id_project + } + self.id = CrawlerDatabase.execute("INSERT into RAW_eclipse_license (name, url, id_RAW_eclipse_project) VALUES (%(name)s, %(url)s, %(id_RAW_eclipse_project)s)", True, parameters) + except Exception as ex: + logger.error("EclipseLicense.save() " + self.name + " - "+ str(ex)) + + +class EclipseRelease(): + def save(self): + logger = Logger.getInstance() + try: + parameters = { + 'title': self.title, + 'id_RAW_eclipse_project': self.id_project + } + self.id = CrawlerDatabase.execute("INSERT into RAW_eclipse_releases (title, id_RAW_eclipse_project) VALUES (%(title)s, %(id_RAW_eclipse_project)s)", True, parameters) + except Exception as ex: + logger.error("EclipseRelease.save() " + self.name + " - "+ str(ex)) + + + diff --git a/web-crawler/generic_data_fetcher.py b/web-crawler/generic_data_fetcher.py new file mode 100755 index 00000000..25347ec0 --- /dev/null +++ b/web-crawler/generic_data_fetcher.py @@ -0,0 +1,37 @@ +# This Source Code Form is subject to the terms of the Mozilla Public +# License, v. 2.0. If a copy of the MPL was not distributed with this +# file, You can obtain one at http://mozilla.org/MPL/2.0/. 
+#
+# Copyright 2014 Bitergium SLL
+
+import os
+
+from dw_batch import DwBatch
+from CrawlerDatabase import CrawlerDatabase
+from Utils import Configuration
+
+class GenericDataFetcher(object):
+    def __init__(self, id_source):
+        self.id_source = id_source
+        self.batch = DwBatch()
+        self.batch.initialize(id_source)
+        parameters = {
+            'idSource': self.id_source
+        }
+        self.date_last_completed = CrawlerDatabase.select_string("SELECT max(completed) from DWBatch WHERE idSource=%(idSource)s", parameters)
+
+    def htmlMonitor(self, html_text):
+        fetcher_file = open(Configuration.path_html + 'fetcher.html', "w")
+        fetcher_file.write(html_text)
+        fetcher_file.close()
+
+    def writeMetadata(self, html_text):
+        html_file = Configuration.path_html + '/metadata.html'
+        file_existed = os.path.isfile(html_file)
+        with open(html_file, "w") as fetcher_file:
+            fetcher_file.write(html_text)
+        if not file_existed:
+            os.chmod(html_file, 0o777)
+
diff --git a/web-crawler/github_data_fetcher.py b/web-crawler/github_data_fetcher.py
new file mode 100755
index 00000000..4877c556
--- /dev/null
+++ b/web-crawler/github_data_fetcher.py
@@ -0,0 +1,669 @@
+# This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+#
+# Copyright 2014 Bitergium SLL
+
+#MDG March 2021
+#Owner Engineering Ingegneria Informatica S.p.A.
+#Update the code for the MORPHEMIC release 1.5
+
+from doapfiend.doaplib import load_graph
+from generic_data_fetcher import GenericDataFetcher
+from Utils import Configuration, Logger, Sources
+from datetime import date, timedelta
+from CrawlerDatabase import CrawlerDatabase
+from subprocess import Popen
+import glob
+from calendar import monthrange
+import json
+import os
+import traceback
+import time
+from dw_batch import DwBatch
+import requests
+from requests.sessions import session
+
+from webob import Response, exc, response, request
+session = requests.Session()
+
+
+class GitHubDataFetcher(GenericDataFetcher):
+
+    def __init__(self):
+        super(GitHubDataFetcher, self).__init__(Sources.Github)
+
+    def run(self):
+        months_of_stats = 0
+        dt = date.today()
+        batch = self.batch.id_batch
+        #print "init after batch"
+        while months_of_stats < Configuration.github_archive_months:
+            #go to first day of month
+            dt1 = dt.replace(day=1)
+            #back one day so I get previous month
+            dt = dt1 - timedelta(days=1)
+            year_minus_cursor = dt.year
+            month_minus_cursor = dt.month
+            gas = GithubArchiveStats(year_minus_cursor, month_minus_cursor, batch)
+            #do I have data for this month?
+            parameters = {
+                'stat_year': year_minus_cursor,
+                'stat_month': month_minus_cursor
+            }
+            if CrawlerDatabase.select_int("SELECT COUNT(*) FROM rc_gh_archive WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s", parameters) > 0:
+                months_of_stats = months_of_stats + 1
+            elif gas.filesAvailable():
+                gas.fetchFiles()
+                gas.processFiles()
+                #print "before send_data"
+                #gas.send_data()
+                #print "after send_data"
+                #gas.get_response()
+                gas.limitBatchLength()
+                months_of_stats = months_of_stats + 1
+
+
+
+
+class GithubArchiveStats():
+
+    """
+    We fetch statistics from http://www.githubarchive.org/
+    an instance is taking care of a specific month in a year
+    """
+    def __init__(self, y, m, batch):
+        logger = Logger.getInstance()
+        #Maria commented on March 2021
+        self.batch = batch
+        #logger.info("Batch is : " + 
str(self.batch)) + + ''' + if not (batch is None): + self.batch = DwBatch() + self.batch.id_batch = batch + logger.info("self.batch.id_batch: " + str(self.batch.id_batch)) + self.batch.id_source = 1 + # I create an array of IDs of old batches for comparison; one for each source + self.old_batch_ids = self.batch.latest_batches(False) + ''' + self.y = y + self.m = m + self.mm = "%02d" % m + dt = date.today() + dt1 = dt.replace(day=1) #go to first day of month + dt = dt1 - timedelta(days=31) #add 31 days so I go to next month + #dt = dt1 - timedelta(days=31) #add 31 days so I go to next month + self.mm_next_month = "%02d" % dt.month + self.yyyy_next_month = "%02d" % dt.year + self.url = "http://127.0.0.1:6543/projects?update=0" + + + def htmlMonitor(self, html_text): + print fetcher_file + fetcher_file = open(Configuration.path_html + '/crawler/fetcher.html',"w") + print "dentro htmlMonitor" + os.chmod(Configuration.web_path_html + '/projects.json', 0o777) + fetcher_file.write(html_text) + fetcher_file.close() + + + ''' + def writeJson(self,a): + json_file = Configuration.path_html + '/projects.json' + fetcher_file = open(Configuration.path_html + '/crawler/fetcher.html',"w") + if os.path.isfile(json_file): + os.remove(json_file) + #print("File Removed!Now I'm going to create it") + fetcher_file = open(json_file,"w") + os.chmod(Configuration.path_html + '/projects.json', 0o777) + else: + fetcher_file = open(json_file,"w") + os.chmod(Configuration.path_html + '/projects.json', 0o777) + with fetcher_file as out_file: + json.dump(a, out_file,indent=2) + #json.dumps(a,out_file, indent=4) + + ''' + + #print('write file json ') + + + def send_data(self, updated): + #possiamo dopo inserimento in metadataproject spedire al + #knowledgebase: provare .... + if request.remote_addr != Configuration.KAaddress: + raise _401() + + try: + CrawlerDatabase.connect() + + except mysql.connector.Error as err: + print("Something went wrong: {}".format(err)) + + + + print "estraggo i miei metadata di idProject and updated=0" + + try: + cursor = CrawlerDatabase.execute_cursor("SELECT idProject,project, description, versionUrl,version, deploymentUrl, url, idDWBatch,updated FROM MetadataProject where updated='"+str(updated)+"'") + results = cursor.fetchall() + print results + if (results == ()): + print "There is no metadataproject to send. Please, wait....." + #Attenzione : questa operazione non deve essere simulata la risposta REST + #Spezzare il metodo in due parti restituire True o False o array vuoto + #se non ci sono i metadati allora il codice continua + #altrimenti gestisce con il rest + #return + else: + for record in results: + metadata = { + 'idProject' : record[0], + 'name' : record[1], + 'description' : record[2], + 'versionUrl' : record[3], + 'version' : record[4], + 'deploymentUrl' : record[5], + 'url' : record[6], + 'idDWBatch' : record[7], + 'updated' : record[8] + } + _VALUES['value'].append(metadata) + + l = len (_VALUES['metadata']) + + #soglia_max = 100 + soglia_max = 5 + num = l/soglia_max + resto = l%soglia_max + page = [[]] + i = 0 + num = num+1; + int_max =0; + while (i Configuration.max_batch_size: + logger.info("limitBatchLength Batch " + str(self.batch.id_batch) + " has " + str(batch_length) + " projects. 
I will be split to reduce its size.") + while batch_length > Configuration.max_batch_size: + # I load the current batch + current_batch = DwBatch() + current_batch.load(self.batch.id_batch) + # I create a new batch with same info as this one + child_batch = current_batch + # new one is a child of current batch + child_batch.id_parent = self.batch.id_batch + child_batch.id_batch = 0 + # insert to DB + child_batch.initialize(child_batch.id_source) + # write to db all fields + child_batch.writeToDB() + # now I have the new child_batch.id_batch + # see here for explanation of following query: http://stackoverflow.com/questions/1513206/update-multiple-rows-using-limit-in-mysql + #Logger.debug("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + logger.info("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + parUpdate = { + 'idDWBatchOLD': self.batch.id_batch, + 'idDWBatch': child_batch.id_batch, + 'max_batch_size' : Configuration.max_batch_size + } + sqlUpdate = "UPDATE DWBatch SET idDWBatch=%(idDWBatch)s WHERE " + sqlUpdate += " idDWBatch=%(idDWBatchOLD)s LIMIT %(max_batch_size)s) tmp)" + #Logger.debug("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + logger.info("Batch " + str(child_batch.id_batch) + " created as a split of " + str(self.batch.id_batch)) + CrawlerDatabase.execute(sqlUpdate, True, parUpdate) + # let's check again if there are too many projects + batch_length = CrawlerDatabase.select_int(sql, parameters) + + + @staticmethod + def statsAvailable(): + """ + Returns true if there are N months of statistics in the local database out of the last N+1 months + where N = Configuration.github_archive_months + we look back N+1 months because testermonth's statistics will not be ready the during the first days + of the month; so it is ok to have the last N available even if yestermonth is not there + """ + logger = Logger.getInstance() + months_of_stats = 0 + how_many = 0 + date_cursor = date.today() + while months_of_stats <= Configuration.github_archive_months: + dt1 = date_cursor.replace(day=1) #go to first day of month + date_cursor = dt1 - timedelta(days=1) #back one day so I get previous month + year_minus_cursor = date_cursor.year + month_minus_cursor = date_cursor.month + #do I have data for this month + parameters = { + 'stat_year': year_minus_cursor, + 'stat_month': month_minus_cursor + } + + if CrawlerDatabase.select_int("SELECT COUNT(*) FROM rc_gh_archive WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s", parameters) > 0: + how_many = how_many + 1 + months_of_stats = months_of_stats + 1 + logger.debug("GithubArchiveStats.statsAvailable: Do we have any stats to process?" + str(how_many >= Configuration.github_archive_months)) ## DEBUG + return how_many >= Configuration.github_archive_months + + + def filesAvailable(self): + """ + Are files available at http://data.githubarchive.org + I assume all files for a month are available if first file of next month is available + """ + proc = Popen("wget http://data.githubarchive.org/" + self.yyyy_next_month + "-" + self.mm_next_month + "-01-0.json.gz", shell=True, cwd=Configuration.temporary_directory) + return_code = proc.wait() + if return_code == 8: + return False + return True + + def fetchFiles(self): + """ + Files are per hour with name: YEAR-MONTH-DAY-HOUR.json.gz + """ + logger = Logger.getInstance() + self.html_monitor_text = "Github Integrator
" + + #Pavia: for day_iter in range(1, monthrange(self.y, self.m)[1] + 1): #number of days in this month + for day_iter in range(1, 3): #number of days in this month + #for day_iter in range(1, 2): #number of days in this month + #Maria marzo 2021for day_iter in range(1, 4): + for hour_iter in range(24): + #Pavia: for hour_iter in range(24): + #for hour_iter in range(10, 12): + #Mariafor hour_iter in range(10, 11): + sz_day = "%02d" % day_iter + sz_hour = str(hour_iter) + + if not os.path.isfile(Configuration.github_file_path + "/gh/" + str(self.y) + "-" + self.mm + "-" + sz_day + "-" + sz_hour + ".json.gz"): + proc = Popen("wget http://data.githubarchive.org/" + str(self.y) + "-" + self.mm + "-" + sz_day + "-" + sz_hour + ".json.gz", shell=True, cwd=Configuration.github_file_path + "/gh") + return_code = proc.wait() + if return_code == 8: + logger.error("wget http://data.githubarchive.org/" + str(self.y) + "-" + self.mm + "-" + sz_day + "-" + sz_hour + ".json.gz" + " returned error code 8") + + def processFiles(self): + logger = Logger.getInstance() + project_name = "" + release_id = "" + #logger.info("=== MA siamo dentro processFiles=====") + compressed_files = glob.glob(Configuration.github_file_path + "/gh/" + str(self.y) + "-" + self.mm + "*.json.gz") + for compressed_file in compressed_files: + proc = Popen("gunzip " + compressed_file, shell=True, cwd=Configuration.github_file_path + "/gh") + return_code = proc.wait() + + uncompressed_files = glob.glob(Configuration.github_file_path + "/gh/" + str(self.y) + "-" + self.mm + "*.json") + #logger.info(Configuration.github_file_path + "/gh/" + str(self.y) + "-" + self.mm + "*.json") + for uncompressed_file in uncompressed_files: + with open(uncompressed_file) as f: + content = f.readlines() + for line in content: + try: + decoded = json.loads(line) + # GistEvent lines have no repository + if decoded["type"] != "GistEvent" : #not interested in Gists + #To speed up testing restrict to ReleaseEvent + #if decoded["type"] == "ReleaseEvent": + repo = decoded["repo"] + actor = decoded["actor"] + logger.debug("Parsing event type: " + decoded["type"] + " from project: " + repo["name"]) + try: + if decoded["type"] == "RepositoryEvent" and ( decoded["action"] == "created" or decoded["action"] == "edited" or decoded["action"] == "renamed" ): + try: + project_description = decoded["description"] + logger.debug("Found description:" + project_description + " for project: " + repo["name"]) + except: + project_description = "" + else: + project_description = "" + + #print("DEBUG!!") + #time.sleep(1.5) + ''' + if decoded["type"] == "PullRequestEvent" : + payload = decoded["payload"] + pull_request = payload["pull_request"] + deployments_url = pull_request["deployments_url"] + license = pull_request["license"] + language = pull_request["language"] + logger.debug("deploy " + deployment_url + " license " + license + " language " + language) + ''' + #Pavia: in questo pezzo di codice incrementa gli eventi relativi ad un progetto gia' conosciuto per il periodo preso in considerazione nelle statistiche + #se in precedenza abbiamo trovato una descrizione del progetto aggiorna il relativo campo + #print self.y + #print self.m + #print "BATCH in GitHUBINTEGRATOR ha valore " + #print self.batch + #print "BATCH in GitHUBINTEGRATOR" + #logger.info("At the moment developer is " + str(actor["login"]) + "for project" + repo["name"]) + parameters = { + 'project_name': repo["name"], + 'description': project_description, + 'stat_year': self.y, + 'stat_month': self.m, + 
'idDWBatch' : str(self.batch) + + } + #print (str(parameters)) + if CrawlerDatabase.select_int("SELECT COUNT(*) FROM rc_gh_archive WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s AND project_name=%(project_name)s", parameters) > 0: + + sqlString = "select stat_year from rc_gh_archive where project_name LIKE %(project_name)s" + + #if (CrawlerDatabase.select_int(sqlString,parameters) != self.y): + if parameters['description'] == "": #if description is empty I do not overwrite it as it might have been there in other events + CrawlerDatabase.execute("UPDATE rc_gh_archive SET event_count=event_count+1 WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s AND project_name=%(project_name)s", True, parameters) + #print "esco dall'if di update rc_gh_archive" + else: + CrawlerDatabase.execute("UPDATE rc_gh_archive SET description=%(description)s, event_count=event_count+1 WHERE stat_year=%(stat_year)s AND stat_month=%(stat_month)s AND project_name=%(project_name)s", True, parameters) + #print "esco else di update rc_gh_archive" + #else: + #Maria February 2021 cancello il progetto associandogli l'anno piu recente + #print "boh forse non si deve fare in questo modo?" + # CrawlerDatabase.execute("DELETE FROM rc_gh_archive where stat_year = %(stat_year)s and project_name = %(project_name)s",False, parameters) + # CrawlerDatabase.execute("INSERT INTO rc_gh_archive (project_name, description, event_count, stat_year, stat_month, idDWBatch) VALUES (%(project_name)s, %(description)s, 1, %(stat_year)s, %(stat_month)s, %(idDWBatch)s) ON DUPLICATE KEY UPDATE description = %(description)s, event_count=1 ", False, parameters) + + else: + try: + #CrawlerDatabase.execute("INSERT INTO rc_gh_archive (project_name, description, event_count, stat_year, stat_month, idDWBatch) VALUES (%(project_name)s, %(description)s, 1, %(stat_year)s, %(stat_month)s, %(idDWBatch)s) ON DUPLICATE KEY UPDATE description = %(description)s, event_count=1 ", False, parameters) + CrawlerDatabase.execute("INSERT INTO rc_gh_archive (project_name, description, event_count, stat_year, stat_month, idDWBatch) VALUES (%(project_name)s, %(description)s, 1, %(stat_year)s, %(stat_month)s, %(idDWBatch)s)", True, parameters) + except: + traceback.print_exc() + # except Exception, ex: + #logger.error("Error insert rc_gh_archive : " + str(ex)) + #Pavia: se l'evento e' di tipo ReleaseEvent, qui parsiamo le informazioni necessarie per popolare la rc_gh_archive_release + if decoded["type"] == "ReleaseEvent": + #Pavia: l'"url" e l'"id" della release ora li troviamo sotto payload->release + payload = decoded["payload"] + release = payload["release"] + id = release["id"] + #developer_name': repo["name"].rsplit("/",1)[0], + #MDG : add to verify that the project with specific version doesn't exist yet into DB + parameters = { + 'project_name': repo["name"], + 'stat_year': self.y, + 'stat_month': self.m, + 'url': str(release["url"]), + 'version': str(release["id"]), + } + logger.info("Found release event for project: " + repo["name"] + ", release id: " + str(release["id"]) + ", release url: " + release["url"]) + + try: + #search if project with specific version yet exist in the DB + sql_query = "SELECT rel.project_name " + sql_query = sql_query + " FROM rc_gh_archive_release rel LEFT JOIN" + sql_query = sql_query + "(select project_name, idDWBatch FROM rc_gh_archive GROUP BY project_name)" + sql_query = sql_query + "as arc on rel.project_name=arc.project_name ORDER BY rel.project_name" + #print ("Search if project with specific version exist into 
the rc_gh_archive join with rc_gh_archive_release : ", sql_query) + project_name = CrawlerDatabase.select_natural(sql_query) + logger.info("project_name " + str(project_name)) + sqlString = "INSERT INTO rc_gh_archive_release (project_name, releaseUrl, version) VALUES (%(project_name)s, %(url)s, %(version)s)" + #sqlString = " ON DUPLICATE KEY UPDATE version=%(version)s, releaseUrl=%(url)s, developer_name=%(developer_name)s" + #logger.info("INSERT into rc_gh_archive_release is " + str(sqlString)) + #logger.info(str(parameters)) + #if the project with the specific version is not loaded then insert else no action is provided + if (project_name != repo["name"]): + logger.info("INSERT INTO rc_gh_archive") + #CrawlerDatabase.execute("INSERT INTO rc_gh_archive_release (project_name, releaseUrl, version, developer_name) VALUES (%(project_name)s, %(url)s, %(version)s, %(developer_name)s) ON DUPLICATE KEY UPDATE version=%(version)s, releaseUrl=%(url)s, developer_name=%(developer_name)s ", True, parameters) + CrawlerDatabase.execute("INSERT INTO rc_gh_archive_release (project_name, releaseUrl, version) VALUES (%(project_name)s, %(url)s, %(version)s)", True, parameters) + + except: + traceback.print_exc() + #here start the integration of the project information stored into rc_gh_archive and rc_gh_archive_release + logger.info("Now store metadata project") + start = time.time() + try: + logger.info("Starting Metadata Project Integration") + #Load the whole batch; for flossmole gh_projects table is dropped and created every time it is downloaded so we do not filter by idDWBatch + #all fields in gh_projects: datasource_id, description, developer_name, fork_number, forked, homepage, last_modified, open_issues, private, project_name, url, watchers, XML + parameters = { 'how_many': Configuration.github_top_projects_event_count } + sql_query = "SELECT rel.project_name, arc.description, rel.releaseUrl, rel.version,arc.idDWBatch " + sql_query = sql_query + " FROM rc_gh_archive_release rel LEFT JOIN" + sql_query = sql_query + " (select project_name, description,idDWBatch FROM rc_gh_archive GROUP BY project_name)" + sql_query = sql_query + " as arc on rel.project_name=arc.project_name ORDER BY rel.project_name" + + logger.info("prima di cursor select rc_gh_archive_release") + + cursor = CrawlerDatabase.execute_cursor(sql_query,parameters) + #logger.info("cursor esegui cursor sqlquery parameters") + results = cursor.fetchall() + #for each project in the batch, sorted by project_name, there might be more than one row per project if there are more releases + current_project_name = "" + for record in results: + #id_batch = self.batch.id_batch + if record[4] != None: + revision = record[3] + release_url = record[2] + release_url = release_url.replace('/releases/tag/', '/archive/') + release_url = release_url.replace('/releases/', '/archive/') + # add .zip + release_url = release_url + ".zip" + #download_page = release_url + ".zip" + deployment_url = "https://api.github.com/repos/" + record[0] + "/deployments" + #repo url + url = "https://api.github.com/repos/" + record[0] + release = record[3].encode('utf-8') + project = record[0] + #developer = record[0] + + idDWBatch = str(record[4]) + #proviamo a creare il campo language : + #example: https://api.github.com/repos/jgtate/Hackathon2018 + #get repo json information : + ''' + r = requests.get(url) + decoded = json.loads(r.content) + language = decoded["language"] + ''' + ''' + languageUrl = decoded["languages_url"] + rl = requests.get(languageUrl) + tot_languages = 
encoded(rl.content) + ''' + + + parameters = { + 'project' : project, + 'description' : str(record[1]), + 'url' : url, + 'versionUrl' : release_url, + 'version' : release, + 'deploymentUrl' : deployment_url, + 'idDWBatch' : idDWBatch + + } + + #Beofre to insert the new data in MetadataProject , a comparison with the existing data in MetadataProject has been done. + #The following scenario has been identified + #1) if the new data exist and not updated -> no action + #2) if the new data are in a new version compared to old data : update a old data and the flag updated change from 0 to 1 + #3) if the new data don't exist in MEtadataProject -> insert into table and change the flag update from 0 t0 1 + #sqlQueryString = "SELECT project,developer, description, url,versionUrl,version,deploymentUrl FROM MetadataProject " + + try: + sqlQueryString = "SELECT version FROM MetadataProject " + sqlQueryString = sqlQueryString + " WHERE project= '" + str(project) + sqlQueryString = sqlQueryString + "' AND version= '" + str(release) + sqlQueryString = sqlQueryString + "' AND versionUrl = '" + release_url + sqlQueryString = sqlQueryString + "' AND deploymentUrl='" + str(deployment_url) + "' AND url = '" + str(url) + "'" + #logger.info(sqlQueryString) + release = CrawlerDatabase.select_natural(sqlQueryString) + if release !=None: + logger.info ("if release exist for the project " + str(project)) + logger.info("release esiste -> progetto esiste -> guardo il flag update ") + try: + sqlQueryString = "SELECT updated FROM MetadataProject " + sqlQueryString = sqlQueryString + " WHERE project= '" + str(project) + sqlQueryString = sqlQueryString + "' AND version= '" + str(release) + sqlQueryString = sqlQueryString + "' AND versionUrl = '" + release_url + sqlQueryString = sqlQueryString + "' AND deploymentUrl='" + str(deployment_url) + "' AND url = '" + str(url) + "'" + logger.info(sqlQueryString) + updated = CrawlerDatabase.select_int(sqlQueryString) + logger.info ("updated is " + str(updated)) + ''' + BLOCCO PER IL CONTROLLO METADATAPROJECT spedito o no al KNOWLEDGEBASE + ''' + if (updated == 1): + logger.info("allora esiste il progetto ,se flag a 1 i dati sono stati gia spedit e metto flag a 0 " + str(updated) ) + #se updated uguale a zero ed esiste il progetto aggiorno tutto e update=1 pronto per essere spedito: + #if (updated>0): + sqlUpdate = "UPDATE MetadataProject SET project = '"+ str(project)+ "'," + if (len(record[1])>0): + sqlUpdate = sqlUpdate + " description = '"+ str(record[1]) + "', " + sqlUpdate = sqlUpdate + " url = '" + str(url) + "', versionUrl='" +str(release_url) + "', version='" + str(release) + "', deploymentUrl='"+str(deployment_url)+"', idDWBatch='"+str(idDWBatch)+"'" + sqlUpdate = sqlUpdate + ", updated = 0 WHERE project = '" +str(project) + "'" + logger.info(sqlUpdate) + try: + CrawlerDatabase.execute(sqlUpdate,True) + logger.info("UPDATE SUCCESS ") + except: + traceback.print_exc() + except Exception as ex: + traceback.print_exc() + else: + logger.info("a questo punto inseriamo il nostro project e flag updated a 0 , i dati sono pronti per essere spediti") + logger.info(project + " NOT EXIST. 
+                                    sqlInsertMetadataProject = []
+                                    sqlParamMetadataProject = []
+                                    sqlStringMetadataProject = ""
+                                    #if the project does not exist then I have to insert it and set updated to 0
+                                    #here insert the new data and set the updated flag to 1
+                                    sqlInsertMetadataProject.insert(0,"INSERT INTO MetadataProject (project,")
+                                    sqlParamMetadataProject.insert(0,"VALUES (%(project)s,")
+
+                                    if (len(record[1])>0):
+                                        sqlInsertMetadataProject.append(" description,")
+                                        sqlParamMetadataProject.append(" %(description)s, ")
+
+                                    sqlInsertMetadataProject.append(" url, versionUrl, version, deploymentUrl,idDWBatch,updated) ")
+                                    sqlParamMetadataProject.append(" %(url)s, %(versionUrl)s, %(version)s, %(deploymentUrl)s, %(idDWBatch)s, 0)")
+                                    sqlStringMetadataProject = ''.join(sqlInsertMetadataProject) + ''.join(sqlParamMetadataProject)
+                                    logger.info("--- SQL STRING ---")
+                                    logger.info(sqlStringMetadataProject)
+                                    #time.sleep(5)
+                                    try:
+                                        logger.info("I'M GOING TO INSERT NEW METADATA PROJECT INFORMATION")
+                                        CrawlerDatabase.execute(sqlStringMetadataProject, True, parameters)
+                                        logger.info("INSERT SUCCESS")
+                                        #time.sleep(5)
+                                    except:
+                                        # printing stack trace
+                                        traceback.print_exc()
+                                    #self.send_data()
+                                    #self.get_response(updated)
+                                    # response = session.get('http://127.0.0.1:6543/project?updated='+str(updated))
+                                    #response = request.patch('http://127.0.0.1:6543/projects?updated=0')
+                                    #print response.status_code
+                                    #response = requests.request(method='post','http://127.0.0.1:6543/configuration/knowdata')
+
+                                    #if response.status_code == 200:
+                                    #    data = response.json_body
+                                    #    idProject = data['idProject']
+                                    #    cursor = CrawlerDatabase.execute_cursor("SELECT idProject FROM MetadataProject WHERE updated=0 ")
+                                    #    results = cursor.fetchall()
+                                    #    for record in results:
+                                    #        CrawlerDatabase.execute("UPDATE MetadataProject SET updated=1 WHERE idProject ='"+str(record[0])+"'", True)
+                                    #    return response.status_code
+                                    '''
+                                    listOfProjects = []
+                                    listOfProjects.append(parameters)
+                                    '''
+                                    #json_data = TrivialJSONEncoder().encode(listOfProjects)
+                                    #print json_data
+                                    #self.writeJson(json_data)
+
+
+                            except Exception as ex:
+                                logger.info(str(ex))
+
+
+                    end = time.time()
+                    print ("time is ", str(end - start))
+                    #self.send_data()
+
+                except Exception as ex:
+                    logger.error(str(ex))
+                    # do nothing; sometimes the repository is missing
+
+            except Exception as ex:
+                logger.info(str(ex) + " missing in " + line)
+        except Exception as ex:
+            logger.info(str(ex) + " missing in " + line)
+
\ No newline at end of file
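The block above derives a downloadable archive URL from each GitHub release URL and then chooses between UPDATE and INSERT on MetadataProject by re-querying the table with SQL assembled from string concatenation. A minimal sketch of the same two steps with a parameterized statement follows. The helper names are hypothetical; the sketch assumes, as the surrounding code does, that CrawlerDatabase.execute(sql, commit, params) substitutes %(name)s placeholders from a dict, and it additionally assumes a unique key on MetadataProject.project so that ON DUPLICATE KEY UPDATE can replace the manual SELECT/UPDATE/INSERT. Neither assumption is stated in the original patch.

    from CrawlerDatabase import CrawlerDatabase

    def release_tag_to_archive_url(release_url):
        # hypothetical helper:
        # https://github.com/owner/repo/releases/tag/v1.2 -> https://github.com/owner/repo/archive/v1.2.zip
        return (release_url.replace('/releases/tag/', '/archive/')
                           .replace('/releases/', '/archive/') + '.zip')

    def upsert_metadata_project(record):
        # record layout as produced by the cursor above:
        # (project_name, description, releaseUrl, version, idDWBatch)
        project, description, release_url, version, id_dw_batch = record
        params = {
            'project': project,
            'description': description or '',
            'url': 'https://api.github.com/repos/' + project,
            'versionUrl': release_tag_to_archive_url(release_url),
            'version': version,
            'deploymentUrl': 'https://api.github.com/repos/' + project + '/deployments',
            'idDWBatch': str(id_dw_batch),
        }
        # assumes a unique key on MetadataProject.project
        sql = ("INSERT INTO MetadataProject "
               "(project, description, url, versionUrl, version, deploymentUrl, idDWBatch, updated) "
               "VALUES (%(project)s, %(description)s, %(url)s, %(versionUrl)s, %(version)s, "
               "%(deploymentUrl)s, %(idDWBatch)s, 0) "
               "ON DUPLICATE KEY UPDATE description = %(description)s, versionUrl = %(versionUrl)s, "
               "version = %(version)s, deploymentUrl = %(deploymentUrl)s, "
               "idDWBatch = %(idDWBatch)s, updated = 0")
        CrawlerDatabase.execute(sql, True, params)

Besides being shorter, the parameterized form also survives quotes in project names and descriptions, which break the concatenated SQL strings used above.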
diff --git a/web-crawler/jquery_plugin_data_fetcher.py b/web-crawler/jquery_plugin_data_fetcher.py
new file mode 100755
index 00000000..845dd2b3
--- /dev/null
+++ b/web-crawler/jquery_plugin_data_fetcher.py
@@ -0,0 +1,342 @@
+#This Source Code Form is subject to the terms of the Mozilla Public
+# License, v. 2.0. If a copy of the MPL was not distributed with this
+# file, You can obtain one at http://mozilla.org/MPL/2.0/.
+
+#Copyright 2014 Bitergium SLL
+
+from generic_data_fetcher import GenericDataFetcher
+from CrawlerDatabase import CrawlerDatabase
+from Utils import Logger, Configuration, Sources, UrllibHelper
+from lxml import html
+import urllib2
+#from subprocess import Popen
+import sys
+from lxml import etree
+
+class JQueryPluginDataFetcher(GenericDataFetcher):
+    '''
+    Crawls the site http://plugins.jquery.com/
+    maria 2020 https://www.npmjs.com/search?q=keywords:jquery-plugin
+    '''
+    #commented by maria : def __init__(self, source):
+    def __init__(self):
+        #commented by maria super(JQueryPluginDataFetcher, self).__init__(source)
+        super(JQueryPluginDataFetcher, self).__init__(Sources.JQueryPlugin)
+    def run(self):
+        '''
+        There seems to be no listing of all projects on the site, so we use two very generic queries,
+        "i" and "e", to list all the files; the query is run against name and description, so it is
+        very unlikely that a project lies outside these two queries
+        '''
+        logger = Logger.getInstance()
+        logger.info("run run, but where are we?")
+        #Maria June 2020 self.parseQuery("i")
+        #Maria June 2020 self.parseQuery("e")
+        #use this query: https://www.npmjs.com/search?q=keywords:jquery-plugin
+        self.parseTag("i")
+        #https://plugins.jquery.com/?s=
+        #self.parseQuery("jquery")
+
+    def parseTag(self, tag_string):
+        '''
+        '''
+        self.parsePage(tag_string)
+
+
+    def parsePage(self, tag_string, is_a_tag = True):
+        '''
+        A paginated list of projects. It can be the result of a query, the list of projects with a tag, ...
+        '''
+        logger = Logger.getInstance()
+        logger.info("Starting JQuery Plugin Project Page")
+        try:
+            # I extract the number of pages the results are split into
+            # and I loop parsing each page
+            jqplp = JQueryPluginListPage(tag_string, 1, is_a_tag)
+            for i in range(1, jqplp.numberOfSubPages() + 1):
+                current_plp = JQueryPluginListPage(jqplp.search_string_or_tag, i, is_a_tag)
+                if current_plp.project_urls != None :
+                    for project_url in current_plp.project_urls:
+                        #print "project url : "
+                        jqppp = JQueryPluginProjectPage(project_url)
+                        p = jqppp.project()
+                        p.idDWBatch = self.batch.id_batch
+                        p.save()
+        except TypeError as e:
+            print e
+            print sys.exc_type
+
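The comments in run() above point to https://www.npmjs.com/search?q=keywords:jquery-plugin as the replacement for the defunct plugins.jquery.com listing. A hypothetical sketch of listing those packages through the npm registry's JSON search API is shown below; the endpoint and the response layout are assumptions and are not part of this patch.

    import json
    import urllib2

    # assumed endpoint: npm registry search API, queried for the jquery-plugin keyword
    search_url = 'https://registry.npmjs.org/-/v1/search?text=keywords:jquery-plugin&size=20'
    response = urllib2.urlopen(search_url)
    results = json.loads(response.read())
    for entry in results.get('objects', []):
        package = entry['package']
        print(package['name'] + ' ' + package.get('version', ''))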
+class JQueryPluginListPage():
+    '''
+    '''
+    def __init__(self, search_string_or_tag, page_number = 1, is_a_tag = True):
+        '''
+        is_a_query = False http://plugins.jquery.com/tag/ui/ --> http://plugins.jquery.com/tag/ui/page/2/
+        is_a_query = True http://plugins.jquery.com/?s=i --> http://plugins.jquery.com/page/2/?s=i
+        Loads the page and stores the tree for future xpath searches; it also stores the list of urls pointing to project pages
+        '''
+        '''maria 2020 : https://www.npmjs.com/search?q=keywords:jquery-plugin
+        '''
+        self.is_a_tag = is_a_tag
+        self.search_string_or_tag = search_string_or_tag
+        self.page_number = page_number
+        self.url = self.urlOfNthPage(str(self.page_number))
+        hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
+               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+               'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
+               'Accept-Encoding': 'none',
+               'Accept-Language': 'en-US,en;q=0.8',
+               'Connection': 'keep-alive'}
+        req = urllib2.Request(self.url, headers=hdr)
+        html_page = urllib2.urlopen(req)
+        self.tree = html.fromstring(html_page.read())
+        self.project_urls = self.tree.xpath('//h2[@class="entry-title"]/a/@href')
+
+    def numberOfSubPages(self):
+
+        '''
+        Reads the pager links (page numbers such as 4 ... 250 followed by "Next »")
+        and returns the highest page number found.
+        '''
+        page_numbers = self.tree.xpath('//a[@class="page-numbers"]/text()')
+        max_n = 1
+        for page_number in page_numbers:
+            max_n = max(max_n, int(page_number))
+        return max_n
+
+    def urlOfNthPage(self, i):
+
+        '''
+        self.is_a_query = False http://plugins.jquery.com/tag/ui/ --> http://plugins.jquery.com/tag/ui/page/2/
+        self.is_a_query = True http://plugins.jquery.com/?s=i --> http://plugins.jquery.com/page/2/?s=i
+        '''
+        if self.is_a_tag:
+            return "https://plugins.jquery.com/tag/ui/page/" + str(i) + "/"
+        else:
+            return "https://jquery.com/"
+
+
+
+class JQueryPluginProjectPage():
+    '''
+    '''
+    def __init__(self, url):
+        '''
+        '''
+        self.url = "http:" + url
+        #Maria 2020: this does not seem to work: html_page = UrllibHelper.urlopen(self.url)
+        #Maria 2020: this does not seem to work: self.tree = html.fromstring(html_page)
+        #Maria 2020: when we extract the link to the jquery ui plugin from index.html, only what the
+        #href carries gets stored. In this case that is only
+        #the path to the plugin without the http protocol, so I prepend the http protocol to the self.url string
+        '''
+        ERROR: HTTP Error 403: Forbidden
+        add headers to the request
+        '''
+        hdr = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
+               'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
+               'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
+               'Accept-Encoding': 'none',
+               'Accept-Language': 'en-US,en;q=0.8',
+               'Connection': 'keep-alive'}
+
+        req = urllib2.Request(self.url,"",hdr)
+        html_page = urllib2.urlopen(req)
+        self.tree = html.fromstring(html_page.read())
+
+    def project(self):
+        logger = Logger.getInstance()
+        logger.info("-----> Starting JQuery Plugin Project Page")
+        try:
+            self.tree.xpath('//h2[@class="entry-title"]/a/text()')
+            jqpp = JQueryPluginProject()
+            jqpp.title = self.tree.xpath('//h1[@class="entry-title"]/text()')[0]
+            try:
+                jqpp.attribution = self.tree.xpath('//p[@class="attribution"]/a/text()')[0]
+            except:
+                jqpp.attribution = self.tree.xpath('//p[@class="attribution"]/text()')[0][3:]
+            # some descriptions have lots of \t and \n which sometimes give trouble when encoded in utf8; let's replace them with blanks
+            jqpp.description = self.tree.xpath('//div[@class="block description"]/text()')[0].replace("\n"," ").replace("\t"," ")
+            jqpp.download_link = self.tree.xpath('//a[@class="download"]/@href')[0]
+            tags = self.tree.xpath('//a[@class="tag icon-tag"]/text()')
+            jqpp.tags = []
+            for tag in tags:
+                jqpt = JQueryPluginTag(tag)
+                jqpp.tags.append(jqpt)
+            #versions = self.tree.xpath('//div[@class="block versions"]/table/tbody/tr')
+            versions = self.tree.xpath('//div[@class="block versions"]/table/tbody/tr/td[@class="version"]//text()')
+            release_dates = self.tree.xpath('//div[@class="block versions"]/table/tbody/tr/td[@class="release-date"]//text()')
+            jqpp.versions = []
+            for i in range(0, len(versions)):
+                jqpv = JQueryPluginVersion(versions[i],release_dates[i])
+                jqpp.versions.append(jqpv)
+            jqpp.watchers = self.tree.xpath('//div[@class="info-block watchers"]/div[@class="number"]/text()')[0]
+            jqpp.forks = self.tree.xpath('//div[@class="info-block forks"]/div[@class="number"]/text()')[0]
+            try:
+                jqpp.author = self.tree.xpath('//aside[@class="widget author-info"]/ul/li/a/text()')[0]
+            except:
+                jqpp.author = self.tree.xpath('//aside[@class="widget author-info"]/ul/li/text()')[0]
+            maintainers = self.tree.xpath('//aside[@class="widget maintainer-info"]/ul/li/a/text()')
+            jqpp.maintainers
= [] + for maintainer in maintainers: + jqpm = JQueryPluginMaintainer(maintainer) + jqpp.maintainers.append(jqpm) + jqpp.licenses = [] + oss_licenses = self.tree.xpath('//aside[@class="widget licenses"]/ul/li/a/text()') + for oss_license in oss_licenses: + jqpl = JQueryPluginLicense(oss_license) + jqpp.licenses.append(jqpl) + except Exception as ex: + print("JQueryPluginProjectPage.project() ") + return jqpp + + +class JQueryPluginProject(): + ''' + ''' + def save(self): + logger = Logger.getInstance() + logger.info("Starting JQuery Plugin Project") + ''' + Save the project and related entities associated to current batch + ''' + ''' + http://plugins.jquery.com doesn't have a directory that I can browse to reach all projects + so I have to use some heuristics to find them all; the implication is that I have to go + through the same project more than once; so I first check whether it's been saved already + in the context of current batch. + ''' + try: + parameters = { + 'idDWBatch': self.idDWBatch, + 'entry_title': self.title, + 'attribution': self.attribution, + 'description': self.description, + 'download_link': self.download_link + } + if CrawlerDatabase.select_natural("SELECT Count(*) FROM RAW_JQ_Project WHERE idDWBatch=%(idDWBatch)s AND entry_title=%(entry_title)s", parameters) == 0: + self.id = CrawlerDatabase.execute("INSERT into RAW_JQ_Project (entry_title, attribution, description, idDWBatch, download_link) VALUES (%(entry_title)s, %(attribution)s, %(description)s, %(idDWBatch)s, %(download_link)s)", True, parameters) + if not self.id is None: + for tag in self.tags: + tag.id_project = self.id + tag.save() + for version in self.versions: + version.id_project = self.id + version.save() + for maintainer in self.maintainers: + maintainer.id_project = self.id + maintainer.save() + for oss_license in self.licenses: + oss_license.id_project = self.id + oss_license.save() + except Exception as ex: + print("JQueryPluginProject.save() " + self.title + " - "+ str(ex)) + +class JQueryPluginTag(): + ''' + Tags used on http://plugins.jquery.com + ''' + def __init__(self, tag): + self.tag = tag + + def save(self): + logger = Logger.getInstance() + logger.info("Starting JQuery Plugin Tag") + ''' + Save a tag on the database; On crawler's database tags are not associated to a batch. + Save the association to its project + ''' + parameters = { + 'name': self.tag, + 'id_project': self.id_project + } + self.id = CrawlerDatabase.select_natural("SELECT idRAW_JQ_Tag FROM RAW_JQ_Tag WHERE name=%(name)s", parameters) + if self.id is None: + self.id = CrawlerDatabase.execute("INSERT into RAW_JQ_Tag (name) VALUES (%(name)s)", True, parameters) + parameters = { + 'id_tag': self.id, + 'id_project': self.id_project + } + CrawlerDatabase.execute("INSERT into RAW_JQ_ProjectTag (idRAW_JQ_Project, idRAW_JQ_Tag) VALUES (%(id_project)s, %(id_tag)s)", True, parameters) + + +class JQueryPluginVersion(): + ''' + A version on the site is identified by its composite number (e.g. 1.0.4) and its date + ''' + def __init__(self, version, date): + self.version = version + self.date = date + + def save(self): + logger = Logger.getInstance() + logger.info("Starting JQuery Plugin Version") + ''' + Save a version on the database; On crawler's database versions are not associated to a batch. 
+ Save the association to its project + ''' + parameters = { + 'version': self.version, + 'date': self.date, + 'id_project': self.id_project + } + self.id = CrawlerDatabase.execute("INSERT into RAW_JQ_Version (idRAW_JQ_Project, Version, date) VALUES (%(id_project)s, %(version)s, %(date)s)", True, parameters) + +class JQueryPluginLicense(): + ''' + A license on the site is identified by its name (e.g. MIT) + ''' + def __init__(self, name): + self.name = name + + def save(self): + logger = Logger.getInstance() + logger.info("Starting JQuery Plugin License") + ''' + Save a license on the database; On crawler's database licenses are not associated to a batch. + Save the association to its project + ''' + parameters = { + 'name': self.name + } + self.id = CrawlerDatabase.select_natural("SELECT idRAW_JQ_License FROM RAW_JQ_License WHERE name=%(name)s", parameters) + if self.id is None: + self.id = CrawlerDatabase.execute("INSERT into RAW_JQ_License (name) VALUES (%(name)s)", True, parameters) + parameters = { + 'id_license': self.id, + 'id_project': self.id_project + } + CrawlerDatabase.execute("INSERT into RAW_JQ_ProjectLicense (idRAW_JQ_Project, idRAW_JQ_License) VALUES (%(id_project)s, %(id_license)s)", True, parameters) + +class JQueryPluginMaintainer(): + ''' + A Maintainer on the site is identified by its name (e.g. Scott Gonzalez) + ''' + def __init__(self, name): + self.name = name + + def save(self): + logger = Logger.getInstance() + logger.info("Starting JQuery Plugin Maintainer") + ''' + Save a Maintainer on the database; On crawler's database Maintainers are not associated to a batch. + Save the association to its project + ''' + parameters = { + 'name': self.name + } + self.id = CrawlerDatabase.select_natural("SELECT idRAW_JQ_Maintainer FROM RAW_JQ_Maintainer WHERE name=%(name)s", parameters) + if self.id is None: + self.id = CrawlerDatabase.execute("INSERT into RAW_JQ_Maintainer (name) VALUES (%(name)s)", True, parameters) + parameters = { + 'id_maintainer': self.id, + 'id_project': self.id_project + } + CrawlerDatabase.execute("INSERT into RAW_JQ_ProjectMaintainer (idRAW_JQ_Project, idRAW_JQ_Maintainer) VALUES (%(id_project)s, %(id_maintainer)s)", True, parameters) + + +# jqpdf = JQueryPluginDataFetcher(Sources.JQueryPlugin) +# jqpdf.run() diff --git a/web-crawler/lib/README.txt b/web-crawler/lib/README.txt new file mode 100755 index 00000000..36d0d6ec --- /dev/null +++ b/web-crawler/lib/README.txt @@ -0,0 +1,2 @@ +Please put in this folder all the required libraries for the correct operation of the Crawler. +Add also a table to declare which is the license used by each used library. \ No newline at end of file diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/.pylintrc b/web-crawler/lib/doapfiend/doapfiend-0.3.3/.pylintrc new file mode 100755 index 00000000..de89af7c --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/.pylintrc @@ -0,0 +1,309 @@ +# lint Python modules using external checkers. +# +# This is the main checker controling the other ones and the reports +# generation. It is itself both a raw checker and an astng checker in order +# to: +# * handle message activation / deactivation at the module level +# * handle some basic but necessary stats'data (number of classes, methods...) +# +[MASTER] + +# Specify a configuration file. +#rcfile= + +# Python code to execute, usually for sys.path manipulation such as +# pygtk.require(). +#init-hook= + +# Profiled execution. +profile=no + +# Add to the black list. It should be a base name, not a +# path. 
You may set this option multiple times. +ignore=CVS + +# Pickle collected data for later comparisons. +persistent=yes + +# Set the cache size for astng objects. +cache-size=500 + +# List of plugins (as comma separated values of python modules names) to load, +# usually to register additional checkers. +load-plugins= + + +[MESSAGES CONTROL] + +# Enable only checker(s) with the given id(s). This option conflicts with the +# disable-checker option +#enable-checker= + +# Enable all checker(s) except those with the given id(s). This option +# conflicts with the enable-checker option +#disable-checker= + +# Enable all messages in the listed categories. +#enable-msg-cat= + +# Disable all messages in the listed categories. +#disable-msg-cat= + +# Enable the message(s) with the given id(s). +#enable-msg= + +# Disable the message(s) with the given id(s). +#disable-msg=R0801 + + +[REPORTS] + +# set the output format. Available formats are text, parseable, colorized, msvs +# (visual studio) and html +output-format=text + +# Include message's id in output +include-ids=yes + +# Put messages in a separate file for each module / package specified on the +# command line instead of printing them on stdout. Reports (if any) will be +# written in a file name "pylint_global.[txt|html]". +files-output=no + +# Tells wether to display a full report or only the messages +reports=yes + +# Python expression which should return a note less than 10 (10 is the highest +# note).You have access to the variables errors warning, statement which +# respectivly contain the number of errors / warnings messages and the total +# number of statements analyzed. This is used by the global evaluation report +# (R0004). +evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10) + +# Add a comment according to your evaluation note. This is used by the global +# evaluation report (R0004). +comment=no + +# Enable the report(s) with the given id(s). +#enable-report= + +# Disable the report(s) with the given id(s). +#disable-report= + + +# checks for +# * unused variables / imports +# * undefined variables +# * redefinition of variable from builtins or from an outer scope +# * use of variable before assigment +# +[VARIABLES] + +# Tells wether we should check for unused import in __init__ files. +init-import=no + +# A regular expression matching names used for dummy variables (i.e. not used). +dummy-variables-rgx=_|dummy + +# List of additional names supposed to be defined in builtins. Remember that +# you should avoid to define new builtins when possible. 
+additional-builtins= + + +# checks for : +# * doc strings +# * modules / classes / functions / methods / arguments / variables name +# * number of arguments, local variables, branchs, returns and statements in +# functions, methods +# * required module attributes +# * dangerous default values as arguments +# * redefinition of function / method / class +# * uses of the global statement +# +[BASIC] + +# Required attributes for module, separated by a comma +required-attributes= + +# Regular expression which should only match functions or classes name which do +# not require a docstring +no-docstring-rgx=__.*__ + +# Regular expression which should only match correct module names +module-rgx=(([a-z_][a-z0-9_]*)|([A-Z][a-zA-Z0-9]+))$ + +# Regular expression which should only match correct module level names +const-rgx=(([A-Z_][A-Z1-9_]*)|(__.*__))$ + +# Regular expression which should only match correct class names +class-rgx=[A-Z_][a-zA-Z0-9]+$ + +# Regular expression which should only match correct function names +function-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct method names +method-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct instance attribute names +attr-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct argument names +argument-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct variable names +variable-rgx=[a-z_][a-z0-9_]{2,30}$ + +# Regular expression which should only match correct list comprehension / +# generator expression variable names +inlinevar-rgx=[A-Za-z_][A-Za-z0-9_]*$ + +# Good variable names which should always be accepted, separated by a comma +good-names=i,j,k,ex,Run,_ + +# Bad variable names which should always be refused, separated by a comma +bad-names=foo,bar,baz,toto,tutu,tata + +# List of builtins function names that should not be used, separated by a comma +bad-functions=map,filter,apply,input + + +# try to find bugs in the code using type inference +# +[TYPECHECK] + +# Tells wether missing members accessed in mixin class should be ignored. A +# mixin class is detected if its name ends with "mixin" (case insensitive). +ignore-mixin-members=yes + +# List of classes names for which member attributes should not be checked +# (useful for classes with attributes dynamicaly set). +ignored-classes=SQLObject + +# When zope mode is activated, consider the acquired-members option to ignore +# access to some undefined attributes. +zope=no + +# List of members which are usually get through zope's acquisition mecanism and +# so shouldn't trigger E0201 when accessed (need zope=yes to be considered). +acquired-members=REQUEST,acl_users,aq_parent + + +# checks for +# * external modules dependencies +# * relative / wildcard imports +# * cyclic imports +# * uses of deprecated modules +# +[IMPORTS] + +# Deprecated modules which should not be used, separated by a comma +deprecated-modules=regsub,string,TERMIOS,Bastion,rexec + +# Create a graph of every (i.e. internal and external) dependencies in the +# given file (report R0402 must not be disabled) +import-graph= + +# Create a graph of external dependencies in the given file (report R0402 must +# not be disabled) +ext-import-graph= + +# Create a graph of internal dependencies in the given file (report R0402 must +# not be disabled) +int-import-graph= + + +# checks for sign of poor/misdesign: +# * number of methods, attributes, local variables... 
+# * size, complexity of functions, methods +# +[DESIGN] + +# Maximum number of arguments for function / method +max-args=5 + +# Maximum number of locals for function / method body +max-locals=15 + +# Maximum number of return / yield for function / method body +max-returns=6 + +# Maximum number of branch for function / method body +max-branchs=12 + +# Maximum number of statements in function / method body +max-statements=50 + +# Maximum number of parents for a class (see R0901). +max-parents=7 + +# Maximum number of attributes for a class (see R0902). +max-attributes=7 + +# Minimum number of public methods for a class (see R0903). +min-public-methods=2 + +# Maximum number of public methods for a class (see R0904). +max-public-methods=20 + + +# checks for : +# * methods without self as first argument +# * overridden methods signature +# * access only to existant members via self +# * attributes not defined in the __init__ method +# * supported interfaces implementation +# * unreachable code +# +[CLASSES] + +# List of interface methods to ignore, separated by a comma. This is used for +# instance to not check methods defines in Zope's Interface base class. +ignore-iface-methods=isImplementedBy,deferred,extends,names,namesAndDescriptions,queryDescriptionFor,getBases,getDescriptionFor,getDoc,getName,getTaggedValue,getTaggedValueTags,isEqualOrExtendedBy,setTaggedValue,isImplementedByInstancesOf,adaptWith,is_implemented_by + +# List of method names used to declare (i.e. assign) instance attributes. +defining-attr-methods=__init__,__new__,setUp + + +# checks for: +# * warning notes in the code like FIXME, XXX +# * PEP 263: source code with non ascii character but no encoding declaration +# +[MISCELLANEOUS] + +# List of note tags to take in consideration, separated by a comma. +notes=FIXME,XXX,TODO + + +# checks for similarities and duplicated code. This computation may be +# memory / CPU intensive, so you should disable it if you experiments some +# problems. +# +[SIMILARITIES] + +# Minimum lines number of a similarity. +min-similarity-lines=24 + +# Ignore comments when computing similarities. +ignore-comments=yes + +# Ignore docstrings when computing similarities. +ignore-docstrings=yes + + +# checks for : +# * unauthorized constructions +# * strict indentation +# * line length +# * use of <> instead of != +# +[FORMAT] + +# Maximum number of characters on a single line. +max-line-length=80 + +# Maximum number of lines in a module +max-module-lines=1000 + +# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1 +# tab). +indent-string=' ' diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/AUTHORS b/web-crawler/lib/doapfiend/doapfiend-0.3.3/AUTHORS new file mode 100755 index 00000000..e70ac07a --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/AUTHORS @@ -0,0 +1,8 @@ + +Rob Cakebread - Lead developer + + +The plugin system is based on nose's plugin system. nose is copyright Jason Pellerin jpellerin+nose at gmail dot com. +http://www.somethingaboutorange.com/mrl/projects/nose/ + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/FAQ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/FAQ new file mode 100755 index 00000000..df9b6c26 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/FAQ @@ -0,0 +1,9 @@ + +doapfiend FAQ +============= + +Q: Why do you have functions named 'get_by_pkg_index' and 'query_by_homepage'? + +A: The get_* functions actually fetch a DOAP file. The query_* functions fetch a list of URLs or other information about DOAP. 
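A short illustration of the difference, using the function names mentioned in this FAQ and the calls shown in the package README (network access to the doapspace.org lookup services is assumed):

    from doapfiend.doaplib import get_by_pkg_index, query_by_homepage

    # get_*: fetches an actual DOAP document (RDF/XML) for a package-index name
    doap_xml = get_by_pkg_index('fm', 'nut')          # Freshmeat project 'nut'

    # query_*: returns information about DOAP, here (source, url) pairs
    for source, url in query_by_homepage('http://librdf.org/raptor/'):
        print(source + ' ' + url)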
+ + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/INSTALL b/web-crawler/lib/doapfiend/doapfiend-0.3.3/INSTALL new file mode 100755 index 00000000..2b2612d6 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/INSTALL @@ -0,0 +1,48 @@ +Installation +============ + +Easy Install +------------ + +:: + + $ easy_install doapfiend + +For the development version (uses subversion trunk): + +:: + + $ easy_install doapfiend==dev + + +Distutils +--------- + +In the unpacked archive directory: +$ python setup.py install + + +Gentoo Linux +------------ + +:: + + $ emerge dev-python/doapfiend + + +The ebuild can be found in the 'pythonhead' official Gentoo overlay, use: + +:: + + $ layman --add pythonhead + + +Subversion +---------- + +You can checkout the trunk from our `subversion repository `_ + +:: + + svn co https://svn.doapspace.org/doapfiend/trunk doapfiend + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/LICENSE b/web-crawler/lib/doapfiend/doapfiend-0.3.3/LICENSE new file mode 100755 index 00000000..0c81b8b3 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/LICENSE @@ -0,0 +1,14 @@ +Copyright (c) 2007,2008 Rob Cakebread + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, +publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, +subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE +FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/MANIFEST.in b/web-crawler/lib/doapfiend/doapfiend-0.3.3/MANIFEST.in new file mode 100755 index 00000000..b68d9879 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/MANIFEST.in @@ -0,0 +1,5 @@ +include examples/* +include tests/* +include tests/data/* +include docs/* +include docs/api/* diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/NEWS b/web-crawler/lib/doapfiend/doapfiend-0.3.3/NEWS new file mode 100755 index 00000000..a5422104 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/NEWS @@ -0,0 +1,7 @@ +News +==== + +Apr 22, 2008 +-------------- + + * First release - 0.1.0 diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/PKG-INFO b/web-crawler/lib/doapfiend/doapfiend-0.3.3/PKG-INFO new file mode 100755 index 00000000..b0038ce8 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/PKG-INFO @@ -0,0 +1,121 @@ +Metadata-Version: 1.0 +Name: doapfiend +Version: 0.3.3 +Summary: Command-line tool and library for DOAP (Description of a Project) RDF. 
+Home-page: http://trac.doapspace.org/doapfiend +Author: Rob Cakebread +Author-email: +License: BSD-2 +Download-URL: http://doapspace.org/static/doapfiend/dist/doapfiend-0.3.3.tar.gz +Description: doapfiend + ========= + + Doapfiend is a command-line client and library for fetching, displaying, creating, and manipulating `DOAP `_ (Description of a Project) files. + + Doapfiend uses RDFAlchemy/rdflib as an ORM to query and create DOAP. + + See `docs/api `_ for Epydoc generated API documentation. + + Features + -------- + + * Display metadata in DOAP in a human-readable format + * Get DOAP by a !SourceForge, Freshmeat, PyPI, Ohloh etc. project name + * Search all DOAP spidered on the web (`pingthesemanticweb.com `_) + * Access the metadata in the DOAP file using Python dot notation + * Fetch DOAP by using the short name of a DOAP PURL `doapurl.org `_ + * Fetch and display DOAP mentioned in FOAF by project homepage + * Written in Python (Yes, that's a feature.) + * Extensible with plugins using setuptools + + Optional Plugins available + -------------------------- + + * Generate a web page with CSS from a DOAP file (using `hdoap `_) + * Search for DOAP using a Gentoo Linux pacakge name (Thanks `GLEP 46 `_) + + Plugins in development + ---------------------- + + * Search for DOAP by names you know, such as your distribution's package names + * Create a Gentoo ebuild from a DOAP file + + Installaion + ----------- + + You can InstallDoapfiend using distutils, easy_install or subversion. There is also a Gentoo Linux ebuild available. + + *Example command-line usage* + + Fetch DOAP for SourceForge project named 'nut': + :: + + $ doapfiend --sf nut + + + You have a project's homepage, display DOAP if any has been spidered: + :: + + $ doapfiend -o http://librdf.org/raptor/ + + + You have the URL or local path to a DOAP file: + :: + + $ doapfiend -d http://librdf.org/raptor/raptor.rdf + + + Display DOAP as RDF/XML for the SourceForge project 'nut': + :: + + $ doapfiend -x --sf nut + + + Display DOAP as N3 (Notation Three) for the SourceForge project 'nut': + :: + + $ doapfiend -n --sf nut + + *Example library usage* + + Use a Freshmeat project name to fetch and display DOAP: + :: + + >>> from doapfiend.doaplib import get_by_pkg_index + + >>> print get_by_pkg_index('fm', 'nut') + + Get some DOAP and access its metadata: + :: + + >>> from doapfiend.doaplib import get_by_pkg_index, load_graph + >>> doap = load_graph(get_by_pkg_index('fm', 'nut')) + >>> print doap.name + 'nut' + >>> print doap.created + '2008-04-19' + + + Use a project's homepage to fetch DOAP and print the RDF/XML. + Note there may be more than one DOAP profile out there; we just use the first found in our example. 
+ :: + + >>> from doapfiend.doaplib import query_by_homepage, print_doap + >>> url = query_by_homepage('http://news.tiker.net/software/tagpy')[0][1] + >>> print fetch_doap(url) + + + Same as above but display DOAP in human readable text: + :: + + >>> print_doap(fetch_doap(url)) + + +Keywords: doap rdf semantic web +Platform: UNKNOWN +Classifier: Development Status :: 2 - Pre-Alpha +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: End Users/Desktop +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python +Classifier: Topic :: Software Development :: Libraries :: Python Modules diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/README b/web-crawler/lib/doapfiend/doapfiend-0.3.3/README new file mode 100755 index 00000000..5d19c651 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/README @@ -0,0 +1,103 @@ +doapfiend +========= + +Doapfiend is a command-line client and library for fetching, displaying, creating, and manipulating `DOAP `_ (Description of a Project) files. + +Doapfiend uses RDFAlchemy/rdflib as an ORM to query and create DOAP. + +See `docs/api `_ for Epydoc generated API documentation. + +Features +-------- + + * Display metadata in DOAP in a human-readable format + * Get DOAP by a !SourceForge, Freshmeat, PyPI, Ohloh etc. project name + * Search all DOAP spidered on the web (`pingthesemanticweb.com `_) + * Access the metadata in the DOAP file using Python dot notation + * Fetch DOAP by using the short name of a DOAP PURL `doapurl.org `_ + * Fetch and display DOAP mentioned in FOAF by project homepage + * Written in Python (Yes, that's a feature.) + * Extensible with plugins using setuptools + +Optional Plugins available +-------------------------- + + * Generate a web page with CSS from a DOAP file (using `hdoap `_) + * Search for DOAP using a Gentoo Linux pacakge name (Thanks `GLEP 46 `_) + +Plugins in development +---------------------- + + * Search for DOAP by names you know, such as your distribution's package names + * Create a Gentoo ebuild from a DOAP file + +Installaion +----------- + +You can InstallDoapfiend using distutils, easy_install or subversion. There is also a Gentoo Linux ebuild available. + +*Example command-line usage* + +Fetch DOAP for SourceForge project named 'nut': +:: + + $ doapfiend --sf nut + + +You have a project's homepage, display DOAP if any has been spidered: +:: + + $ doapfiend -o http://librdf.org/raptor/ + + +You have the URL or local path to a DOAP file: +:: + + $ doapfiend -d http://librdf.org/raptor/raptor.rdf + + +Display DOAP as RDF/XML for the SourceForge project 'nut': +:: + + $ doapfiend -x --sf nut + + +Display DOAP as N3 (Notation Three) for the SourceForge project 'nut': +:: + + $ doapfiend -n --sf nut + +*Example library usage* + +Use a Freshmeat project name to fetch and display DOAP: +:: + + >>> from doapfiend.doaplib import get_by_pkg_index + + >>> print get_by_pkg_index('fm', 'nut') + +Get some DOAP and access its metadata: +:: + + >>> from doapfiend.doaplib import get_by_pkg_index, load_graph + >>> doap = load_graph(get_by_pkg_index('fm', 'nut')) + >>> print doap.name + 'nut' + >>> print doap.created + '2008-04-19' + + +Use a project's homepage to fetch DOAP and print the RDF/XML. +Note there may be more than one DOAP profile out there; we just use the first found in our example. 
+:: + + >>> from doapfiend.doaplib import query_by_homepage, print_doap + >>> url = query_by_homepage('http://news.tiker.net/software/tagpy')[0][1] + >>> print fetch_doap(url) + + +Same as above but display DOAP in human readable text: +:: + + >>> print_doap(fetch_doap(url)) + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/THANKS b/web-crawler/lib/doapfiend/doapfiend-0.3.3/THANKS new file mode 100755 index 00000000..5f7e77cf --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/THANKS @@ -0,0 +1,2 @@ + +Thanks to Edd Dumbill for DOAP. diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/TODO b/web-crawler/lib/doapfiend/doapfiend-0.3.3/TODO new file mode 100755 index 00000000..082e3d52 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/TODO @@ -0,0 +1,6 @@ +TODO +==== + +Please see doapfiend's project tracker: + +http://trac.doapspace.org/doapfiend/report/1 diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build.vel b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build.vel new file mode 100755 index 00000000..871270ce --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build.vel @@ -0,0 +1,40 @@ + +options (default "lint") + +imports [] + +depends ( + lint ['status'] +) + +targets( + + test $ ~/bin/noseed + + list-tests $ nosetests --list-test-names + + lint [ + $ pylint --rcfile=.pylintrc -r n doapfiend + ] + + status [ + $ svn st + $ pyflakes doapfiend/*py doapfiend/plugins/*py examples/*py + py 'from doapfiend.__init__ import __version__;print "doapfiend version " + __version__' + ] + test_dist [ + $ epydoc -o docs/api/ -v doapfiend --html --docformat=epytext --name=DoapFiend --url=http://trac.doapspace.org/doapfiend + $ sudo rm -f dist/* + $ sudo python setup.py develop --uninstall + $ sudo python setup.py sdist bdist_egg + $ scp dist/* cakebread@doapspace.org:webapps/static/doapfiend/dist/ + ] + dist [ + $ sudo rm -f dist/* + $ sudo python setup.py develop --uninstall + $ sudo python setup.py register sdist bdist_egg upload + $ scp dist/* cakebread@doapspace.org:webapps/static/doapfiend/dist/ + ] +) + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/PKG-INFO b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/PKG-INFO new file mode 100755 index 00000000..1a88d8e1 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/PKG-INFO @@ -0,0 +1,122 @@ +Metadata-Version: 1.2 +Name: doapfiend +Version: 0.3.3 +Summary: Command-line tool and library for DOAP (Description of a Project) RDF. +Home-page: http://trac.doapspace.org/doapfiend +Author: Rob Cakebread +Author-email: +Maintainer: Rob Cakebread +License: BSD-2 +Download-URL: http://doapspace.org/static/doapfiend/dist/doapfiend-0.3.3.tar.gz +Description: doapfiend + ========= + + Doapfiend is a command-line client and library for fetching, displaying, creating, and manipulating `DOAP `_ (Description of a Project) files. + + Doapfiend uses RDFAlchemy/rdflib as an ORM to query and create DOAP. + + See `docs/api `_ for Epydoc generated API documentation. + + Features + -------- + + * Display metadata in DOAP in a human-readable format + * Get DOAP by a !SourceForge, Freshmeat, PyPI, Ohloh etc. project name + * Search all DOAP spidered on the web (`pingthesemanticweb.com `_) + * Access the metadata in the DOAP file using Python dot notation + * Fetch DOAP by using the short name of a DOAP PURL `doapurl.org `_ + * Fetch and display DOAP mentioned in FOAF by project homepage + * Written in Python (Yes, that's a feature.) 
+ * Extensible with plugins using setuptools + + Optional Plugins available + -------------------------- + + * Generate a web page with CSS from a DOAP file (using `hdoap `_) + * Search for DOAP using a Gentoo Linux pacakge name (Thanks `GLEP 46 `_) + + Plugins in development + ---------------------- + + * Search for DOAP by names you know, such as your distribution's package names + * Create a Gentoo ebuild from a DOAP file + + Installaion + ----------- + + You can InstallDoapfiend using distutils, easy_install or subversion. There is also a Gentoo Linux ebuild available. + + *Example command-line usage* + + Fetch DOAP for SourceForge project named 'nut': + :: + + $ doapfiend --sf nut + + + You have a project's homepage, display DOAP if any has been spidered: + :: + + $ doapfiend -o http://librdf.org/raptor/ + + + You have the URL or local path to a DOAP file: + :: + + $ doapfiend -d http://librdf.org/raptor/raptor.rdf + + + Display DOAP as RDF/XML for the SourceForge project 'nut': + :: + + $ doapfiend -x --sf nut + + + Display DOAP as N3 (Notation Three) for the SourceForge project 'nut': + :: + + $ doapfiend -n --sf nut + + *Example library usage* + + Use a Freshmeat project name to fetch and display DOAP: + :: + + >>> from doapfiend.doaplib import get_by_pkg_index + + >>> print get_by_pkg_index('fm', 'nut') + + Get some DOAP and access its metadata: + :: + + >>> from doapfiend.doaplib import get_by_pkg_index, load_graph + >>> doap = load_graph(get_by_pkg_index('fm', 'nut')) + >>> print doap.name + 'nut' + >>> print doap.created + '2008-04-19' + + + Use a project's homepage to fetch DOAP and print the RDF/XML. + Note there may be more than one DOAP profile out there; we just use the first found in our example. + :: + + >>> from doapfiend.doaplib import query_by_homepage, print_doap + >>> url = query_by_homepage('http://news.tiker.net/software/tagpy')[0][1] + >>> print fetch_doap(url) + + + Same as above but display DOAP in human readable text: + :: + + >>> print_doap(fetch_doap(url)) + + +Keywords: doap rdf semantic web +Platform: UNKNOWN +Classifier: Development Status :: 2 - Pre-Alpha +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: End Users/Desktop +Classifier: License :: OSI Approved :: BSD License +Classifier: Programming Language :: Python +Classifier: Topic :: Software Development :: Libraries :: Python Modules diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/SOURCES.txt b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/SOURCES.txt new file mode 100755 index 00000000..aa733053 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/SOURCES.txt @@ -0,0 +1,166 @@ +.pylintrc +AUTHORS +FAQ +INSTALL +LICENSE +MANIFEST.in +NEWS +README +THANKS +TODO +build.vel +make_api_docs.sh +setup.cfg +setup.py +doapfiend/__init__.py +doapfiend/cli.py +doapfiend/doap2html.xsl +doapfiend/doaplib.py +doapfiend/lexers.py +doapfiend/model.py +doapfiend/utils.py +doapfiend.egg-info/PKG-INFO +doapfiend.egg-info/SOURCES.txt +doapfiend.egg-info/dependency_links.txt +doapfiend.egg-info/entry_points.txt +doapfiend.egg-info/not-zip-safe +doapfiend.egg-info/requires.txt +doapfiend.egg-info/top_level.txt +doapfiend/plugins/__init__.py +doapfiend/plugins/base.py +doapfiend/plugins/fields.py +doapfiend/plugins/freshmeat.py +doapfiend/plugins/homepage.py +doapfiend/plugins/n3.py +doapfiend/plugins/ohloh.py +doapfiend/plugins/pkg_index.py +doapfiend/plugins/pypi.py +doapfiend/plugins/sourceforge.py 
+doapfiend/plugins/text.py +doapfiend/plugins/url.py +doapfiend/plugins/xml.py +docs/usage.txt +docs/api/api-objects.txt +docs/api/class-tree.html +docs/api/crarr.png +docs/api/doapfiend-module.html +docs/api/doapfiend-pysrc.html +docs/api/doapfiend.cli-module.html +docs/api/doapfiend.cli-pysrc.html +docs/api/doapfiend.cli.DoapFiend-class.html +docs/api/doapfiend.doaplib-module.html +docs/api/doapfiend.doaplib-pysrc.html +docs/api/doapfiend.lexers-module.html +docs/api/doapfiend.lexers-pysrc.html +docs/api/doapfiend.lexers.Notation3Lexer-class.html +docs/api/doapfiend.lexers.SparqlLexer-class.html +docs/api/doapfiend.model-module.html +docs/api/doapfiend.model-pysrc.html +docs/api/doapfiend.model.CVSRepository-class.html +docs/api/doapfiend.model.Project-class.html +docs/api/doapfiend.model.Release-class.html +docs/api/doapfiend.model.SVNRepository-class.html +docs/api/doapfiend.plugins-module.html +docs/api/doapfiend.plugins-pysrc.html +docs/api/doapfiend.plugins.base-module.html +docs/api/doapfiend.plugins.base-pysrc.html +docs/api/doapfiend.plugins.base.Plugin-class.html +docs/api/doapfiend.plugins.fields-module.html +docs/api/doapfiend.plugins.fields-pysrc.html +docs/api/doapfiend.plugins.fields.OutputPlugin-class.html +docs/api/doapfiend.plugins.freshmeat-module.html +docs/api/doapfiend.plugins.freshmeat-pysrc.html +docs/api/doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html +docs/api/doapfiend.plugins.homepage-module.html +docs/api/doapfiend.plugins.homepage-pysrc.html +docs/api/doapfiend.plugins.homepage.OutputPlugin-class.html +docs/api/doapfiend.plugins.n3-module.html +docs/api/doapfiend.plugins.n3-pysrc.html +docs/api/doapfiend.plugins.n3.OutputPlugin-class.html +docs/api/doapfiend.plugins.ohloh-module.html +docs/api/doapfiend.plugins.ohloh-pysrc.html +docs/api/doapfiend.plugins.ohloh.OhlohPlugin-class.html +docs/api/doapfiend.plugins.pkg_index-module.html +docs/api/doapfiend.plugins.pkg_index-pysrc.html +docs/api/doapfiend.plugins.pypi-module.html +docs/api/doapfiend.plugins.pypi-pysrc.html +docs/api/doapfiend.plugins.pypi.PyPIPlugin-class.html +docs/api/doapfiend.plugins.sourceforge-module.html +docs/api/doapfiend.plugins.sourceforge-pysrc.html +docs/api/doapfiend.plugins.sourceforge.SourceForgePlugin-class.html +docs/api/doapfiend.plugins.text-module.html +docs/api/doapfiend.plugins.text-pysrc.html +docs/api/doapfiend.plugins.text.DoapPrinter-class.html +docs/api/doapfiend.plugins.text.OutputPlugin-class.html +docs/api/doapfiend.plugins.url-module.html +docs/api/doapfiend.plugins.url-pysrc.html +docs/api/doapfiend.plugins.url.UrlPlugin-class.html +docs/api/doapfiend.plugins.xml-module.html +docs/api/doapfiend.plugins.xml-pysrc.html +docs/api/doapfiend.plugins.xml.OutputPlugin-class.html +docs/api/doapfiend.utils-module.html +docs/api/doapfiend.utils-pysrc.html +docs/api/doapfiend.utils.DoapPrinter-class.html +docs/api/doapfiend.utils.NotFoundError-class.html +docs/api/epydoc.css +docs/api/epydoc.js +docs/api/frames.html +docs/api/help.html +docs/api/identifier-index.html +docs/api/index.html +docs/api/module-tree.html +docs/api/pygments.lexer.RegexLexerMeta-class.html +docs/api/redirect.html +docs/api/toc-doapfiend-module.html +docs/api/toc-doapfiend.cli-module.html +docs/api/toc-doapfiend.doaplib-module.html +docs/api/toc-doapfiend.lexers-module.html +docs/api/toc-doapfiend.model-module.html +docs/api/toc-doapfiend.plugins-module.html +docs/api/toc-doapfiend.plugins.base-module.html +docs/api/toc-doapfiend.plugins.fields-module.html 
+docs/api/toc-doapfiend.plugins.freshmeat-module.html +docs/api/toc-doapfiend.plugins.homepage-module.html +docs/api/toc-doapfiend.plugins.n3-module.html +docs/api/toc-doapfiend.plugins.ohloh-module.html +docs/api/toc-doapfiend.plugins.pkg_index-module.html +docs/api/toc-doapfiend.plugins.pypi-module.html +docs/api/toc-doapfiend.plugins.sourceforge-module.html +docs/api/toc-doapfiend.plugins.text-module.html +docs/api/toc-doapfiend.plugins.url-module.html +docs/api/toc-doapfiend.plugins.xml-module.html +docs/api/toc-doapfiend.utils-module.html +docs/api/toc-everything.html +docs/api/toc.html +examples/details.py +examples/doap-doap.rdf +examples/doapfiend.rdf +examples/doapfile.py +examples/dotclear-sioc.rdf +examples/edd-dumbill-foaf-2.rdf +examples/edd-dumbill-foaf.rdf +examples/foaf-no-doap.rdf +examples/foaf.rdf +examples/gnome-bluetooth-doap.rdf +examples/homepage.py +examples/moap.doap +examples/redland-doap.rdf +examples/sourceforge.py +images/doapfiend1.png +images/get.xcf +images/getdoapfiend.jpg +images/getdoapfiend.png +images/getdoapfiend.xcf +tests/.gitignore +tests/functional.sh +tests/test_cli.py +tests/test_cli.pyc +tests/test_doaplib.py +tests/test_doaplib.pyc +tests/test_n3.py +tests/test_n3.pyc +tests/test_utils.py +tests/test_utils.pyc +tests/test_xml.py +tests/test_xml.pyc +tests/data/doapfiend.rdf \ No newline at end of file diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/dependency_links.txt b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/dependency_links.txt new file mode 100755 index 00000000..8b137891 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/entry_points.txt b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/entry_points.txt new file mode 100755 index 00000000..73fb0f86 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/entry_points.txt @@ -0,0 +1,3 @@ +[console_scripts] +doapfiend = doapfiend.cli:main + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/not-zip-safe b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/not-zip-safe new file mode 100755 index 00000000..8b137891 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/not-zip-safe @@ -0,0 +1 @@ + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/requires.txt b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/requires.txt new file mode 100755 index 00000000..7ca74c07 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/requires.txt @@ -0,0 +1,2 @@ +setuptools +RDFAlchemy diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/top_level.txt b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/top_level.txt new file mode 100755 index 00000000..58293030 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend.egg-info/top_level.txt @@ -0,0 +1 @@ +doapfiend diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/__init__.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/__init__.py new file mode 100755 index 00000000..2ee7493f --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/__init__.py @@ -0,0 +1,36 @@ + +#pylint: disable-msg=C0103 +""" +doapfiend +========= + +U{http://trac.doapspace.org/doapfiend} + +Description +----------- +doapfiend is a command-line client and library 
for querying, creating and +displaying DOAP (Description of a Project) RDF profiles. + +doapfiend uses RDFAlchemy and rdflib to parse and serialize DOAP. + +Plugins +------- +Plugins can be written for editing DOAP, scraping websites and creating DOAP, +searching for DOAP in SPARQL endpoints, displaying DOAP in various formats such +as HTML etc. + + +""" + + +#Hack to get around warning in RDFAlchemy, bug filed upstream +import logging +log = logging.getLogger() +log.setLevel(logging.ERROR) + +__docformat__ = 'epytext' +__version__ = '0.3.3' +__author__ = 'Rob Cakebread ' +__copyright__ = '(C) 2007-2008 Rob Cakebread' +__license__ = 'BSD-2' + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/cli.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/cli.py new file mode 100755 index 00000000..1488d6fe --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/cli.py @@ -0,0 +1,242 @@ + +# pylint: disable-msg=C0103 +''' + +cli.py +====== + +Command-line tool for querying, serializing and displaying DOAP + +Author: Rob Cakebread + +License : BSD-2 + +''' + +__docformat__ = 'epytext' +__revision__ = '$Revision: $'[11:-1].strip() + + +import sys +import logging +import optparse + +from doapfiend.plugins import load_plugins +from doapfiend.utils import COLOR +from doapfiend.__init__ import __version__ as VERSION +from doapfiend.doaplib import print_doap, follow_homepages, show_links + + +class DoapFiend(object): + + '''`DoapFiend` class''' + + def __init__(self): + '''Initialize attributes, set logger''' + self.doap = None + self.options = None + self.log = logging.getLogger('doapfiend') + self.log.addHandler(logging.StreamHandler()) + #Cache list of all plugins + self.plugins = list(load_plugins(others=True)) + self.serializer = None + + def get_plugin(self, method): + """ + Return plugin object if CLI option is activated and method exists + + @param method: name of plugin's method we're calling + @type method: string + + @returns: list of plugins with `method` + + """ + all_plugins = [] + for plugin_obj in self.plugins: + plugin = plugin_obj() + plugin.configure(self.options, None) + if plugin.enabled: + if not hasattr(plugin, method): + plugin = None + else: + all_plugins.append(plugin) + return all_plugins + + def set_log_level(self): + '''Set log level according to command-line options''' + if self.options.verbose: + self.log.setLevel(logging.INFO) + elif self.options.quiet: + self.log.setLevel(logging.ERROR) + elif self.options.debug: + self.log.setLevel(logging.DEBUG) + else: + self.log.setLevel(logging.WARN) + + def print_doap(self, doap_xml): + ''' + Print doap as n3, rdf/xml, plain text or using serialization plugin + + @param doap_xml: DOAP in RDF/XML serialization + @type doap_xml: text + + @rtype: None + @return: Just displays DOAP + + ''' + if self.options.write: + filename = self.options.write + else: + filename = None + print_doap(doap_xml, serializer=self.serializer, filename=filename, + color=not self.options.no_color) + + def get_search_plugin(self): + ''' + Return active search plugin callable + + @rtype: callable + @returns: A callable object that fetches for DOAP + ''' + plugins = self.get_plugin('search') + if len(plugins) == 1: + return plugins[0].search + + def run(self): + ''' + Run doapfiend command + + Find the active plugin that has a 'search' method and run it, + then output the DOAP with print_doap, using the active plugin + with a 'serializer' method. 
+ + + @rtype: int + @returns: 0 success or 1 failure + + ''' + opt_parser = self.setup_opt_parser() + (self.options, remaining_args) = opt_parser.parse_args() + self.set_serializer() + if not self.serializer and remaining_args: + opt_parser.print_help() + return 1 + self.set_log_level() + + if self.options.doapfiend_version: + return doapfiend_version() + + if self.options.no_color: + for this in COLOR: + COLOR[this] = '\x1b[0m' + search_func = self.get_search_plugin() + if search_func: + doap_xml = search_func() + if doap_xml: + if self.options.follow: + #Search for additional DOAP by looking up all doap:homepage + #found and then print all found. This may be used if the + #DOAP you've found isn't rich enough or with FOAF, where a + #person lists multiple projects they are affiliated with + #and you want to find DOAP based on the Projec homepages + #found in FOAF. + self.print_doap(doap_xml) + return follow_homepages(doap_xml) + elif self.options.show_links: + return show_links(doap_xml) + else: + return self.print_doap(doap_xml) + else: + opt_parser.print_help() + return 1 + + def set_serializer(self): + ''' + Find all plugins that are enabled on the command-line and have a + `serialize` method. If none are enabled, default to plain text + ''' + plugins = self.get_plugin('serialize') + if len(plugins) == 0: + self.serializer = None + else: + #Choose first serializer in case they try more than one + self.serializer = plugins[0].serialize + + def setup_opt_parser(self): + ''' + Setup the optparser + + @rtype: opt_parser.OptionParser + @return: Option parser + + ''' + usage = 'usage: %prog [options]' + opt_parser = optparse.OptionParser(usage=usage) + group_search = optparse.OptionGroup(opt_parser, + 'Search options', + 'Options for searching for DOAP') + + opt_parser.add_option('--version', action='store_true', + dest='doapfiend_version', default=False, + help='Show doapfiend version and exit.') + + opt_parser.add_option('-P', '--http-proxy', action='store', + dest='proxy', default=False, + help='Specify http proxy URL if you use one.') + + group_output = optparse.OptionGroup(opt_parser, + 'Output options', + 'Choose these options to change default output behavior') + + group_output.add_option('--debug', action='store_true', + dest= 'debug', default=False, + help='Show debugging information') + + group_output.add_option('-f', '--follow-links', action='store_true', + dest='follow', default=False, + help='Search for and show additional DOAP.', + metavar='FILENAME') + + group_output.add_option('-s', '--show-links', action='store_true', + dest='show_links', default=False, + help='Search for and show links to additional DOAP.', + metavar='FILENAME') + + group_output.add_option('-w', '--write', action='store', + dest='write', default=False, + help='Write DOAP to a file instead of displaying it.', + metavar='FILENAME') + + group_output.add_option('-C', '--no-color', action='store_true', + dest='no_color', default=False, + help="Don't use color in output") + + group_output.add_option('-q', '--quiet', action='store_true', + dest='quiet', default=False, help="Show less output") + + group_output.add_option('-v', '--verbose', action='store_true', + dest='verbose', default=False, help="Show more output") + + # add opts from plugins + for plugcls in self.plugins: + plug = plugcls() + plug.add_options(opt_parser, group_output, group_search) + opt_parser.add_option_group(group_search) + opt_parser.add_option_group(group_output) + return opt_parser + + +def doapfiend_version(): + '''Print doapfiend 
version''' + print VERSION + + +def main(): + '''Let's do it.''' + my_doapfiend = DoapFiend() + return my_doapfiend.run() + + +if __name__ == '__main__': + sys.exit(main()) + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doap2html.xsl b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doap2html.xsl new file mode 100755 index 00000000..ffb94d29 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doap2html.xsl @@ -0,0 +1,186 @@ + + + + + + + + + + + + + <xsl:value-of select="doap:name/text()"/> + + + + +

+ [remainder of doap2html.xsl omitted: the XSLT/HTML markup did not survive extraction. The stylesheet renders a DOAP RDF/XML profile as an HTML page with labelled sections for Project, Release, Maintainer, Developer, Documenter, Translator, Tester, Helper, Repository, and Maker of DOAP Profile.]
+ + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doaplib.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doaplib.py new file mode 100755 index 00000000..d537af1a --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/doaplib.py @@ -0,0 +1,339 @@ +#!/usr/bin/env python +#pylint: disable-msg=C0103 + +""" + +Library for parsing, displaying, querying and serializing DOAP + +""" + +import sys +import logging +import xmlrpclib +from cStringIO import StringIO +from xml.sax._exceptions import SAXParseException + +from rdfalchemy import rdfSubject +from rdflib import ConjunctiveGraph, Namespace + +from doapfiend.utils import fetch_file +from doapfiend.model import Project +from doapfiend.plugins import load_plugins + +LOG = logging.getLogger('doapfiend') +XMLRPC_SERVER = xmlrpclib.ServerProxy('http://doapspace.org/xmlrpc/') +DOAP_NS = Namespace('http://usefulinc.com/ns/doap#') + + +def follow_homepages(rdf_xml): + ''' + If there is a 'doap:Project homepage' it will be looked up + on doapspace.org using get_by_homepage to find any other + DOAP. This is useful if we're looking at FOAF and a project + is mentioned by homepage. It can also be used on DOAP files + to search for additional DOAP files about the same project. + + @param rdf_xml: RDF serialized as XML + @type : string + + @rtype: int + @returns: 0 on sucess or 1 if there was no DOAP in the RDF + ''' + homepages = list(get_homepages(rdf_xml)) + nbr_homepage_urls = len(homepages) + if nbr_homepage_urls >= 1: + print_doap_by_homepages(homepages) + else: + print 'No DOAP found in that RDF.' + return 1 + + +def show_links(rdf): + ''' + If there is a 'doap:Project homepage' it will be looked up + on doapspace.org using get_by_homepage to find any other + DOAP. This is useful if we're looking at FOAF and a project + is mentioned by homepage. It can also be used on DOAP files + to search for additional DOAP files about the same project. + + @param rdf: RDF serialized as XML + @type : string + + @rtype: int + @returns: 0 on sucess or 1 if there was no DOAP in the RDF + ''' + homepages = list(get_homepages(rdf)) + nbr_homepage_urls = len(homepages) + if nbr_homepage_urls >= 1: + for hpage_url in homepages: + print "Found project homepage:", hpage_url + #Search for DOAP by project homepage. + hpages = query_by_homepage(hpage_url) + for _src, hpage_url in hpages: + print ' Found DOAP: ', hpage_url + else: + print 'No DOAP found in that RDF.' + return 1 + + +def print_doap_by_homepages(homepages): + ''' + Given a list of homepage URLs, search for DOAP for each and print + + @param homepages: Project homepage + @type : list + + @rtype: None + @returns: None + ''' + for hpage_url in homepages: + print "Found project homepage", hpage_url + #Search for DOAP by project homepage. 
There may be none, one or multiple + hpages = query_by_homepage(hpage_url) + for _src, hpage_url in hpages: + print 'Found DOAP at ', hpage_url + doap_xml = fetch_doap(hpage_url) + print_doap(doap_xml) + +def get_homepages(rdf, format='xml'): + ''' + Find all doap:homepage in RDF + + @param rdf: RDF + @type rdf: string + + @param format: Serialization format + @type format: string + + @rtype: generator + @returns: homepages + ''' + store = ConjunctiveGraph() + store.parse(StringIO(rdf), publicID=None, format=format) + if rdf_has_doap(store): + for _s, o in store.subject_objects(DOAP_NS["homepage"]): + yield(str(o)) + +def rdf_has_doap(store): + ''' + Returns True if triplestore has the DOAP namespace defined + + @param store: triplestore + @type store: rdflib ConjunctiveGraph + + @rtype: boolean + @returns: True if triplestore contains DOAP namespace + + ''' + for namespace in store.namespaces(): + if namespace[1] == DOAP_NS: + return True + +def load_graph(doap, format="xml", get_list=False): + ''' + Load a DOAP profile into a RDFAlchemy/rdflib graph + + Supports any serialization format rdflib can parse (xml, n3, etc.) + + @param doap: DOAP + @type doap: string + + @param format: Serialization format we're parsing + @type format: string + + @param get_list: Return list of Projects if True + @type doap: list + + @rtype: Project + @returns: a Project{rdfSubject} + + ''' + rdfSubject.db = ConjunctiveGraph() + try: + rdfSubject.db.parse(StringIO(doap), format) + except SAXParseException: + sys.stderr.write("Error: Can't parse RDF/XML.\n") + raise Exception("Error: Can't parse RDF/XML.") + # sys.exit(2) + #If a serializer works on an entire graph, it doesn't matter which + #Project instance we give it. This is true for N3, XML/RDF etc. + #The 'text' serializer, on the other hand, prints out a separate + #description for each Project found in a graph. This is useful for + #'arbitrary' RDF, or FOAF where there may be several Projects listed. + #Ideally exactly one Project should be specified in an .rdf file. + #In the future load_graph will probably always return a list and let the + #plugins determine what to do when there are more than one Project + #found. + if get_list: + LOG.debug("doaplib: list of Projects") + try: + projs = list(Project.ClassInstances()) + LOG.debug("Found %s Projects." % len(projs)) + if len(projs) == 0: + sys.stderr.write('No DOAP found in that RDF.\n') + return projs + except StopIteration: + sys.stderr.write('No DOAP found in that RDF.\n') + sys.exit(2) + + else: + try: + LOG.debug("doaplib: single Project") + return Project.ClassInstances().next() + except StopIteration: + sys.stderr.write('No DOAP found in that RDF.\n') + sys.exit(2) + sys.stderr.write('No DOAP found in that RDF.\n') + +def get_by_pkg_index(index, project_name, proxy=None): + ''' + Get DOAP for a package index project name + + Builtin indexes: + + - 'sf' SourceForge + - 'fm' Freshmeat + - 'py' Python Package Index + + Note there can be other package indexes available by + third party plugins. 
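+
+    A hedged example of the documented call (the index abbreviation and
+    project name are only illustrations; the lookup goes out to the
+    package-index service)::
+
+        doap_xml = get_by_pkg_index('py', 'doapfiend')
+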
+ + @param index: Package index two letter abbreviation + @type index: string + + @param project_name: project name + @type project_name: string + + @param proxy: Optional HTTP proxy URL + @type proxy: string + + @rtype: string + @return: text of file retrieved + + ''' + for plugin_obj in list(load_plugins()): + plugin = plugin_obj() + if hasattr(plugin, 'prefix'): + if plugin.prefix == index: + plugin.query = project_name + return plugin.search(proxy) + + +def query_by_homepage(url): + ''' + Get list of URL's for DOAP given a project's homepage. + The list can contain zero or multiple URLs. + + The return format is: + [(source, URL), (source, URL)...] + + 'source' is the two letter package index abbreviation or 'ex' for external. + 'external' meaning the DOAP was spidered on the web. + Possible package indexes: + + Current indexes: + + - 'sf' SourceForge + - 'fm' Freshmeat + - 'py' Python Package Index + - 'oh' Packages listed on Ohloh + + @param url: URL of homepage of a project + @type url: string + + @rtype: list + @return: A list of tuples containing URLs for DOAP found by homepage + + ''' + #Should check for env variable for alternate xmplrpc server for testing? + return XMLRPC_SERVER.query_by_homepage(url) + + +def print_doap(doap_xml, color=None, format='text', serializer=None, + filename=None): + ''' + Print DOAP as text, xml, or n3 etc. or to stdout or a file + A callable serializer object may be passed or a name of a serializer + plugin. + + @param doap_xml: DOAP profile in RDF/XML + @type doap_xml: string + + @param format: Serialization syntax formatter name + @type format: string + + @param serializer: Instance of a serializer + @type serializer: callable + + @param filename: Optional filename to write to + @type filename: string + + @return: `serializer` or 1 if invalid serialization request + + ''' + #If we were passed a callable serializer object use it, + #otherwise lookup serializer by name in list of plugins + if not serializer: + serializer = get_serializer(format) + if not serializer: + sys.stderr.write('Unknown serialization requested: %s\n' % format) + return 1 + + doap = serializer(doap_xml, color) + if filename: + try: + open(filename, 'w').write(doap.encode('utf-8')) + except UnicodeDecodeError: + open(filename, 'w').write(doap) + else: + print doap + + +def get_serializer(format): + ''' + Return a serializer instance given its name + + @param format: Name of serializer + @type format: string + + @rtype: function + @returns: Instance of a serializer + ''' + #Get all plugins with a `serialize` method + for plugin_obj in get_plugin('serialize'): + plugin = plugin_obj() + if plugin.name == format: + return plugin.serialize + + +def get_plugin(method): + """ + Return plugin object if `method` exists + + @param method: name of plugin's method we're calling + @type method: string + + @returns: list of plugins with `method` + + """ + all_plugins = [] + for plugin in load_plugins(): + #plugin().configure(None, None) + if not hasattr(plugin, method): + plugin = None + else: + all_plugins.append(plugin) + return all_plugins + + +def fetch_doap(url, proxy=None): + ''' + Fetch DOAP by its URL or filename + + @param url: URL of DOAP profile in RDF/XML serialization + @type url: string + + @rtype: text + @return: DOAP + ''' + return fetch_file(url, proxy) diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/lexers.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/lexers.py new file mode 100755 index 00000000..e86a9a75 --- /dev/null +++ 
b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/lexers.py @@ -0,0 +1,148 @@ + +#pylint: disable-msg=C0301 + +""" + pygments.lexers.sw + ================== + + Lexers for semantic web languages. + + :copyright: 2007 by Philip Cooper . + :license: BSD, see LICENSE for more details. +""" + +import re + +from pygments.lexer import RegexLexer, include, bygroups +from pygments.token import Text, Comment, Operator, Keyword, Name, Literal + + +__all__ = ['Notation3Lexer', 'SparqlLexer'] + + +class Notation3Lexer(RegexLexer): + """ + Lexer for the N3 / Turtle / NT + """ + name = 'N3' + aliases = ['n3', 'turtle'] + filenames = ['*.n3', '*.ttl', '*.NT'] + mimetypes = ['text/rdf+n3', 'application/x-turtle', 'application/n3'] + + tokens = { + 'comments': [ + (r'(\s*#.*)', Comment) + ], + 'root': [ + include('comments'), + (r'(\s*@(?:prefix|base|keywords)\s*)(\w*:\s+)?(<[^> ]*>\s*\.\s*)', + bygroups(Keyword,Name.Variable,Name.Namespace)), + (r'\s*(<[^>]*\>)', Name.Class, ('triple','predObj')), + (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\s)', + Name.Class, ('triple','predObj')), + (r'\s*\[\]\s*', Name.Class, ('triple','predObj')), + ], + 'triple' : [ + (r'\s*\.\s*', Text, '#pop') + ], + 'predObj': [ + include('comments'), + (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\b\s*)', Operator, 'object'), + (r'\s*(<[^>]*\>)', Operator, 'object'), + (r'\s*\]\s*', Text, '#pop'), + (r'(?=\s*\.\s*)', Keyword, '#pop'), + ], + 'objList': [ + (r'\s*\)', Text, '#pop'), + include('object') + ], + 'object': [ + (r'\s*\[', Text, 'predObj'), + (r'\s*<[^> ]*>', Name.Attribute), + (r'\s*("""(?:.|\n)*?""")(\@[a-z]{2-4}|\^\^?)?\s*', + bygroups(Literal.String,Text)), + (r'\s*".*?[^\\]"(?:\@[a-z]{2-4}|\^\^?)?\s*', + Literal.String), + (r'\s*[a-zA-Z0-9\-_\:]\s*', Name.Attribute), + (r'\s*\(', Text, 'objList'), + (r'\s*;\s*\n?', Text, '#pop'), + (r'(?=\s*\])', Text, '#pop'), + (r'(?=\s*\.)', Text, '#pop'), + ], + } + + +class SparqlLexer(RegexLexer): + """ + Lexer for SPARQL Not Complete + """ + name = 'SPARQL' + aliases = ['sparql'] + filenames = ['*.sparql'] + mimetypes = ['text/x-sql'] + flags = re.IGNORECASE + tokens = { + 'comments': [ + (r'(\s*#.*)', Comment) + ], + 'root': [ + include('comments'), + (r'(\s*(?:PREFIX|BASE)\s+)(\w*:\w*)?(\s*<[^> ]*>\s*)', + bygroups(Keyword,Name.Variable,Name.Namespace)), + (r'(\s*#.*)', Comment), + (r'((?:SELECT|ASK|CONSTRUCT|DESCRIBE)\s*(?:DISTINCT|REDUCED)?\s*)((?:\?[a-zA-Z0-9_-]+\s*)+|\*)(\s*)', + bygroups(Keyword,Name.Variable,Text)), + (r'(FROM\s*(?:NAMED)?)(\s*.*)', bygroups(Keyword,Text)), + (r'(WHERE)?\s*({)', bygroups(Keyword,Text), 'graph'), + (r'(LIMIT|OFFSET)(\s*[+-]?[0-9]+)', + bygroups(Keyword,Literal.String)), + ], + 'graph':[ + (r'\s*(<[^>]*\>)', Name.Class, ('triple','predObj')), + (r'(\s*[a-zA-Z_0-9\-]*:[a-zA-Z0-9\-_]*\s)', + Name.Class, ('triple','predObj')), + (r'(\s*\?[a-zA-Z0-9_-]*)', Name.Variable, ('triple','predObj')), + (r'\s*\[\]\s*', Name.Class, ('triple','predObj')), + (r'\s*(FILTER\s*)((?:regex)?\()',bygroups(Keyword,Text),'filterExp'), + (r'\s*}', Text, '#pop'), + ], + 'triple' : [ + (r'(?=\s*})', Text, '#pop'), + (r'\s*\.\s*', Text, '#pop'), + ], + 'predObj': [ + include('comments'), + (r'(\s*\?[a-zA-Z0-9_-]*\b\s*)', Name.Variable,'object'), + (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\b\s*)', Operator, 'object'), + (r'\s*(<[^>]*\>)', Operator, 'object'), + (r'\s*\]\s*', Text, '#pop'), + (r'(?=\s*\.\s*)', Keyword, '#pop'), + ], + 'objList': [ + (r'\s*\)', Text, '#pop'), + include('object'), + ], + 'object': [ + include('variable'), + (r'\s*\[', Text, 'predObj'), + (r'\s*<[^> ]*>', 
Name.Attribute), + (r'\s*("""(?:.|\n)*?""")(\@[a-z]{2-4}|\^\^?)?\s*', bygroups(Literal.String,Text)), + (r'\s*".*?[^\\]"(?:\@[a-z]{2-4}|\^\^?)?\s*', Literal.String), + (r'\s*[a-zA-Z0-9\-_\:]\s*', Name.Attribute), + (r'\s*\(', Text, 'objList'), + (r'\s*;\s*', Text, '#pop'), + (r'(?=\])', Text, '#pop'), + (r'(?=\.)', Text, '#pop'), + ], + 'variable':[ + (r'(\?[a-zA-Z0-9\-_]+\s*)', Name.Variable), + ], + 'filterExp':[ + include('variable'), + include('object'), + (r'\s*[+*/<>=~!%&|-]+\s*', Operator), + (r'\s*\)', Text, '#pop'), + ], + + } + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/model.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/model.py new file mode 100755 index 00000000..9b935b51 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/model.py @@ -0,0 +1,83 @@ + +''' + +Model of a DOAP profile using RDFAlchemy + +''' + +from rdfalchemy import rdfSubject, rdfSingle, rdfMultiple +from rdfalchemy.orm import mapper +from rdflib import Namespace + +DOAP = Namespace("http://usefulinc.com/ns/doap#") +FOAF = Namespace("http://xmlns.com/foaf/0.1/") +DC = Namespace("http://purl.org/dc/elements/1.1/") + + +class Project(rdfSubject): + + """ + DOAP Project Class + """ + + rdf_type = DOAP.Project + + category = rdfMultiple(DOAP.category) + created = rdfSingle(DOAP.created) + shortname = rdfSingle(DOAP.shortname) + description = rdfMultiple(DOAP.description) + bug_database = rdfSingle(DOAP['bug-database']) + developer = rdfMultiple(DOAP.developer, range_type=FOAF.Person) + documenter = rdfMultiple(DOAP.documenter, range_type=FOAF.Person) + download_mirror = rdfMultiple(DOAP['downoad-mirror']) + download_page = rdfSingle(DOAP['download-page']) + helper = rdfMultiple(DOAP.helper, range_type=FOAF.Person) + homepage = rdfSingle(DOAP.homepage) + license = rdfMultiple(DOAP['license']) + maintainer = rdfMultiple(DOAP.maintainer, range_type=FOAF.Person) + developer = rdfMultiple(DOAP.developer, range_type=FOAF.Person) + translator = rdfMultiple(DOAP.translator, range_type=FOAF.Person) + helper = rdfMultiple(DOAP.helper, range_type=FOAF.Person) + tester = rdfMultiple(DOAP.tester, range_type=FOAF.Person) + documenter = rdfMultiple(DOAP.documenter, range_type=FOAF.Person) + module = rdfSingle(DOAP.module) + name = rdfSingle(DOAP.name) + old_homepage = rdfMultiple(DOAP['old-homepage']) + programming_language = rdfMultiple(DOAP['programming-language']) + releases = rdfMultiple(DOAP.release, range_type=DOAP.Version) + svn_repository = rdfSingle(DOAP.repository, 'svn_repository', + range_type=DOAP.SVNRepository) + cvs_repository = rdfSingle(DOAP.repository, 'cvs_repository', + range_type=DOAP.CVSRepository) + oper_sys = rdfMultiple(DOAP['os']) + screenshots = rdfMultiple(DOAP.screenshots) + shortdesc = rdfMultiple(DOAP.shortdesc) + tester = rdfMultiple(DOAP.tester, range_type=FOAF.Person) + translator = rdfMultiple(DOAP.translator, range_type=FOAF.Person) + wiki = rdfMultiple(DOAP.wiki) + +class Release(rdfSubject): + """A release class""" + rdf_type = DOAP.Version + revision = rdfSingle(DOAP.revision) + name = rdfSingle(DOAP.name) + created = rdfSingle(DOAP.created) + changelog = rdfSingle(DC.description) + file_releases = rdfMultiple(DOAP['file-release']) + +class SVNRepository(rdfSubject): + """Subversion repository classs""" + rdf_type = DOAP.SVNRepository + location = rdfSingle(DOAP.location) + svn_browse = rdfSingle(DOAP.browse) + +class CVSRepository(rdfSubject): + """CVS repository class""" + rdf_type = DOAP.CVSRepository + anon_root = 
rdfSingle(DOAP['anon-root']) + cvs_browse = rdfSingle(DOAP.browse) + module = rdfSingle(DOAP.module) + + +mapper(Project, Release, CVSRepository, SVNRepository) + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/__init__.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/__init__.py new file mode 100755 index 00000000..35ae07c8 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/__init__.py @@ -0,0 +1,144 @@ + +# pylint: disable-msg=W0142,C0103 + + +""" +Writing Plugins +=============== + +doapfiend supports setuptools_ entry point plugins. + +There are two basic rules for plugins: + + - Plugin classes should subclass `doapfiend.plugins.Plugin`_. + - Plugins may implement any of the methods described in the class + PluginInterface in doapfiend.plugins.base. Please note that this class is for + documentary purposes only; plugins may not subclass PluginInterface. + +Setuptools: http://peak.telecommunity.com/DevCenter/setuptools +Doapfiend Plugins: http://trac.doapspace.org/doapfiend/wiki/DoapfiendPlugins + +Registering +----------- + +For doapfiend to find a plugin, it must be part of a package that uses +setuptools, and the plugin must be included in the entry points defined +in the setup.py for the package:: + + setup(name='Some plugin', + ... + entry_points = { + 'doapfiend.plugins': [ + 'someplugin = someplugin:SomePlugin' + ] + }, + ... + ) + +Once the package is installed with install or develop, doapfiend will be able +to load the plugin. + +Defining options +---------------- + +All plugins must implement the methods ``add_options(self, parser, env)`` +and ``configure(self, options, conf)``. Subclasses of doapfiend.plugins.Plugin +that want the standard options should call the superclass methods. + +doapfiend uses optparse.OptionParser from the standard library to parse +arguments. A plugin's ``add_options()`` method receives a parser +instance. It's good form for a plugin to use that instance only to add +additional arguments that take only long arguments (--like-this). Most +of doapfiend's built-in arguments get their default value from an environment +variable. This is a good practice because it allows options to be +utilized when run through some other means than the doapfiendtests script. + +A plugin's ``configure()`` method receives the parsed ``OptionParser`` options +object, as well as the current config object. Plugins should configure their +behavior based on the user-selected settings, and may raise exceptions +if the configured behavior is nonsensical. + +Logging +------- + +doapfiend uses the logging classes from the standard library. To enable users +to view debug messages easily, plugins should use ``logging.getLogger()`` to +acquire a logger in the ``doapfiend.plugins`` namespace. + +""" + +import logging +import pkg_resources +from warnings import warn +from inspect import isclass +from doapfiend.plugins.base import Plugin + +LOG = logging.getLogger('doapfiend') + +# +==== IMPORTANT ====+ +#If you add any builtin plugins in doapfiend.plugins you must add them +#to this list for them to be loaded. It's okay to add other Python modules +#in the doapfiend.plugins namespace, but they won't be recognized as a plugin +#unless listed here: + +builtin_plugins = ['url', 'homepage', 'n3', 'xml', 'text', 'sourceforge', + 'pypi', 'freshmeat', 'ohloh', 'fields'] + +def call_plugins(plugins, method, *arg, **kw): + """Call all method on plugins in list, that define it, with provided + arguments. 
The first response that is not None is returned. + """ + for plug in plugins: + func = getattr(plug, method, None) + if func is None: + continue + LOG.debug("call plugin %s: %s", plug.name, method) + result = func(*arg, **kw) + if result is not None: + return result + return None + +def load_plugins(builtin=True, others=True): + """Load plugins, either builtin, others, or both. + """ + loaded = [] + if builtin: + for name in builtin_plugins: + try: + parent = __import__(__name__, globals(), locals(), [name]) + #print name + pmod = getattr(parent, name) + for entry in dir(pmod): + obj = getattr(pmod, entry) + if (isclass(obj) + and issubclass(obj, Plugin) + and obj is not Plugin + and not obj in loaded): + #LOG.debug("load builtin plugin %s (%s)" % (name, obj)) + #print "load builtin plugin %s (%s)" % (name, obj) + yield obj + loaded.append(obj) + except KeyboardInterrupt: + raise + except Exception, e: + warn("Unable to load builtin plugin %s: %s" % (name, e), + RuntimeWarning) + for entry_point in pkg_resources.iter_entry_points('doapfiend.plugins'): + LOG.debug("load plugin %s" % entry_point) + try: + plugin = entry_point.load() + except KeyboardInterrupt: + raise + except Exception, err_msg: + # never want a plugin load to exit doapfiend + # but we can't log here because the logger is not yet + # configured + warn("Unable to load plugin %s: %s" % \ + (entry_point, err_msg), RuntimeWarning) + continue + if plugin.__module__.startswith('doapfiend.plugins'): + if builtin: + yield plugin + elif others: + yield plugin + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/base.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/base.py new file mode 100755 index 00000000..3cd03d4a --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/base.py @@ -0,0 +1,78 @@ + +# pylint: disable-msg=W0201,W0511 +#XXX Attribute 'conf' defined outside __init__ + +""" + +Base plugin class +================= + +All plugins should inherit doapfiend.plugins.Plugin + +""" + + +import textwrap + +class Plugin(object): + """Base class for doapfiend plugins. It's not necessary to subclass this + class to create a plugin; however, all plugins must implement + `add_options(self, parser)` and `configure(self, options, + conf)`, and must have the attributes `enabled` and `name`. + + Plugins should not be enabled by default. + + Subclassing Plugin will give your plugin some friendly default + behavior: + + - A --with-$name option will be added to the command line + interface to enable the plugin. The plugin class's docstring + will be used as the help for this option. + - The plugin will not be enabled unless this option is selected by + the user. + """ + enabled = False + enable_opt = None + name = None + + def __init__(self): + self.conf = None + if self.name is None: + self.name = self.__class__.__name__.lower() + if self.enable_opt is None: + self.enable_opt = "enable_plugin_%s" % self.name + + def add_options(self, parser): + """Add command-line options for this plugin. + + The base plugin class adds --with-$name by default, used to enable the + plugin. + """ + parser.add_option("--with-%s" % self.name, + action="store_true", + dest=self.enable_opt, + help="Enable plugin %s: %s" % + (self.__class__.__name__, self.help()) + ) + + def configure(self, options, conf): + """Configure the plugin and system, based on selected options. + + The base plugin class sets the plugin to enabled if the enable option + for the plugin (self.enable_opt) is true. 
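+
+        A small sketch of the mechanics (the stub classes are invented
+        for illustration only)::
+
+            class N3Stub(Plugin):
+                name = 'n3'        # enable_opt becomes 'enable_plugin_n3'
+
+            class Opts(object):
+                enable_plugin_n3 = True
+
+            plug = N3Stub()
+            plug.configure(Opts(), conf=None)
+            assert plug.enabled
+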
+ """ + self.conf = conf + self.options = options + if hasattr(options, self.enable_opt): + self.enabled = getattr(options, self.enable_opt) + + def help(self): + """Return help for this plugin. This will be output as the help + section of the --with-$name option that enables the plugin. + """ + if self.__class__.__doc__: + # doc sections are often indented; compress the spaces + return textwrap.dedent(self.__class__.__doc__) + return "(no help available)" + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/fields.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/fields.py new file mode 100755 index 00000000..d462437a --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/fields.py @@ -0,0 +1,215 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 +""" + +Plain text serializer +===================== + +This plugin outputs DOAP in human-readable plain text + +""" + +__docformat__ = 'epytext' + +import logging + +from rdflib import Namespace +from rdfalchemy import rdfSubject + +from doapfiend.plugins.base import Plugin +from doapfiend.utils import COLOR +from doapfiend.doaplib import load_graph + + +FOAF = Namespace("http://xmlns.com/foaf/0.1/") + +LOG = logging.getLogger('doapfiend') + + +class OutputPlugin(Plugin): + + """Class for formatting DOAP output""" + + #This will be the opt_parser option (--fields) + name = "fields" + enabled = False + enable_opt = name + + def __init__(self): + '''Setup Plain Text OutputPlugin class''' + super(OutputPlugin, self).__init__() + self.options = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + output.add_option('--%s' % self.name, + action='store', + dest=self.enable_opt, + help='Output specific DOAP fields as plain text') + return parser, output, search + + def serialize(self, doap_xml, color=False): + ''' + Serialize RDF/XML DOAP as N3 syntax + + @param doap_xml: DOAP in RDF/XML serialization + @type doap_xml: string + + @rtype: unicode + @return: DOAP in plain text + ''' + if hasattr(self.options, 'no_color'): + color = not self.options.no_color + if not color: + #This has already been done if we're called from cli.py + #Fix me: Need to think on this. + for this in COLOR: + COLOR[this] = '\x1b[0m' + + if hasattr(self.options, 'quiet'): + brief = self.options.quiet + else: + brief = False + + doap = load_graph(doap_xml) + fields = self.options.fields.split(',') + #print fields + out = '' + for field in fields: + if '-' in field: + field = field.replace('-', '_') + field = field.strip() + if '.' 
in field: + repo, field = field.split('.') + text = print_repos(doap, repo, field) + elif field == 'releases': + text = get_releases(doap, brief) + elif field in ['maintainer', 'developer', 'documenter', 'helper', + 'tester', 'translator']: + text = get_people(doap, field) + else: + try: + text = getattr(doap, field) + except AttributeError: + LOG.warn("No such attribute: %s" % field) + text = None + if not text: + continue + if isinstance(text, list): + text = print_list(doap, field) + else: + text = print_field(doap, field) + out += text + '\n' + return out.rstrip() + +def print_list(doap, field): + ''' + Print list of DOAP attributes + + @param doap: DOAP in RDF/XML + @type doap: text + + @param field: DOAP attribute to be printed + @type field: text + + @rtype: text + @returns: Field to be printed + ''' + #Can have multiple values per attribute + text = "" + for thing in getattr(doap, field): + if isinstance(thing, rdfSubject): + text += thing.resUri + else: + #unicode object + thing = thing.strip() + text += thing + return text + +def print_field(doap, field): + ''' + Print single field + + @param doap: DOAP in RDF/XML + @type doap: text + + @param field: DOAP attribute to be printed + @type field: text + + @rtype: text + @returns: Field to be printed + ''' + text = getattr(doap, field) + if isinstance(text, rdfSubject): + return text.resUri.strip() + else: + return text.strip() + +def print_repos(doap, repo, field): + '''Prints DOAP repository metadata''' + if repo == 'cvs': + if hasattr(doap.cvs_repository, field): + return getattr(doap.cvs_repository, field) + + if repo == 'svn': + if field == 'browse': + field = 'svn_browse' + if hasattr(doap.svn_repository, field): + text = getattr(doap.svn_repository, field) + if text: + if isinstance(text, rdfSubject): + return text.resUri + else: + return text.strip() + return '' + +def get_people(doap, job): + '''Print people for a particular job ''' + out = '' + if hasattr(doap, job): + attribs = getattr(doap, job) + if len(attribs) > 0: + peeps = [] + for attr in attribs: + if attr[FOAF.mbox] is None: + person = "%s" % attr[FOAF.name] + else: + mbox = attr[FOAF.mbox].resUri + if mbox.startswith('mailto:'): + mbox = mbox[7:] + person = "%s <%s>" % (attr[FOAF.name], mbox) + else: + LOG.debug("mbox is invalid: %s" % mbox) + person = "%s" % attr[FOAF.name] + peeps.append(person) + out += ", ".join([p for p in peeps]) + return out + + +def get_releases(doap, brief=False): + '''Print DOAP package release metadata''' + out = '' + if hasattr(doap, 'releases') and len(doap.releases) != 0: + if not brief: + out += COLOR['bold'] + "Releases:" + COLOR['normal'] + '\n' + for release in doap.releases: + if release.name: + out += COLOR['bold'] + COLOR['cyan'] + release.name + \ + COLOR['normal'] + '\n' + if hasattr(release, 'created') and release.created is not None: + created = release.created + else: + created = '' + out += COLOR['cyan'] + ' ' + release.revision + ' ' + \ + COLOR['normal'] + created + '\n' + if not brief: + if hasattr(release, 'changelog'): + if release.changelog: + out += COLOR['yellow'] + release.changelog + \ + COLOR['normal'] + '\n' + + for frel in release.file_releases: + out += ' %s' % frel.resUri + '\n' + return out + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/freshmeat.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/freshmeat.py new file mode 100755 index 00000000..d79a768c --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/freshmeat.py @@ -0,0 +1,65 @@ 
+#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" + +freshmeat +========= + +Currently this plugin uses http://doapspace.org/ to fetch DOAP for Freshmeat + + +""" + +__docformat__ = 'epytext' + + +from doapfiend.utils import NotFoundError +from doapfiend.plugins.base import Plugin +from doapfiend.plugins.pkg_index import get_by_pkg_index + + +class FreshmeatPlugin(Plugin): + + """Get DOAP from Freshmeat package index""" + + #This will be the opt_parser option (--fm) in the output group + name = "fm" + enabled = False + enable_opt = name + + def __init__(self): + '''Setup RDF/XML OutputPlugin class''' + super(FreshmeatPlugin, self).__init__() + self.options = None + self.query = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + search.add_option('--%s' % self.name, + action='store', + dest=self.enable_opt, + help='Get DOAP by its Freshmeat project name.', + metavar='PROJECT_NAME') + return parser, output, search + + def search(self, proxy=None): + ''' + Get Freshmeat DOAP + + @param proxy: URL of optional HTTP proxy + @type proxy: string + + @rtype: unicode + @returns: Single DOAP + + ''' + if hasattr(self.options, self.name): + self.query = getattr(self.options, self.name) + #Else self.query was set directly, someone not using the CLI + try: + return get_by_pkg_index(self.name, self.query, proxy) + except NotFoundError: + print "Not found: %s" % self.query + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/homepage.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/homepage.py new file mode 100755 index 00000000..91b71028 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/homepage.py @@ -0,0 +1,84 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" + +homepage +======== + +Fetches DOAP by searching doapspace.org by a project's homepage. 
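+
+A minimal sketch of the helper used by this plugin (the URL is a
+placeholder and the doapspace.org lookup service must be reachable)::
+
+    doap_xml = do_search('http://example.org/myproject/')
+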
+ +""" + +__docformat__ = 'epytext' + +import logging + +from doapfiend.plugins.base import Plugin +from doapfiend.doaplib import fetch_doap, query_by_homepage + +LOG = logging.getLogger("doapfiend") + +class OutputPlugin(Plugin): + + """Class for formatting DOAP output""" + + #This will be the opt_parser option (--xml) in the output group + name = "homepage" + enabled = False + enable_opt = name + + def __init__(self): + '''Setup RDF/XML OutputPlugin class''' + super(OutputPlugin, self).__init__() + self.options = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + search.add_option('-o', '--%s' % self.name, + action='store', + dest=self.enable_opt, + help="Search for DOAP by a project's homepage", + metavar='HOMEPAGE_URL') + return parser, output, search + + def search(self): + ''' + Get DOAP given a project's homepage + + @rtype: unicode + @return: DOAP + ''' + return do_search(self.options.homepage) + +def do_search(homepage): + ''' + Get DOAP given a project's homepage + + @param homepage: Project homepage URL + + @rtype: unicode + @return: DOAP + ''' + resp = query_by_homepage(homepage) + LOG.debug(resp) + if len(resp) == 0: + LOG.error("Not found: %s" % homepage) + return + elif len(resp) == 1: + url = resp[0][1] + else: + #Multiple, send warning and use first 'external' if any + LOG.warn("Warning: Multiple DOAP found.") + url = None + for this in resp: + LOG.warn(this) + if not url: + #Keep first one if there is no external DOAP + url = this[1] + if this[0] == 'ex': + url = this[1] + LOG.warn("Using %s" % url) + return fetch_doap(url) + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/n3.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/n3.py new file mode 100755 index 00000000..88e25a2f --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/n3.py @@ -0,0 +1,91 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" + +Serializer for N3 (Notation 3) +============================== + +This is a plugin for formatting DOAP output as N3 (Notation 3) syntax. 
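+
+A short usage sketch (assumes the DOAP RDF/XML text has already been
+fetched, e.g. with doapfiend.doaplib.fetch_doap)::
+
+    n3_text = get_n3(doap_xml, color=False)
+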
+ +""" + +__docformat__ = 'epytext' + +import logging +from cStringIO import StringIO + +from rdflib import ConjunctiveGraph + +from doapfiend.plugins.base import Plugin + +LOG = logging.getLogger(__name__) + + +def get_n3(xml_text, color=False): + ''' + Return N3 (Notation 3) text + Note: Returns string for non-color and unicode for colored text + + @param xml_text: XML/RDF + @type xml_text: string + + @rtype: unicode or string + @return: DOAP in Notation 3 + ''' + store = ConjunctiveGraph() + graph = store.parse(StringIO(xml_text), publicID=None, format="xml") + notation3 = graph.serialize(format="n3") + + if color: + #pygments plugin fools pylint + # pylint: disable-msg=E0611 + try: + from pygments import highlight + from doapfiend.lexers import Notation3Lexer + from pygments.formatters import TerminalFormatter + except ImportError: + return notation3 + return highlight(notation3, + Notation3Lexer(), + TerminalFormatter(full=False)) + else: + return notation3 + +class OutputPlugin(Plugin): + + """Class for formatting DOAP output""" + + #This will be the opt_parser option (--n3) + name = "n3" + enabled = False + enable_opt = None + + def __init__(self): + '''Setup N3 OutputPlugin class''' + super(OutputPlugin, self).__init__() + self.options = None + + def serialize(self, doap_xml, color=False): + ''' + Serialize RDF/XML DOAP as N3 syntax + + @param doap_xml: DOAP in RDF/XML serialization + @type doap_xml: string + + @rtype: unicode + @return: DOAP in Notation 3 + ''' + if hasattr(self, 'options') and hasattr(self.options, 'no_color'): + color = not self.options.no_color + return get_n3(doap_xml, color) + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + output.add_option('-n', '--%s' % self.name, + action='store_true', + dest=self.enable_opt, + help='Output DOAP as Notation 3') + return parser, output, search + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/ohloh.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/ohloh.py new file mode 100755 index 00000000..64a25f99 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/ohloh.py @@ -0,0 +1,66 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" + +ohloh +===== + +This plugin uses http://rdfohloh.wikier.org/ to fetch DOAP for +projects listed on Ohlohh. 
+ + +""" + +__docformat__ = 'epytext' + + +from doapfiend.utils import NotFoundError +from doapfiend.plugins.base import Plugin +from doapfiend.plugins.pkg_index import get_by_pkg_index + + +class OhlohPlugin(Plugin): + + """Get DOAP from Ohloh package index""" + + #This will be the opt_parser option (--oh) in the output group + name = "oh" + enabled = False + enable_opt = name + + def __init__(self): + '''Setup RDF/XML OutputPlugin class''' + super(OhlohPlugin, self).__init__() + self.options = None + self.query = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + search.add_option('--%s' % self.name, + action='store', + dest=self.enable_opt, + help='Get DOAP by its Ohloh project name or id #.', + metavar='PROJECT_NAME') + return parser, output, search + + def search(self, proxy=None): + ''' + Get Ohloh DOAP + + @param proxy: Option HTTP proxy URL + @type proxy: string + + @rtype: unicode + @returns: Single DOAP + + ''' + if hasattr(self.options, self.name): + self.query = getattr(self.options, self.name) + #Else self.query was set directly, someone not using the CLI + try: + return get_by_pkg_index(self.name, self.query, proxy) + except NotFoundError: + print "Not found: %s" % self.query + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pkg_index.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pkg_index.py new file mode 100755 index 00000000..49050e38 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pkg_index.py @@ -0,0 +1,46 @@ + +''' + +Plugin helper to fetch a single DOAP file from doapspace.org +by Package Index + +''' + +from doapfiend.utils import fetch_file + +PKG_INDEX_URI = 'http://doapspace.org/doap' +OHLOH_URI = 'http://rdfohloh.wikier.org/project/' + + +def get_by_pkg_index(index, project_name, proxy=None): + ''' + Get DOAP for a package index project name from doapspace.org + + Builtin indexes: + + - 'sf' SourceForge + - 'fm' Freshmeat + - 'py' Python Package Index + - 'oh' Project listed on Ohlo + + Raises doaplib.utils.NotFound exception on HTTP 404 error + + @param index: Package index two letter abbreviation + @type index: string + + @param project_name: project name + @type project_name: string + + @param proxy: Optional HTTP proxy URL + @type proxy: string + + @rtype: string + @return: text of file retrieved + + ''' + if index == 'oh': + url = '%s/%s/rdf' % (OHLOH_URI, project_name) + else: + url = '%s/%s/%s' % (PKG_INDEX_URI, index, project_name) + return fetch_file(url, proxy) + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pypi.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pypi.py new file mode 100755 index 00000000..5f0f4b17 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/pypi.py @@ -0,0 +1,64 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" +pypi +==== + +Currently this plugin uses http://doapspace.org/ to fetch DOAP for PyPI +(The Python Package Index) + +""" + +__docformat__ = 'epytext' + + +from doapfiend.utils import NotFoundError +from doapfiend.plugins.base import Plugin +from doapfiend.plugins.pkg_index import get_by_pkg_index + + +class PyPIPlugin(Plugin): + + """Get DOAP from PyPI package index""" + + #This will be the opt_parser option (--py) in the output group + name = 'py' + enabled = False + enable_opt = name + + def __init__(self): + '''Setup RDF/XML OutputPlugin class''' + super(PyPIPlugin, self).__init__() + 
self.options = None + self.query = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + search.add_option('--%s' % self.name, + action='store', + dest=self.enable_opt, + help='Get DOAP by its PyPI project name.', + metavar='PROJECT_NAME') + return parser, output, search + + def search(self, proxy=None): + ''' + Get PyPI DOAP + + @param proxy: URL of optional HTTP proxy + @type proxy: string + + @rtype: unicode + @returns: Single DOAP + + ''' + if hasattr(self.options, self.name): + self.query = getattr(self.options, self.name) + #Else self.query was set directly, someone not using the CLI + try: + return get_by_pkg_index(self.name, self.query, proxy) + except NotFoundError: + print "Not found: %s" % self.query + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/sourceforge.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/sourceforge.py new file mode 100755 index 00000000..09b06328 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/sourceforge.py @@ -0,0 +1,65 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" + +sourceforge +=========== + +Currently this plugin uses http://doapspace.org/ to fetch DOAP for SourceForge + + +""" + +__docformat__ = 'epytext' + + +from doapfiend.utils import NotFoundError +from doapfiend.plugins.base import Plugin +from doapfiend.plugins.pkg_index import get_by_pkg_index + + +class SourceForgePlugin(Plugin): + + """Get DOAP from SourceForge package index""" + + #This will be the opt_parser option (--sf) in the output group + name = "sf" + enabled = False + enable_opt = name + + def __init__(self): + '''Setup RDF/XML OutputPlugin class''' + super(SourceForgePlugin, self).__init__() + self.options = None + self.query = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + search.add_option('--%s' % self.name, + action='store', + dest=self.enable_opt, + help='Get DOAP by its SourceForge project name.', + metavar='PROJECT_NAME') + return parser, output, search + + def search(self, proxy=None): + ''' + Get SourceForge DOAP + + @param proxy: Option HTTP proxy URL + @type proxy: string + + @rtype: unicode + @returns: Single DOAP + + ''' + if hasattr(self.options, self.name): + self.query = getattr(self.options, self.name) + #Else self.query was set directly, someone not using the CLI + try: + return get_by_pkg_index(self.name, self.query, proxy) + except NotFoundError: + print "Not found: %s" % self.query + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/text.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/text.py new file mode 100755 index 00000000..83e6b48c --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/text.py @@ -0,0 +1,290 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 +""" + +Plain text serializer +===================== + +This plugin outputs DOAP in human-readable plain text + +""" + +__docformat__ = 'epytext' + +import logging +import textwrap +from cStringIO import StringIO + +from rdflib import Namespace +from rdfalchemy import rdfSubject + +from doapfiend.plugins.base import Plugin +from doapfiend.utils import COLOR +from doapfiend.doaplib import load_graph + + +FOAF = Namespace("http://xmlns.com/foaf/0.1/") + +LOG = logging.getLogger(__name__) + + +class OutputPlugin(Plugin): + + """Class for formatting DOAP output""" + + #This will be the opt_parser option 
(--text) + name = "text" + enabled = False + enable_opt = None + + def __init__(self): + '''Setup Plain Text OutputPlugin class''' + super(OutputPlugin, self).__init__() + self.options = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + output.add_option('--%s' % self.name, + action='store_true', + dest=self.enable_opt, + help='Output DOAP as plain text (Default)') + return parser, output, search + + def serialize(self, doap_xml, color=False): + ''' + Serialize RDF/XML DOAP as plain text + + @param doap_xml: DOAP in RDF/XML serialization + @type doap_xml: string + + @rtype: unicode + @return: DOAP in plain text + ''' + if hasattr(self.options, 'no_color'): + color = not self.options.no_color + if not color: + #This has already been done if we're called from cli.py + #Fix me: Need to think on this. + for this in COLOR: + COLOR[this] = '\x1b[0m' + if hasattr(self.options, 'quiet'): + brief = self.options.quiet + else: + brief = False + + printer = DoapPrinter(load_graph(doap_xml, get_list=True), brief, color) + return printer.print_doap() + + +class DoapPrinter(object): + + '''Prints DOAP in human readable text''' + + def __init__(self, doap, brief=False, color=False): + '''Initialize attributes''' + self.brief = brief + self.doap_list = doap + self.doap = None + self.text = StringIO() + self.color = color + + def write(self, text): + ''' + Write to DOAP output file object + ''' + self.text.write(text.encode('utf-8') + '\n') + + def print_doap(self): + ''' + Serialize DOAP in human readable text, optionally colorized + + @rtype: unicode + @return: DOAP as plain text + ''' + for doap in self.doap_list: + self.doap = doap + self.print_misc() + if self.brief: + return + self.print_people() + self.print_repos() + self.print_releases() + doap = self.text.getvalue() + self.text.close() + return doap + + def print_misc(self): + '''Prints basic DOAP metadata''' + #We should be able to get this from model.py automatically, + #but this lets us print in the order we like. + #Maybe move this to that model.py so we don't forget to sync + #when the DOAP schema changes. 
+ fields = ('name', 'shortname', 'homepage', 'shortdesc', + 'description', 'old_homepage', 'created', + 'download_mirror') + + fields_verbose = ('license', 'programming_language', + 'bug_database', 'screenshots', 'oper_sys', + 'wiki', 'download_page', 'mailing_list') + + for fld in fields: + self.print_field(fld) + if not self.brief: + for fld in fields_verbose: + self.print_field(fld) + + def print_repos(self): + '''Prints DOAP repository metadata''' + if hasattr(self.doap.cvs_repository, 'module') and \ + self.doap.cvs_repository.module is not None: + self.write(misc_field('CVS Module:', + self.doap.cvs_repository.module)) + self.write(misc_field('CVS Anon:', + self.doap.cvs_repository.anon_root)) + self.write(misc_field('CVS Browse:', + self.doap.cvs_repository.cvs_browse.resUri)) + + if hasattr(self.doap.svn_repository, 'location') and \ + self.doap.svn_repository.location is not None: + self.write(misc_field('SVN Location:', + self.doap.svn_repository.location.resUri)) + + if hasattr(self.doap.svn_repository, 'svn_browse') and \ + self.doap.svn_repository.svn_browse is not None: + self.write(misc_field('SVN Browse:', + self.doap.svn_repository.svn_browse.resUri)) + + def print_releases(self): + '''Print DOAP package release metadata''' + if hasattr(self.doap, 'releases') and len(self.doap.releases) != 0: + self.write(COLOR['bold'] + "Releases:" + COLOR['normal']) + for release in self.doap.releases: + if release.name: + self.write(COLOR['bold'] + COLOR['cyan'] + release.name + \ + COLOR['normal']) + if hasattr(release, 'created') and release.created is not None: + created = release.created + else: + created = '' + self.write(COLOR['cyan'] + ' ' + release.revision + ' ' + \ + COLOR['normal'] + created) + if hasattr(release, 'changelog'): + if release.changelog: + self.write(COLOR['yellow'] + \ + release.changelog + + COLOR['normal'] + ) + for frel in release.file_releases: + self.write(' %s' % frel.resUri) + + def print_people(self): + '''Print all people involved in the project''' + people = ['maintainer', 'developer', 'documenter', 'helper', + 'tester', 'translator'] + for job in people: + if hasattr(self.doap, job): + attribs = getattr(self.doap, job) + if len(attribs) > 0: + peeps = [] + for attr in attribs: + if attr[FOAF.mbox] is None: + person = "%s" % attr[FOAF.name] + else: + mbox = attr[FOAF.mbox].resUri + if mbox.startswith('mailto:'): + mbox = mbox[7:] + person = "%s <%s>" % (attr[FOAF.name], mbox) + else: + LOG.debug("mbox is invalid: %s" % mbox) + person = "%s" % attr[FOAF.name] + peeps.append(person) + label = job.capitalize() + "s:" + #label = label.ljust(13) + self.write(misc_field(label, + ", ".join([p for p in peeps]))) + + def print_field(self, name): + ''' + Print a DOAP element + + @param name: A misc DOAP element + @type name: string, list or RDFSubject + + @rtype: None + @return: Nothing + ''' + if not hasattr(self.doap, name): + return + attr = getattr(self.doap, name) + if attr is [] or attr is None: + return + + label = '%s' % COLOR['bold'] + pretty_name(name) + \ + COLOR['normal'] + ':' + label = label.ljust(21) + if isinstance(attr, list): + #Can have multiple values per attribute + text = "" + for thing in getattr(self.doap, name): + if isinstance(thing, rdfSubject): + text += thing.resUri + "\n" + else: + #unicode object + thing = thing.strip() + text += thing + "\n" + else: + text = getattr(self.doap, name) + if isinstance(text, rdfSubject): + text = text.resUri + else: + text = text.strip() + if text: + if text.startswith('http://'): + self.write('%s 
%s' % (label, text.strip())) + else: + self.write(textwrap.fill('%s %s' % (label, text), + initial_indent='', + subsequent_indent = ' ')) + + +def pretty_name(field): + """ + Convert DOAP element name to pretty printable label + Shorten some labels for formatting purposes + + @param field: Text to be formatted + @type field: C{string} + + @return: formatted string + @rtype: string + """ + if field == 'programming_language': + field = 'Prog. Lang.' + elif field == 'created': + field = 'DOAP Created' + else: + field = field.capitalize() + field = field.replace('_', ' ') + field = field.replace('-', ' ') + return field + + +def misc_field(label, text): + ''' + Print colorized and justified single label value pair + + @param label: A label + @type label: string + + @param text: Text to print + @type text: string + + @rtype: string + @return: Colorized, left-justified text with label + ''' + label = label.ljust(13) + label = COLOR['bold'] + label + COLOR['normal'] + return '%s %s' % (label, text) + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/url.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/url.py new file mode 100755 index 00000000..43b55b91 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/url.py @@ -0,0 +1,58 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" + +url.py +====== + +This plugin loads DOAP by its URL or path to a filename. + + +""" + +__docformat__ = 'epytext' + + +from doapfiend.plugins.base import Plugin +from doapfiend.utils import NotFoundError +from doapfiend.doaplib import fetch_doap + + +class UrlPlugin(Plugin): + + """Class for formatting DOAP output""" + + #This will be the opt_parser option (--url) in the 'search' group + name = 'url' + enabled = False + enable_opt = name + + def __init__(self): + '''Setup RDF/XML OutputPlugin class''' + super(UrlPlugin, self).__init__() + self.options = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + search.add_option('-u', '--%s' % self.name, + action='store', + dest=self.enable_opt, + help='Get DOAP by its URL or by filename.', + metavar='URL') + return parser, output, search + + def search(self): + ''' + Get DOAP by its URL or file path + This can be any RDF as long as it has the DOAP namespace. + + @rtype: unicode + @return: DOAP + ''' + try: + return fetch_doap(self.options.url, self.options.proxy) + except NotFoundError: + print "Not found: %s" % self.options.url + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/xml.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/xml.py new file mode 100755 index 00000000..4d084fb5 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/plugins/xml.py @@ -0,0 +1,80 @@ +#!/usr/bin/env python + +# pylint: disable-msg=W0221,R0201 + +""" + +Serialize DOAP as XML/RDF +========================= + +This plugin outputs DOAP in RDF/XML +It basically does nothing because all DOAP today is in RDF/XML. +In the future this may take N3, Turtle, RDFa etc. and convert it to RDF/XML. 
+ +""" + +__docformat__ = 'epytext' + +from elementtree import ElementTree + +from doapfiend.plugins.base import Plugin + + +class OutputPlugin(Plugin): + + """Class for formatting DOAP output""" + + #This will be the opt_parser option (--xml) in the output group + name = "xml" + enabled = False + enable_opt = None + + def __init__(self): + '''Setup RDF/XML OutputPlugin class''' + super(OutputPlugin, self).__init__() + self.options = None + + def add_options(self, parser, output, search): + """Add plugin's options to doapfiend's opt parser""" + output.add_option('-x', '--%s' % self.name, + action='store_true', + dest=self.enable_opt, + help='Output DOAP as RDF/XML') + return parser, output, search + + def serialize(self, doap_xml, color=False): + ''' + Serialize RDF/XML DOAP as N3 syntax + + Since the only input we currently have is XML, all this really does + is parse the XML and raise an exception if it's invalid. + When we do content negotiation/accept N3 etc., this will serialize. + + @param doap_xml: DOAP in RDF/XML serialization + @type doap_xml: string + + @rtype: unicode + @returns: DOAP + ''' + #This will raise ExpatError if we have invalid XML + #(from xml.parsers.expat import ExpatError) + #We could format/pretty print here but we just return exactly what + #was fetched. + ElementTree.fromstring(doap_xml) + if hasattr(self.options, 'no_color'): + color = not self.options.no_color + if color: + #pygments plugin fools pylint + # pylint: disable-msg=E0611 + try: + from pygments import highlight + from pygments.lexers import XmlLexer + from pygments.formatters import TerminalFormatter + except ImportError: + return doap_xml + return highlight(doap_xml, + XmlLexer(), + TerminalFormatter(full=False)) + else: + return doap_xml + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/utils.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/utils.py new file mode 100755 index 00000000..b727ae1a --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/utils.py @@ -0,0 +1,216 @@ + +""" + +utils.py +======== + +Misc utilities for doapfiend +---------------------------- + +General purpose helper functions and classes for doapfiend +You'll probably want to use doaplib for most cases. + +License: BSD-2 + +""" + +#pylint: disable-msg=C0103 + +import urllib +import logging +import urlparse +from httplib import HTTPConnection +from urllib2 import build_opener, HTTPError, ProxyHandler, URLError + + +__docformat__ = 'epytext' + +LOG = logging.getLogger('doapfiend') + +COLOR = {'normal': "\033[0m", + 'bold': "\033[1m", + 'underline': "\033[4m", + 'blink': "\033[5m", + 'reverse': "\033[7m", + 'black': "\033[30m", + 'red': "\033[31m", + 'green': "\033[32m", + 'yellow': "\033[33m", + 'blue': "\033[34m", + 'magenta': "\033[35m", + 'cyan': "\033[36m", + 'white': "\033[37m"} + + +class NotFoundError(Exception): + + '''DOAP not found''' + + #pylint: disable-msg=W0231 + def __init__(self, err_msg): + '''Initialize attributes''' + self.err_msg = err_msg + + def __str__(self): + return repr(self.err_msg) + + +def http_filesize(url): + """ + Get the size of file without downloading it. 
+ bla bla bla + blaba + + @param url: URL of file + @type url: string + + @rtype: string + @return: Size of file + + Usage: + + >>> http_filesize('http://trac.doapspace.org/test_file.txt') + '160' + """ + + host, path = urlparse.urlsplit(url)[1:3] + if ':' in host: + # port specified, try to use it + host, port = host.split(':', 1) + try: + port = int(port) + except ValueError: + LOG.error('invalid port number %r' % port) + return False + else: + # no port specified, use default port + port = None + connection = HTTPConnection(host, port=port) + connection.request("HEAD", path) + resp = connection.getresponse() + return resp.getheader('content-length') + + +def http_exists(url): + """ + A quick way to check if a file exists on the web. + + @param url: URL of the document + @type url: string + @rtype: boolean + @return: True or False + + Usage: + + >>> http_exists('http://www.python.org/') + True + >>> http_exists('http://www.python.org/PenguinOnTheTelly') + False + """ + + host, path = urlparse.urlsplit(url)[1:3] + if ':' in host: + #port specified, try to use it + host, port = host.split(':', 1) + try: + port = int(port) + except ValueError: + LOG.error('invalid port number %r' % port) + return False + else: + #no port specified, use default port + port = None + connection = HTTPConnection(host, port=port) + connection.request("HEAD", path) + resp = connection.getresponse() + if resp.status == 200: # normal 'found' status + found = True + elif resp.status == 302: # recurse on temporary redirect + found = http_exists(urlparse.urljoin(url, + resp.getheader('location', ''))) + else: # everything else -> not found + LOG.info("Status %d %s : %s" % (resp.status, resp.reason, url)) + found = False + return found + + +def is_content_type(url_or_file, content_type): + """ + Tells whether the URL or pseudofile from urllib.urlopen is of + the required content type. 
+ + @param url_or_file: URL or file path + @type url_or_file: string + @param content_type: Content type we're looking for + @type content_type: string + + @rtype: boolean + @returns: True if it can return the Content type we want + + Usage: + + >>> is_content_type('http://doapspace.org/doap/sf/nlyrics.rdf', \ + 'application/rdf+xml') + True + >>> is_content_type('http://doapspace.org/', 'application/rdf+xml') + False + """ + try: + if isinstance(url_or_file, str): + thefile = urllib.urlopen(url_or_file) + else: + thefile = url_or_file + result = thefile.info().gettype() == content_type.lower() + if thefile is not url_or_file: + thefile.close() + except IOError: + result = False + return result + + +def fetch_file(url, proxy=None): + ''' + Download file by URL + + @param url: URL of a file + @type url: string + + @param proxy: URL of HTTP Proxy + @type proxy: string + + @return: File + @rtype: string + + ''' + if not url.startswith('http://') and not url.startswith('ftp://'): + try: + return open(url, 'r').read() + except IOError, errmsg: + LOG.error(errmsg) + return '' + LOG.debug('Fetching ' + url) + if proxy: + opener = build_opener(ProxyHandler({'http': proxy})) + else: + opener = build_opener() + opener.addheaders = [('Accept', 'application/rdf+xml'), + ('User-agent', + 'Mozilla/5.0 (compatible; doapfiend ' + + 'http://trac.doapspace.org/doapfiend)')] + try: + result = opener.open(url) + except HTTPError, err_msg: + if err_msg.code == 404: + raise NotFoundError('Not found: %s' % url) + else: + LOG.error(err_msg) + except URLError, err_msg: + LOG.error(err_msg) + return + return result.read() + + +if __name__ == '__main__': + import doctest + doctest.testmod() + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/api-objects.txt b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/api-objects.txt new file mode 100755 index 00000000..9836a706 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/api-objects.txt @@ -0,0 +1,267 @@ +doapfiend doapfiend-module.html +doapfiend.log doapfiend-module.html#log +doapfiend.cli doapfiend.cli-module.html +doapfiend.cli.load_plugins doapfiend.plugins-module.html#load_plugins +doapfiend.cli.__revision__ doapfiend.cli-module.html#__revision__ +doapfiend.cli.follow_homepages doapfiend.doaplib-module.html#follow_homepages +doapfiend.cli.print_doap doapfiend.doaplib-module.html#print_doap +doapfiend.cli.doapfiend_version doapfiend.cli-module.html#doapfiend_version +doapfiend.cli.show_links doapfiend.doaplib-module.html#show_links +doapfiend.cli.main doapfiend.cli-module.html#main +doapfiend.doaplib doapfiend.doaplib-module.html +doapfiend.doaplib.load_plugins doapfiend.plugins-module.html#load_plugins +doapfiend.doaplib.LOG doapfiend.doaplib-module.html#LOG +doapfiend.doaplib.get_serializer doapfiend.doaplib-module.html#get_serializer +doapfiend.doaplib.load_graph doapfiend.doaplib-module.html#load_graph +doapfiend.doaplib.fetch_doap doapfiend.doaplib-module.html#fetch_doap +doapfiend.doaplib.rdf_has_doap doapfiend.doaplib-module.html#rdf_has_doap +doapfiend.doaplib.get_homepages doapfiend.doaplib-module.html#get_homepages +doapfiend.doaplib.XMLRPC_SERVER doapfiend.doaplib-module.html#XMLRPC_SERVER +doapfiend.doaplib.print_doap_by_homepages doapfiend.doaplib-module.html#print_doap_by_homepages +doapfiend.doaplib.get_by_pkg_index doapfiend.doaplib-module.html#get_by_pkg_index +doapfiend.doaplib.query_by_homepage doapfiend.doaplib-module.html#query_by_homepage +doapfiend.doaplib.show_links 
doapfiend.doaplib-module.html#show_links +doapfiend.doaplib.DOAP_NS doapfiend.doaplib-module.html#DOAP_NS +doapfiend.doaplib.print_doap doapfiend.doaplib-module.html#print_doap +doapfiend.doaplib.fetch_file doapfiend.utils-module.html#fetch_file +doapfiend.doaplib.get_plugin doapfiend.doaplib-module.html#get_plugin +doapfiend.doaplib.follow_homepages doapfiend.doaplib-module.html#follow_homepages +doapfiend.lexers doapfiend.lexers-module.html +doapfiend.model doapfiend.model-module.html +doapfiend.model.FOAF doapfiend.model-module.html#FOAF +doapfiend.model.DOAP doapfiend.model-module.html#DOAP +doapfiend.model.DC doapfiend.model-module.html#DC +doapfiend.plugins doapfiend.plugins-module.html +doapfiend.plugins.load_plugins doapfiend.plugins-module.html#load_plugins +doapfiend.plugins.LOG doapfiend.plugins-module.html#LOG +doapfiend.plugins.call_plugins doapfiend.plugins-module.html#call_plugins +doapfiend.plugins.builtin_plugins doapfiend.plugins-module.html#builtin_plugins +doapfiend.plugins.base doapfiend.plugins.base-module.html +doapfiend.plugins.fields doapfiend.plugins.fields-module.html +doapfiend.plugins.fields.FOAF doapfiend.plugins.fields-module.html#FOAF +doapfiend.plugins.fields.LOG doapfiend.plugins.fields-module.html#LOG +doapfiend.plugins.fields.get_people doapfiend.plugins.fields-module.html#get_people +doapfiend.plugins.fields.print_field doapfiend.plugins.fields-module.html#print_field +doapfiend.plugins.fields.load_graph doapfiend.doaplib-module.html#load_graph +doapfiend.plugins.fields.get_releases doapfiend.plugins.fields-module.html#get_releases +doapfiend.plugins.fields.print_list doapfiend.plugins.fields-module.html#print_list +doapfiend.plugins.fields.print_repos doapfiend.plugins.fields-module.html#print_repos +doapfiend.plugins.freshmeat doapfiend.plugins.freshmeat-module.html +doapfiend.plugins.freshmeat.get_by_pkg_index doapfiend.plugins.pkg_index-module.html#get_by_pkg_index +doapfiend.plugins.homepage doapfiend.plugins.homepage-module.html +doapfiend.plugins.homepage.LOG doapfiend.plugins.homepage-module.html#LOG +doapfiend.plugins.homepage.fetch_doap doapfiend.doaplib-module.html#fetch_doap +doapfiend.plugins.homepage.do_search doapfiend.plugins.homepage-module.html#do_search +doapfiend.plugins.homepage.query_by_homepage doapfiend.doaplib-module.html#query_by_homepage +doapfiend.plugins.n3 doapfiend.plugins.n3-module.html +doapfiend.plugins.n3.LOG doapfiend.plugins.n3-module.html#LOG +doapfiend.plugins.n3.get_n3 doapfiend.plugins.n3-module.html#get_n3 +doapfiend.plugins.ohloh doapfiend.plugins.ohloh-module.html +doapfiend.plugins.ohloh.get_by_pkg_index doapfiend.plugins.pkg_index-module.html#get_by_pkg_index +doapfiend.plugins.pkg_index doapfiend.plugins.pkg_index-module.html +doapfiend.plugins.pkg_index.OHLOH_URI doapfiend.plugins.pkg_index-module.html#OHLOH_URI +doapfiend.plugins.pkg_index.get_by_pkg_index doapfiend.plugins.pkg_index-module.html#get_by_pkg_index +doapfiend.plugins.pkg_index.fetch_file doapfiend.utils-module.html#fetch_file +doapfiend.plugins.pkg_index.PKG_INDEX_URI doapfiend.plugins.pkg_index-module.html#PKG_INDEX_URI +doapfiend.plugins.pypi doapfiend.plugins.pypi-module.html +doapfiend.plugins.pypi.get_by_pkg_index doapfiend.plugins.pkg_index-module.html#get_by_pkg_index +doapfiend.plugins.sourceforge doapfiend.plugins.sourceforge-module.html +doapfiend.plugins.sourceforge.get_by_pkg_index doapfiend.plugins.pkg_index-module.html#get_by_pkg_index +doapfiend.plugins.text doapfiend.plugins.text-module.html +doapfiend.plugins.text.FOAF 
doapfiend.plugins.text-module.html#FOAF +doapfiend.plugins.text.LOG doapfiend.plugins.text-module.html#LOG +doapfiend.plugins.text.pretty_name doapfiend.plugins.text-module.html#pretty_name +doapfiend.plugins.text.load_graph doapfiend.doaplib-module.html#load_graph +doapfiend.plugins.text.misc_field doapfiend.plugins.text-module.html#misc_field +doapfiend.plugins.url doapfiend.plugins.url-module.html +doapfiend.plugins.url.fetch_doap doapfiend.doaplib-module.html#fetch_doap +doapfiend.plugins.xml doapfiend.plugins.xml-module.html +doapfiend.utils doapfiend.utils-module.html +doapfiend.utils.LOG doapfiend.utils-module.html#LOG +doapfiend.utils.COLOR doapfiend.utils-module.html#COLOR +doapfiend.utils.http_exists doapfiend.utils-module.html#http_exists +doapfiend.utils.is_content_type doapfiend.utils-module.html#is_content_type +doapfiend.utils.fetch_file doapfiend.utils-module.html#fetch_file +doapfiend.utils.http_filesize doapfiend.utils-module.html#http_filesize +doapfiend.cli.DoapFiend doapfiend.cli.DoapFiend-class.html +doapfiend.cli.DoapFiend.get_plugin doapfiend.cli.DoapFiend-class.html#get_plugin +doapfiend.cli.DoapFiend.get_search_plugin doapfiend.cli.DoapFiend-class.html#get_search_plugin +doapfiend.cli.DoapFiend.run doapfiend.cli.DoapFiend-class.html#run +doapfiend.cli.DoapFiend.set_serializer doapfiend.cli.DoapFiend-class.html#set_serializer +doapfiend.cli.DoapFiend.print_doap doapfiend.cli.DoapFiend-class.html#print_doap +doapfiend.cli.DoapFiend.setup_opt_parser doapfiend.cli.DoapFiend-class.html#setup_opt_parser +doapfiend.cli.DoapFiend.set_log_level doapfiend.cli.DoapFiend-class.html#set_log_level +doapfiend.cli.DoapFiend.__init__ doapfiend.cli.DoapFiend-class.html#__init__ +doapfiend.lexers.Notation3Lexer doapfiend.lexers.Notation3Lexer-class.html +pygments.lexer.RegexLexer.__metaclass__ pygments.lexer.RegexLexerMeta-class.html +doapfiend.lexers.Notation3Lexer.aliases doapfiend.lexers.Notation3Lexer-class.html#aliases +doapfiend.lexers.Notation3Lexer.mimetypes doapfiend.lexers.Notation3Lexer-class.html#mimetypes +doapfiend.lexers.Notation3Lexer.filenames doapfiend.lexers.Notation3Lexer-class.html#filenames +doapfiend.lexers.Notation3Lexer.tokens doapfiend.lexers.Notation3Lexer-class.html#tokens +doapfiend.lexers.Notation3Lexer.name doapfiend.lexers.Notation3Lexer-class.html#name +doapfiend.lexers.SparqlLexer doapfiend.lexers.SparqlLexer-class.html +pygments.lexer.RegexLexer.__metaclass__ pygments.lexer.RegexLexerMeta-class.html +doapfiend.lexers.SparqlLexer.aliases doapfiend.lexers.SparqlLexer-class.html#aliases +doapfiend.lexers.SparqlLexer.mimetypes doapfiend.lexers.SparqlLexer-class.html#mimetypes +doapfiend.lexers.SparqlLexer.filenames doapfiend.lexers.SparqlLexer-class.html#filenames +doapfiend.lexers.SparqlLexer.tokens doapfiend.lexers.SparqlLexer-class.html#tokens +doapfiend.lexers.SparqlLexer.name doapfiend.lexers.SparqlLexer-class.html#name +doapfiend.lexers.SparqlLexer.flags doapfiend.lexers.SparqlLexer-class.html#flags +doapfiend.model.CVSRepository doapfiend.model.CVSRepository-class.html +doapfiend.model.CVSRepository.anon_root doapfiend.model.CVSRepository-class.html#anon_root +doapfiend.model.CVSRepository.rdf_type doapfiend.model.CVSRepository-class.html#rdf_type +doapfiend.model.CVSRepository.module doapfiend.model.CVSRepository-class.html#module +doapfiend.model.CVSRepository.cvs_browse doapfiend.model.CVSRepository-class.html#cvs_browse +doapfiend.model.Project doapfiend.model.Project-class.html +doapfiend.model.Project.wiki 
doapfiend.model.Project-class.html#wiki +doapfiend.model.Project.maintainer doapfiend.model.Project-class.html#maintainer +doapfiend.model.Project.helper doapfiend.model.Project-class.html#helper +doapfiend.model.Project.download_page doapfiend.model.Project-class.html#download_page +doapfiend.model.Project.module doapfiend.model.Project-class.html#module +doapfiend.model.Project.screenshots doapfiend.model.Project-class.html#screenshots +doapfiend.model.Project.developer doapfiend.model.Project-class.html#developer +doapfiend.model.Project.category doapfiend.model.Project-class.html#category +doapfiend.model.Project.svn_repository doapfiend.model.Project-class.html#svn_repository +doapfiend.model.Project.bug_database doapfiend.model.Project-class.html#bug_database +doapfiend.model.Project.shortname doapfiend.model.Project-class.html#shortname +doapfiend.model.Project.rdf_type doapfiend.model.Project-class.html#rdf_type +doapfiend.model.Project.homepage doapfiend.model.Project-class.html#homepage +doapfiend.model.Project.cvs_repository doapfiend.model.Project-class.html#cvs_repository +doapfiend.model.Project.programming_language doapfiend.model.Project-class.html#programming_language +doapfiend.model.Project.description doapfiend.model.Project-class.html#description +doapfiend.model.Project.releases doapfiend.model.Project-class.html#releases +doapfiend.model.Project.tester doapfiend.model.Project-class.html#tester +doapfiend.model.Project.documenter doapfiend.model.Project-class.html#documenter +doapfiend.model.Project.oper_sys doapfiend.model.Project-class.html#oper_sys +doapfiend.model.Project.translator doapfiend.model.Project-class.html#translator +doapfiend.model.Project.download_mirror doapfiend.model.Project-class.html#download_mirror +doapfiend.model.Project.name doapfiend.model.Project-class.html#name +doapfiend.model.Project.license doapfiend.model.Project-class.html#license +doapfiend.model.Project.created doapfiend.model.Project-class.html#created +doapfiend.model.Project.old_homepage doapfiend.model.Project-class.html#old_homepage +doapfiend.model.Project.shortdesc doapfiend.model.Project-class.html#shortdesc +doapfiend.model.Release doapfiend.model.Release-class.html +doapfiend.model.Release.rdf_type doapfiend.model.Release-class.html#rdf_type +doapfiend.model.Release.file_releases doapfiend.model.Release-class.html#file_releases +doapfiend.model.Release.revision doapfiend.model.Release-class.html#revision +doapfiend.model.Release.name doapfiend.model.Release-class.html#name +doapfiend.model.Release.changelog doapfiend.model.Release-class.html#changelog +doapfiend.model.Release.created doapfiend.model.Release-class.html#created +doapfiend.model.SVNRepository doapfiend.model.SVNRepository-class.html +doapfiend.model.SVNRepository.svn_browse doapfiend.model.SVNRepository-class.html#svn_browse +doapfiend.model.SVNRepository.rdf_type doapfiend.model.SVNRepository-class.html#rdf_type +doapfiend.model.SVNRepository.location doapfiend.model.SVNRepository-class.html#location +doapfiend.plugins.base.Plugin doapfiend.plugins.base.Plugin-class.html +doapfiend.plugins.base.Plugin.add_options doapfiend.plugins.base.Plugin-class.html#add_options +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.base.Plugin.enabled doapfiend.plugins.base.Plugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.base.Plugin.enable_opt 
doapfiend.plugins.base.Plugin-class.html#enable_opt +doapfiend.plugins.base.Plugin.__init__ doapfiend.plugins.base.Plugin-class.html#__init__ +doapfiend.plugins.base.Plugin.name doapfiend.plugins.base.Plugin-class.html#name +doapfiend.plugins.fields.OutputPlugin doapfiend.plugins.fields.OutputPlugin-class.html +doapfiend.plugins.fields.OutputPlugin.name doapfiend.plugins.fields.OutputPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.fields.OutputPlugin.serialize doapfiend.plugins.fields.OutputPlugin-class.html#serialize +doapfiend.plugins.fields.OutputPlugin.enabled doapfiend.plugins.fields.OutputPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.fields.OutputPlugin.enable_opt doapfiend.plugins.fields.OutputPlugin-class.html#enable_opt +doapfiend.plugins.fields.OutputPlugin.__init__ doapfiend.plugins.fields.OutputPlugin-class.html#__init__ +doapfiend.plugins.fields.OutputPlugin.add_options doapfiend.plugins.fields.OutputPlugin-class.html#add_options +doapfiend.plugins.freshmeat.FreshmeatPlugin doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html +doapfiend.plugins.freshmeat.FreshmeatPlugin.search doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html#search +doapfiend.plugins.freshmeat.FreshmeatPlugin.name doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.freshmeat.FreshmeatPlugin.enabled doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.freshmeat.FreshmeatPlugin.enable_opt doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html#enable_opt +doapfiend.plugins.freshmeat.FreshmeatPlugin.__init__ doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html#__init__ +doapfiend.plugins.freshmeat.FreshmeatPlugin.add_options doapfiend.plugins.freshmeat.FreshmeatPlugin-class.html#add_options +doapfiend.plugins.homepage.OutputPlugin doapfiend.plugins.homepage.OutputPlugin-class.html +doapfiend.plugins.homepage.OutputPlugin.search doapfiend.plugins.homepage.OutputPlugin-class.html#search +doapfiend.plugins.homepage.OutputPlugin.name doapfiend.plugins.homepage.OutputPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.homepage.OutputPlugin.enabled doapfiend.plugins.homepage.OutputPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.homepage.OutputPlugin.enable_opt doapfiend.plugins.homepage.OutputPlugin-class.html#enable_opt +doapfiend.plugins.homepage.OutputPlugin.__init__ doapfiend.plugins.homepage.OutputPlugin-class.html#__init__ +doapfiend.plugins.homepage.OutputPlugin.add_options doapfiend.plugins.homepage.OutputPlugin-class.html#add_options +doapfiend.plugins.n3.OutputPlugin doapfiend.plugins.n3.OutputPlugin-class.html +doapfiend.plugins.n3.OutputPlugin.name doapfiend.plugins.n3.OutputPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.n3.OutputPlugin.serialize doapfiend.plugins.n3.OutputPlugin-class.html#serialize +doapfiend.plugins.n3.OutputPlugin.enabled doapfiend.plugins.n3.OutputPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure 
doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.n3.OutputPlugin.enable_opt doapfiend.plugins.n3.OutputPlugin-class.html#enable_opt +doapfiend.plugins.n3.OutputPlugin.__init__ doapfiend.plugins.n3.OutputPlugin-class.html#__init__ +doapfiend.plugins.n3.OutputPlugin.add_options doapfiend.plugins.n3.OutputPlugin-class.html#add_options +doapfiend.plugins.ohloh.OhlohPlugin doapfiend.plugins.ohloh.OhlohPlugin-class.html +doapfiend.plugins.ohloh.OhlohPlugin.search doapfiend.plugins.ohloh.OhlohPlugin-class.html#search +doapfiend.plugins.ohloh.OhlohPlugin.name doapfiend.plugins.ohloh.OhlohPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.ohloh.OhlohPlugin.enabled doapfiend.plugins.ohloh.OhlohPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.ohloh.OhlohPlugin.enable_opt doapfiend.plugins.ohloh.OhlohPlugin-class.html#enable_opt +doapfiend.plugins.ohloh.OhlohPlugin.__init__ doapfiend.plugins.ohloh.OhlohPlugin-class.html#__init__ +doapfiend.plugins.ohloh.OhlohPlugin.add_options doapfiend.plugins.ohloh.OhlohPlugin-class.html#add_options +doapfiend.plugins.pypi.PyPIPlugin doapfiend.plugins.pypi.PyPIPlugin-class.html +doapfiend.plugins.pypi.PyPIPlugin.search doapfiend.plugins.pypi.PyPIPlugin-class.html#search +doapfiend.plugins.pypi.PyPIPlugin.name doapfiend.plugins.pypi.PyPIPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.pypi.PyPIPlugin.enabled doapfiend.plugins.pypi.PyPIPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.pypi.PyPIPlugin.enable_opt doapfiend.plugins.pypi.PyPIPlugin-class.html#enable_opt +doapfiend.plugins.pypi.PyPIPlugin.__init__ doapfiend.plugins.pypi.PyPIPlugin-class.html#__init__ +doapfiend.plugins.pypi.PyPIPlugin.add_options doapfiend.plugins.pypi.PyPIPlugin-class.html#add_options +doapfiend.plugins.sourceforge.SourceForgePlugin doapfiend.plugins.sourceforge.SourceForgePlugin-class.html +doapfiend.plugins.sourceforge.SourceForgePlugin.search doapfiend.plugins.sourceforge.SourceForgePlugin-class.html#search +doapfiend.plugins.sourceforge.SourceForgePlugin.name doapfiend.plugins.sourceforge.SourceForgePlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.sourceforge.SourceForgePlugin.enabled doapfiend.plugins.sourceforge.SourceForgePlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.sourceforge.SourceForgePlugin.enable_opt doapfiend.plugins.sourceforge.SourceForgePlugin-class.html#enable_opt +doapfiend.plugins.sourceforge.SourceForgePlugin.__init__ doapfiend.plugins.sourceforge.SourceForgePlugin-class.html#__init__ +doapfiend.plugins.sourceforge.SourceForgePlugin.add_options doapfiend.plugins.sourceforge.SourceForgePlugin-class.html#add_options +doapfiend.plugins.text.DoapPrinter doapfiend.plugins.text.DoapPrinter-class.html +doapfiend.plugins.text.DoapPrinter.print_people doapfiend.plugins.text.DoapPrinter-class.html#print_people +doapfiend.plugins.text.DoapPrinter.print_field doapfiend.plugins.text.DoapPrinter-class.html#print_field +doapfiend.plugins.text.DoapPrinter.print_releases doapfiend.plugins.text.DoapPrinter-class.html#print_releases +doapfiend.plugins.text.DoapPrinter.print_doap 
doapfiend.plugins.text.DoapPrinter-class.html#print_doap +doapfiend.plugins.text.DoapPrinter.write doapfiend.plugins.text.DoapPrinter-class.html#write +doapfiend.plugins.text.DoapPrinter.print_misc doapfiend.plugins.text.DoapPrinter-class.html#print_misc +doapfiend.plugins.text.DoapPrinter.print_repos doapfiend.plugins.text.DoapPrinter-class.html#print_repos +doapfiend.plugins.text.DoapPrinter.__init__ doapfiend.plugins.text.DoapPrinter-class.html#__init__ +doapfiend.plugins.text.OutputPlugin doapfiend.plugins.text.OutputPlugin-class.html +doapfiend.plugins.text.OutputPlugin.name doapfiend.plugins.text.OutputPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.text.OutputPlugin.serialize doapfiend.plugins.text.OutputPlugin-class.html#serialize +doapfiend.plugins.text.OutputPlugin.enabled doapfiend.plugins.text.OutputPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.text.OutputPlugin.enable_opt doapfiend.plugins.text.OutputPlugin-class.html#enable_opt +doapfiend.plugins.text.OutputPlugin.__init__ doapfiend.plugins.text.OutputPlugin-class.html#__init__ +doapfiend.plugins.text.OutputPlugin.add_options doapfiend.plugins.text.OutputPlugin-class.html#add_options +doapfiend.plugins.url.UrlPlugin doapfiend.plugins.url.UrlPlugin-class.html +doapfiend.plugins.url.UrlPlugin.search doapfiend.plugins.url.UrlPlugin-class.html#search +doapfiend.plugins.url.UrlPlugin.name doapfiend.plugins.url.UrlPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.url.UrlPlugin.enabled doapfiend.plugins.url.UrlPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.url.UrlPlugin.enable_opt doapfiend.plugins.url.UrlPlugin-class.html#enable_opt +doapfiend.plugins.url.UrlPlugin.__init__ doapfiend.plugins.url.UrlPlugin-class.html#__init__ +doapfiend.plugins.url.UrlPlugin.add_options doapfiend.plugins.url.UrlPlugin-class.html#add_options +doapfiend.plugins.xml.OutputPlugin doapfiend.plugins.xml.OutputPlugin-class.html +doapfiend.plugins.xml.OutputPlugin.name doapfiend.plugins.xml.OutputPlugin-class.html#name +doapfiend.plugins.base.Plugin.help doapfiend.plugins.base.Plugin-class.html#help +doapfiend.plugins.xml.OutputPlugin.serialize doapfiend.plugins.xml.OutputPlugin-class.html#serialize +doapfiend.plugins.xml.OutputPlugin.enabled doapfiend.plugins.xml.OutputPlugin-class.html#enabled +doapfiend.plugins.base.Plugin.configure doapfiend.plugins.base.Plugin-class.html#configure +doapfiend.plugins.xml.OutputPlugin.enable_opt doapfiend.plugins.xml.OutputPlugin-class.html#enable_opt +doapfiend.plugins.xml.OutputPlugin.__init__ doapfiend.plugins.xml.OutputPlugin-class.html#__init__ +doapfiend.plugins.xml.OutputPlugin.add_options doapfiend.plugins.xml.OutputPlugin-class.html#add_options +doapfiend.utils.NotFoundError doapfiend.utils.NotFoundError-class.html +doapfiend.utils.NotFoundError.__str__ doapfiend.utils.NotFoundError-class.html#__str__ +doapfiend.utils.NotFoundError.__init__ doapfiend.utils.NotFoundError-class.html#__init__ +pygments.lexer.RegexLexerMeta pygments.lexer.RegexLexerMeta-class.html +pygments.lexer.RegexLexerMeta._process_state pygments.lexer.RegexLexerMeta-class.html#_process_state +pygments.lexer.RegexLexerMeta.__call__ pygments.lexer.RegexLexerMeta-class.html#__call__ 
+pygments.lexer.RegexLexerMeta.process_tokendef pygments.lexer.RegexLexerMeta-class.html#process_tokendef diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/class-tree.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/class-tree.html new file mode 100755 index 00000000..e862c17f --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/class-tree.html @@ -0,0 +1,221 @@ + + + + + Class Hierarchy + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
  + + + + +
[hide private]
[frames] | no frames]
+
+
+ [ Module Hierarchy + | Class Hierarchy ] +

+

Class Hierarchy

+ + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/crarr.png b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/crarr.png new file mode 100755 index 0000000000000000000000000000000000000000..26b43c52433b71e72a9a478c52d446278335f0e4 GIT binary patch literal 340 zcmeAS@N?(olHy`uVBq!ia0vp^f?NMQuI$%1#8??M1uoZK z0}62#ctjR6FvuMOVaB`*rFK9;mUKs7M+SzC{oH>NS%G}l0G|-o|NsA=J-p%i`2!7U zCdJ_j4{u-SDsoA1U`TRixpVcz%O`iHHAYk?=&YaLkmD!Pp6~GW^M_S4D^grJKD>P~ zuPf!ku`N^TLavn`Edv_JSQ6wH%;50sMjDXg>*?YcQgJIe!GUqln>_|<+Os&OOUQS1 zY~Wzutud*iVS#|PHMc&?2WHoZpEo8l+6!Oc$x~=%U)469Gl^f?nq7UBw#1AXkrEde cmFKWBXcRFE*(?@T0vgQV>FVdQ&MBb@0LpZ4r2qf` literal 0 HcmV?d00001 diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-module.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-module.html new file mode 100755 index 00000000..6337867f --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-module.html @@ -0,0 +1,217 @@ + + + + + doapfiend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Package doapfiend

source code

+

doapfiend

+

http://trac.doapspace.org/doapfiend

+

Description

+

doapfiend is a command-line client and library for querying, + creating and displaying DOAP (Description of a Project) RDF + profiles.

+

doapfiend uses RDFAlchemy and rdflib to parse and serialize + DOAP.

+

Plugins

+

Plugins can be written for editing DOAP, scraping websites and + creating DOAP, searching for DOAP in SPARQL endpoints, displaying + DOAP in various formats such as HTML etc.
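A minimal usage sketch of the library side described above (an illustration only, not part of the vendored sources; the profile URL is a placeholder):

    from doapfiend.doaplib import fetch_doap, print_doap

    # fetch_doap() takes a URL or a local filename and returns the DOAP as RDF/XML text
    doap_xml = fetch_doap('http://example.org/myproject.rdf')
    # print_doap() renders it with the 'text' output plugin by default;
    # format='n3' or format='xml' selects another serializer plugin
    print_doap(doap_xml, format='text')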

+ +
+

Version: + 0.3.3 +

+

Author: + Rob Cakebread <rob@doapspace.org> +

+

Copyright: + (C) 2007-2008 Rob Cakebread +

+

License: + BSD-2 +

+
+ + + + + + +
+ + + + + +
Submodules[hide private]
+
+
+ +
+ + + + + + + + + +
+ + + + + +
Variables[hide private]
+
+   + + log = logging.getLogger() +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-pysrc.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-pysrc.html new file mode 100755 index 00000000..08ebce0f --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend-pysrc.html @@ -0,0 +1,155 @@ + + + + + doapfiend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend + + + + + + +
[hide private]
[frames] | no frames]
+
+

Source Code for Package doapfiend

+
+ 1   
+ 2  #pylint: disable-msg=C0103 
+ 3  """ 
+ 4  doapfiend 
+ 5  ========= 
+ 6   
+ 7  U{http://trac.doapspace.org/doapfiend} 
+ 8   
+ 9  Description 
+10  ----------- 
+11  doapfiend is a command-line client and library for querying, creating and 
+12  displaying DOAP (Description of a Project) RDF profiles. 
+13   
+14  doapfiend uses RDFAlchemy and rdflib to parse and serialize DOAP. 
+15   
+16  Plugins 
+17  ------- 
+18  Plugins can be written for editing DOAP, scraping websites and creating DOAP, 
+19  searching for DOAP in SPARQL endpoints, displaying DOAP in various formats such 
+20  as HTML etc. 
+21   
+22   
+23  """ 
+24   
+25   
+26  #Hack to get around warning in RDFAlchemy, bug filed upstream 
+27  import logging 
+28  log = logging.getLogger() 
+29  log.setLevel(logging.ERROR) 
+30   
+31  __docformat__ = 'epytext' 
+32  __version__ = '0.3.3' 
+33  __author__ = 'Rob Cakebread <rob@doapspace.org>' 
+34  __copyright__ = '(C) 2007-2008 Rob Cakebread' 
+35  __license__ = 'BSD-2' 
+36   
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-module.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-module.html new file mode 100755 index 00000000..18e14ae1 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-module.html @@ -0,0 +1,222 @@ + + + + + doapfiend.cli + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module cli + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Module cli

source code

+

cli.py

+

Command-line tool for querying, serializing and displaying DOAP

+

Author: Rob Cakebread <rob@doapspace.org>

+

License : BSD-2
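The module can also be driven programmatically, since main() builds a DoapFiend instance and parses sys.argv with optparse. A short sketch (assumed equivalent of what a console-script wrapper would do; only options defined in setup_opt_parser() are used):

    import sys
    from doapfiend.cli import main

    # same as running the command-line tool with --version
    sys.argv = ['doapfiend', '--version']
    sys.exit(main())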

+ + + + + + + + + + +
+ + + + + +
Classes[hide private]
+
+   + + DoapFiend
+ `DoapFiend` class +
+ + + + + + + + + + + + +
+ + + + + +
Functions[hide private]
+
+   + + + + + + +
doapfiend_version()
+ Print doapfiend version
+ source code + +
+ +
+   + + + + + + +
main()
+ Let's do it.
+ source code + +
+ +
+ + + + + + + + + +
+ + + + + +
Variables[hide private]
+
+   + + __revision__ = '' +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-pysrc.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-pysrc.html new file mode 100755 index 00000000..54840695 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli-pysrc.html @@ -0,0 +1,413 @@ + + + + + doapfiend.cli + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module cli + + + + + + +
[hide private]
[frames] | no frames]
+
+

Source Code for Module doapfiend.cli

+
+  1   
+  2  # pylint: disable-msg=C0103 
+  3  ''' 
+  4   
+  5  cli.py 
+  6  ====== 
+  7   
+  8  Command-line tool for querying, serializing and displaying DOAP 
+  9   
+ 10  Author: Rob Cakebread <rob@doapspace.org> 
+ 11   
+ 12  License : BSD-2 
+ 13   
+ 14  ''' 
+ 15   
+ 16  __docformat__ = 'epytext' 
+ 17  __revision__ = '$Revision:  $'[11:-1].strip() 
+ 18   
+ 19   
+ 20  import sys 
+ 21  import logging 
+ 22  import optparse 
+ 23   
+ 24  from doapfiend.plugins import load_plugins 
+ 25  from doapfiend.utils import COLOR 
+ 26  from doapfiend.__init__ import __version__ as VERSION 
+ 27  from doapfiend.doaplib import print_doap, follow_homepages, show_links 
+ 28   
+ 29   
+
30 -class DoapFiend(object): +
31 + 32 '''`DoapFiend` class''' + 33 +
34 - def __init__(self): +
35 '''Initialize attributes, set logger''' + 36 self.doap = None + 37 self.options = None + 38 self.log = logging.getLogger('doapfiend') + 39 self.log.addHandler(logging.StreamHandler()) + 40 #Cache list of all plugins + 41 self.plugins = list(load_plugins(others=True)) + 42 self.serializer = None +
43 +
44 - def get_plugin(self, method): +
45 """ + 46 Return plugin object if CLI option is activated and method exists + 47 + 48 @param method: name of plugin's method we're calling + 49 @type method: string + 50 + 51 @returns: list of plugins with `method` + 52 + 53 """ + 54 all_plugins = [] + 55 for plugin_obj in self.plugins: + 56 plugin = plugin_obj() + 57 plugin.configure(self.options, None) + 58 if plugin.enabled: + 59 if not hasattr(plugin, method): + 60 plugin = None + 61 else: + 62 all_plugins.append(plugin) + 63 return all_plugins +
64 +
65 - def set_log_level(self): +
66 '''Set log level according to command-line options''' + 67 if self.options.verbose: + 68 self.log.setLevel(logging.INFO) + 69 elif self.options.quiet: + 70 self.log.setLevel(logging.ERROR) + 71 elif self.options.debug: + 72 self.log.setLevel(logging.DEBUG) + 73 else: + 74 self.log.setLevel(logging.WARN) +
75 +
76 - def print_doap(self, doap_xml): +
77 ''' + 78 Print doap as n3, rdf/xml, plain text or using serialization plugin + 79 + 80 @param doap_xml: DOAP in RDF/XML serialization + 81 @type doap_xml: text + 82 + 83 @rtype: None + 84 @return: Just displays DOAP + 85 + 86 ''' + 87 if self.options.write: + 88 filename = self.options.write + 89 else: + 90 filename = None + 91 print_doap(doap_xml, serializer=self.serializer, filename=filename, + 92 color=not self.options.no_color) +
93 +
94 - def get_search_plugin(self): +
95 ''' + 96 Return active search plugin callable + 97 + 98 @rtype: callable + 99 @returns: A callable object that fetches for DOAP +100 ''' +101 plugins = self.get_plugin('search') +102 if len(plugins) == 1: +103 return plugins[0].search +
104 +
105 - def run(self): +
106 ''' +107 Run doapfiend command +108 +109 Find the active plugin that has a 'search' method and run it, +110 then output the DOAP with print_doap, using the active plugin +111 with a 'serializer' method. +112 +113 +114 @rtype: int +115 @returns: 0 success or 1 failure +116 +117 ''' +118 opt_parser = self.setup_opt_parser() +119 (self.options, remaining_args) = opt_parser.parse_args() +120 self.set_serializer() +121 if not self.serializer and remaining_args: +122 opt_parser.print_help() +123 return 1 +124 self.set_log_level() +125 +126 if self.options.doapfiend_version: +127 return doapfiend_version() +128 +129 if self.options.no_color: +130 for this in COLOR: +131 COLOR[this] = '\x1b[0m' +132 search_func = self.get_search_plugin() +133 if search_func: +134 doap_xml = search_func() +135 if doap_xml: +136 if self.options.follow: +137 #Search for additional DOAP by looking up all doap:homepage +138 #found and then print all found. This may be used if the +139 #DOAP you've found isn't rich enough or with FOAF, where a +140 #person lists multiple projects they are affiliated with +141 #and you want to find DOAP based on the Projec homepages +142 #found in FOAF. +143 self.print_doap(doap_xml) +144 return follow_homepages(doap_xml) +145 elif self.options.show_links: +146 return show_links(doap_xml) +147 else: +148 return self.print_doap(doap_xml) +149 else: +150 opt_parser.print_help() +151 return 1 +
152 +
153 - def set_serializer(self): +
154 ''' +155 Find all plugins that are enabled on the command-line and have a +156 `serialize` method. If none are enabled, default to plain text +157 ''' +158 plugins = self.get_plugin('serialize') +159 if len(plugins) == 0: +160 self.serializer = None +161 else: +162 #Choose first serializer in case they try more than one +163 self.serializer = plugins[0].serialize +
164 +
165 - def setup_opt_parser(self): +
166 ''' +167 Setup the optparser +168 +169 @rtype: opt_parser.OptionParser +170 @return: Option parser +171 +172 ''' +173 usage = 'usage: %prog [options]' +174 opt_parser = optparse.OptionParser(usage=usage) +175 group_search = optparse.OptionGroup(opt_parser, +176 'Search options', +177 'Options for searching for DOAP') +178 +179 opt_parser.add_option('--version', action='store_true', +180 dest='doapfiend_version', default=False, +181 help='Show doapfiend version and exit.') +182 +183 opt_parser.add_option('-P', '--http-proxy', action='store', +184 dest='proxy', default=False, +185 help='Specify http proxy URL if you use one.') +186 +187 group_output = optparse.OptionGroup(opt_parser, +188 'Output options', +189 'Choose these options to change default output behavior') +190 +191 group_output.add_option('--debug', action='store_true', +192 dest= 'debug', default=False, +193 help='Show debugging information') +194 +195 group_output.add_option('-f', '--follow-links', action='store_true', +196 dest='follow', default=False, +197 help='Search for and show additional DOAP.', +198 metavar='FILENAME') +199 +200 group_output.add_option('-s', '--show-links', action='store_true', +201 dest='show_links', default=False, +202 help='Search for and show links to additional DOAP.', +203 metavar='FILENAME') +204 +205 group_output.add_option('-w', '--write', action='store', +206 dest='write', default=False, +207 help='Write DOAP to a file instead of displaying it.', +208 metavar='FILENAME') +209 +210 group_output.add_option('-C', '--no-color', action='store_true', +211 dest='no_color', default=False, +212 help="Don't use color in output") +213 +214 group_output.add_option('-q', '--quiet', action='store_true', +215 dest='quiet', default=False, help="Show less output") +216 +217 group_output.add_option('-v', '--verbose', action='store_true', +218 dest='verbose', default=False, help="Show more output") +219 +220 # add opts from plugins +221 for plugcls in self.plugins: +222 plug = plugcls() +223 plug.add_options(opt_parser, group_output, group_search) +224 opt_parser.add_option_group(group_search) +225 opt_parser.add_option_group(group_output) +226 return opt_parser +
227 +228 +
229 -def doapfiend_version(): +
230 '''Print doapfiend version''' +231 print VERSION +
232 +233 +
234 -def main(): +
235 '''Let's do it.''' +236 my_doapfiend = DoapFiend() +237 return my_doapfiend.run() +
238 +239 +240 if __name__ == '__main__': +241 sys.exit(main()) +242 +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli.DoapFiend-class.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli.DoapFiend-class.html new file mode 100755 index 00000000..f645c42c --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.cli.DoapFiend-class.html @@ -0,0 +1,502 @@ + + + + + doapfiend.cli.DoapFiend + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module cli :: + Class DoapFiend + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Class DoapFiend

source code

+
+object --+
+         |
+        DoapFiend
+
+ +
+

`DoapFiend` class

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
Instance Methods[hide private]
+
+   + + + + + + +
__init__(self)
+ Initialize attributes, set logger
+ source code + +
+ +
+   + + + + + + +
get_plugin(self, + method)
+ Return plugin object if CLI option is activated and method exists
+ source code + +
+ +
+   + + + + + + +
set_log_level(self)
+ Set log level according to command-line options
+ source code + +
+ +
+ None + + + + + + +
print_doap(self, + doap_xml)
+ Print doap as n3, rdf/xml, plain text or using serialization plugin
+ source code + +
+ +
+ callable + + + + + + +
get_search_plugin(self)
+ Return active search plugin callable
+ source code + +
+ +
+ int + + + + + + +
run(self)
+ Run doapfiend command
+ source code + +
+ +
+   + + + + + + +
set_serializer(self)
+ Find all plugins that are enabled on the command-line and have a + `serialize` method.
+ source code + +
+ +
+ opt_parser.OptionParser + + + + + + +
setup_opt_parser(self)
+ Setup the optparser
+ source code + +
+ +
+

Inherited from object: + __delattr__, + __getattribute__, + __hash__, + __new__, + __reduce__, + __reduce_ex__, + __repr__, + __setattr__, + __str__ +

+
+ + + + + + + + + +
+ + + + + +
Properties[hide private]
+
+

Inherited from object: + __class__ +

+
+ + + + + + +
+ + + + + +
Method Details[hide private]
+
+ +
+ +
+ + +
+

__init__(self) +
(Constructor) +

+
source code  +
+ +

Initialize attributes, set logger

+
+
Overrides: + object.__init__ +
+
+
+
+ +
+ +
+ + +
+

get_plugin(self, + method) +

+
source code  +
+ +

Return plugin object if CLI option is activated and method exists

+
+
Parameters:
+
    +
  • method (string) - name of plugin's method we're calling
  • +
+
Returns:
+
list of plugins with `method`
+
+
+
+ +
+ +
+ + +
+

print_doap(self, + doap_xml) +

+
source code  +
+ +

Print doap as n3, rdf/xml, plain text or using serialization + plugin

+
+
Parameters:
+
    +
  • doap_xml (text) - DOAP in RDF/XML serialization
  • +
+
Returns: None
+
Just displays DOAP
+
+
+
+ +
+ +
+ + +
+

get_search_plugin(self) +

+
source code  +
+ +

Return active search plugin callable

+
+
Returns: callable
+
A callable object that fetches DOAP
+
+
+
+ +
+ +
+ + +
+

run(self) +

+
source code  +
+ +

Run doapfiend command

+

Find the active plugin that has a 'search' method and run it, then + output the DOAP with print_doap, using the active plugin with a + 'serializer' method.

+
+
Returns: int
+
0 success or 1 failure
+
+
+
+ +
+ +
+ + +
+

set_serializer(self) +

+
source code  +
+ +

Find all plugins that are enabled on the command-line and have a + `serialize` method. If none are enabled, default to plain text

+
+
+
+
+ +
+ +
+ + +
+

setup_opt_parser(self) +

+
source code  +
+ +

Setup the optparser

+
+
Returns: opt_parser.OptionParser
+
Option parser
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-module.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-module.html new file mode 100755 index 00000000..4616da89 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-module.html @@ -0,0 +1,770 @@ + + + + + doapfiend.doaplib + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module doaplib + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Module doaplib

source code

+

Library for parsing, displaying, querying and serializing DOAP

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
Functions[hide private]
+
+ int + + + + + + +
follow_homepages(rdf_xml)
+ If there is a 'doap:Project homepage' it will be looked up on + doapspace.org using get_by_homepage to find any other DOAP.
+ source code + +
+ +
+ int + + + + + + +
show_links(rdf)
+ If there is a 'doap:Project homepage' it will be looked up on + doapspace.org using get_by_homepage to find any other DOAP.
+ source code + +
+ +
+ None + + + + + + +
print_doap_by_homepages(homepages)
+ Given a list of homepage URLs, search for DOAP for each and print
+ source code + +
+ +
+ generator + + + + + + +
get_homepages(rdf, + format='xml')
+ Find all doap:homepage in RDF
+ source code + +
+ +
+ boolean + + + + + + +
rdf_has_doap(store)
+ Returns True if triplestore has the DOAP namespace defined
+ source code + +
+ +
+ Project + + + + + + +
load_graph(doap, + format='xml', + get_list=False)
+ Load a DOAP profile into a RDFAlchemy/rdflib graph
+ source code + +
+ +
+ string + + + + + + +
get_by_pkg_index(index, + project_name, + proxy=None)
+ Get DOAP for a package index project name
+ source code + +
+ +
+ list + + + + + + +
query_by_homepage(url)
+ Get list of URLs for DOAP given a project's homepage.
+ source code + +
+ +
+   + + + + + + +
print_doap(doap_xml, + color=None, + format='text', + serializer=None, + filename=None)
+ Print DOAP as text, xml, or n3 etc.
+ source code + +
+ +
+ function + + + + + + +
get_serializer(format)
+ Return a serializer instance given its name
+ source code + +
+ +
+   + + + + + + +
get_plugin(method)
+ Return plugin object if `method` exists
+ source code + +
+ +
+ text + + + + + + +
fetch_doap(url, + proxy=None)
+ Fetch DOAP by its URL or filename
+ source code + +
+ +
+ + + + + + + + + + + + + + + +
+ + + + + +
Variables[hide private]
+
+   + + LOG = logging.getLogger('doapfiend') +
+   + + XMLRPC_SERVER = <ServerProxy for doapspace.org/xmlrpc/> +
+   + + DOAP_NS = rdflib.URIRef('http://usefulinc.com/ns/doap#') +
+ + + + + + +
+ + + + + +
Function Details[hide private]
+
+ +
+ +
+ + +
+

follow_homepages(rdf_xml) +

+
source code  +
+ +

If there is a 'doap:Project homepage' it will be looked up on + doapspace.org using get_by_homepage to find any other DOAP. This is + useful if we're looking at FOAF and a project is mentioned by homepage. + It can also be used on DOAP files to search for additional DOAP files + about the same project.

+
+
Parameters:
+
    +
  • rdf_xml - RDF serialized as XML @type : string
  • +
+
Returns: int
+
0 on success or 1 if there was no DOAP in the RDF
+
+
+
+ +
+ +
+ + +
+

show_links(rdf) +

+
source code  +
+ +

If there is a 'doap:Project homepage' it will be looked up on + doapspace.org using get_by_homepage to find any other DOAP. This is + useful if we're looking at FOAF and a project is mentioned by homepage. + It can also be used on DOAP files to search for additional DOAP files + about the same project.

+
+
Parameters:
+
    +
  • rdf - RDF serialized as XML @type : string
  • +
+
Returns: int
+
0 on success or 1 if there was no DOAP in the RDF
+
+
+
+ +
+ +
+ + +
+

print_doap_by_homepages(homepages) +

+
source code  +
+ +

Given a list of homepage URLs, search for DOAP for each and print

+
+
Parameters:
+
    +
  • homepages - Project homepage @type : list
  • +
+
Returns: None
+
None
+
+
+
+ +
+ +
+ + +
+

get_homepages(rdf, + format='xml') +

+
source code  +
+ +

Find all doap:homepage in RDF

+
+
Parameters:
+
    +
  • rdf (string) - RDF
  • +
  • format (string) - Serialization format
  • +
+
Returns: generator
+
homepages
+
+
+
+ +
+ +
+ + +
+

rdf_has_doap(store) +

+
source code  +
+ +

Returns True if triplestore has the DOAP namespace defined

+
+
Parameters:
+
    +
  • store (rdflib ConjunctiveGraph) - triplestore
  • +
+
Returns: boolean
+
True if triplestore contains DOAP namespace
+
+
+
+ +
+ +
+ + +
+

load_graph(doap, + format='xml', + get_list=False) +

+
source code  +
+ +

Load a DOAP profile into a RDFAlchemy/rdflib graph

+

Supports any serialization format rdflib can parse (xml, n3, etc.)

+
+
Parameters:
+
    +
  • doap (string) - DOAP
  • +
  • format (string) - Serialization format we're parsing
  • +
  • get_list - Return list of Projects if True
  • +
+
Returns: Project
+
a Project{rdfSubject}
+
+
+
+ +
+ +
+ + +
+

get_by_pkg_index(index, + project_name, + proxy=None) +

+
source code  +
+ +

Get DOAP for a package index project name

+

Builtin indexes:

+
    +
  • + 'sf' SourceForge +
  • +
  • + 'fm' Freshmeat +
  • +
  • + 'py' Python Package Index +
  • +
+

Note there can be other package indexes available by third party + plugins.

+
+
Parameters:
+
    +
  • index (string) - Package index two letter abbreviation
  • +
  • project_name (string) - project name
  • +
  • proxy (string) - Optional HTTP proxy URL
  • +
+
Returns: string
+
text of file retrieved
+
+
+
+ +
+ +
+ + +
+

query_by_homepage(url) +

+
source code  +
+ +

Get list of URLs for DOAP given a project's homepage. The list can + contain zero or multiple URLs.

+

The return format is: [(source, URL), (source, URL)...]

+

'source' is the two-letter package index abbreviation, or 'ex' for + external, meaning the DOAP was spidered on the web. Possible + package indexes:

+

Current indexes:

+
    +
  • + 'sf' SourceForge +
  • +
  • + 'fm' Freshmeat +
  • +
  • + 'py' Python Package Index +
  • +
  • + 'oh' Packages listed on Ohloh +
  • +
+
+
Parameters:
+
    +
  • url (string) - URL of homepage of a project
  • +
+
Returns: list
+
A list of tuples containing URLs for DOAP found by homepage
+
+
+
+ +
+ +
+ + +
+

print_doap(doap_xml, + color=None, + format='text', + serializer=None, + filename=None) +

+
source code  +
+ +

Print DOAP as text, xml, or n3 etc., to stdout or to a file. A callable + serializer object may be passed, or the name of a serializer plugin.

+
+
Parameters:
+
    +
  • doap_xml (string) - DOAP profile in RDF/XML
  • +
  • format (string) - Serialization syntax formatter name
  • +
  • serializer (callable) - Instance of a serializer
  • +
  • filename (string) - Optional filename to write to
  • +
+
Returns:
+
`serializer` or 1 if invalid serialization request
+
+
+
+ +
+ +
+ + +
+

get_serializer(format) +

+
source code  +
+ +

Return a serializer instance given its name

+
+
Parameters:
+
    +
  • format (string) - Name of serializer
  • +
+
Returns: function
+
Instance of a serializer
+
+
+
+ +
+ +
+ + +
+

get_plugin(method) +

+
source code  +
+ +

Return plugin object if `method` exists

+
+
Parameters:
+
    +
  • method (string) - name of plugin's method we're calling
  • +
+
Returns:
+
list of plugins with `method`
+
+
+
+ +
+ +
+ + +
+

fetch_doap(url, + proxy=None) +

+
source code  +
+ +

Fetch DOAP by its URL or filename

+
+
Parameters:
+
    +
  • url (string) - URL of DOAP profile in RDF/XML serialization
  • +
+
Returns: text
+
DOAP
+
+
+
+
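Taken together, the functions documented above support a small discovery workflow. A sketch (illustrative only; the homepage URL is an arbitrary example, not from the sources) of the [(source, URL), ...] result of query_by_homepage being fed back into fetch_doap and print_doap:

    from doapfiend.doaplib import query_by_homepage, fetch_doap, print_doap

    for source, url in query_by_homepage('http://example.org/'):
        # source is a package-index abbreviation ('sf', 'fm', 'py', 'oh') or 'ex'
        print 'DOAP found via %s: %s' % (source, url)
        print_doap(fetch_doap(url))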
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-pysrc.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-pysrc.html new file mode 100755 index 00000000..7fe640cb --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.doaplib-pysrc.html @@ -0,0 +1,508 @@ + + + + + doapfiend.doaplib + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module doaplib + + + + + + +
[hide private]
[frames] | no frames]
+
+

Source Code for Module doapfiend.doaplib

+
+  1  #!/usr/bin/env python 
+  2  #pylint: disable-msg=C0103 
+  3   
+  4  """ 
+  5   
+  6  Library for parsing, displaying, querying and serializing DOAP 
+  7   
+  8  """ 
+  9   
+ 10  import sys 
+ 11  import logging 
+ 12  import xmlrpclib 
+ 13  from cStringIO import StringIO 
+ 14  from xml.sax._exceptions import SAXParseException 
+ 15   
+ 16  from rdfalchemy import rdfSubject 
+ 17  from rdflib import ConjunctiveGraph, Namespace 
+ 18   
+ 19  from doapfiend.utils import fetch_file 
+ 20  from doapfiend.model import Project 
+ 21  from doapfiend.plugins import load_plugins 
+ 22   
+ 23  LOG = logging.getLogger('doapfiend') 
+ 24  XMLRPC_SERVER = xmlrpclib.ServerProxy('http://doapspace.org/xmlrpc/') 
+ 25  DOAP_NS = Namespace('http://usefulinc.com/ns/doap#') 
+ 26   
+ 27   
+
28 -def follow_homepages(rdf_xml): +
29 ''' + 30 If there is a 'doap:Project homepage' it will be looked up + 31 on doapspace.org using get_by_homepage to find any other + 32 DOAP. This is useful if we're looking at FOAF and a project + 33 is mentioned by homepage. It can also be used on DOAP files + 34 to search for additional DOAP files about the same project. + 35 + 36 @param rdf_xml: RDF serialized as XML + 37 @type : string + 38 + 39 @rtype: int + 40 @returns: 0 on sucess or 1 if there was no DOAP in the RDF + 41 ''' + 42 homepages = list(get_homepages(rdf_xml)) + 43 nbr_homepage_urls = len(homepages) + 44 if nbr_homepage_urls >= 1: + 45 print_doap_by_homepages(homepages) + 46 else: + 47 print 'No DOAP found in that RDF.' + 48 return 1 +
49 + 50 + 77 + 78 + 97 +
98 -def get_homepages(rdf, format='xml'): +
99 ''' +100 Find all doap:homepage in RDF +101 +102 @param rdf: RDF +103 @type rdf: string +104 +105 @param format: Serialization format +106 @type format: string +107 +108 @rtype: generator +109 @returns: homepages +110 ''' +111 store = ConjunctiveGraph() +112 store.parse(StringIO(rdf), publicID=None, format=format) +113 if rdf_has_doap(store): +114 for _s, o in store.subject_objects(DOAP_NS["homepage"]): +115 yield(str(o)) +
116 +
117 -def rdf_has_doap(store): +
118 ''' +119 Returns True if triplestore has the DOAP namespace defined +120 +121 @param store: triplestore +122 @type store: rdflib ConjunctiveGraph +123 +124 @rtype: boolean +125 @returns: True if triplestore contains DOAP namespace +126 +127 ''' +128 for namespace in store.namespaces(): +129 if namespace[1] == DOAP_NS: +130 return True +
131 +
132 -def load_graph(doap, format="xml", get_list=False): +
133 ''' +134 Load a DOAP profile into a RDFAlchemy/rdflib graph +135 +136 Supports any serialization format rdflib can parse (xml, n3, etc.) +137 +138 @param doap: DOAP +139 @type doap: string +140 +141 @param format: Serialization format we're parsing +142 @type format: string +143 +144 @param get_list: Return list of Projects if True +145 @type doap: list +146 +147 @rtype: Project +148 @returns: a Project{rdfSubject} +149 +150 ''' +151 rdfSubject.db = ConjunctiveGraph() +152 try: +153 rdfSubject.db.parse(StringIO(doap), format) +154 except SAXParseException: +155 sys.stderr.write("Error: Can't parse RDF/XML.\n") +156 sys.exit(2) +157 #If a serializer works on an entire graph, it doesn't matter which +158 #Project instance we give it. This is true for N3, XML/RDF etc. +159 #The 'text' serializer, on the other hand, prints out a separate +160 #description for each Project found in a graph. This is useful for +161 #'arbitrary' RDF, or FOAF where there may be several Projects listed. +162 #Ideally exactly one Project should be specified in an .rdf file. +163 #In the future load_graph will probably always return a list and let the +164 #plugins determine what to do when there are more than one Project +165 #found. +166 if get_list: +167 LOG.debug("doaplib: list of Projects") +168 try: +169 projs = list(Project.ClassInstances()) +170 LOG.debug("Found %s Projects." % len(projs)) +171 if len(projs) == 0: +172 sys.stderr.write('No DOAP found in that RDF.\n') +173 return projs +174 except StopIteration: +175 sys.stderr.write('No DOAP found in that RDF.\n') +176 sys.exit(2) +177 +178 else: +179 try: +180 LOG.debug("doaplib: single Project") +181 return Project.ClassInstances().next() +182 except StopIteration: +183 sys.stderr.write('No DOAP found in that RDF.\n') +184 sys.exit(2) +185 sys.stderr.write('No DOAP found in that RDF.\n') +
186 +
187 -def get_by_pkg_index(index, project_name, proxy=None): +
188 ''' +189 Get DOAP for a package index project name +190 +191 Builtin indexes: +192 +193 - 'sf' SourceForge +194 - 'fm' Freshmeat +195 - 'py' Python Package Index +196 +197 Note there can be other package indexes available by +198 third party plugins. +199 +200 @param index: Package index two letter abbreviation +201 @type index: string +202 +203 @param project_name: project name +204 @type project_name: string +205 +206 @param proxy: Optional HTTP proxy URL +207 @type proxy: string +208 +209 @rtype: string +210 @return: text of file retrieved +211 +212 ''' +213 for plugin_obj in list(load_plugins()): +214 plugin = plugin_obj() +215 if hasattr(plugin, 'prefix'): +216 if plugin.prefix == index: +217 plugin.query = project_name +218 return plugin.search(proxy) +
219 +220 +
221 -def query_by_homepage(url): +
222 ''' +223 Get list of URL's for DOAP given a project's homepage. +224 The list can contain zero or multiple URLs. +225 +226 The return format is: +227 [(source, URL), (source, URL)...] +228 +229 'source' is the two letter package index abbreviation or 'ex' for external. +230 'external' meaning the DOAP was spidered on the web. +231 Possible package indexes: +232 +233 Current indexes: +234 +235 - 'sf' SourceForge +236 - 'fm' Freshmeat +237 - 'py' Python Package Index +238 - 'oh' Packages listed on Ohloh +239 +240 @param url: URL of homepage of a project +241 @type url: string +242 +243 @rtype: list +244 @return: A list of tuples containing URLs for DOAP found by homepage +245 +246 ''' +247 #Should check for env variable for alternate xmplrpc server for testing? +248 return XMLRPC_SERVER.query_by_homepage(url) +
249 +250 +289 +290 +
291 -def get_serializer(format): +
292 ''' +293 Return a serializer instance given its name +294 +295 @param format: Name of serializer +296 @type format: string +297 +298 @rtype: function +299 @returns: Instance of a serializer +300 ''' +301 #Get all plugins with a `serialize` method +302 for plugin_obj in get_plugin('serialize'): +303 plugin = plugin_obj() +304 if plugin.name == format: +305 return plugin.serialize +
306 +307 +
308 -def get_plugin(method): +
309 """ +310 Return plugin object if `method` exists +311 +312 @param method: name of plugin's method we're calling +313 @type method: string +314 +315 @returns: list of plugins with `method` +316 +317 """ +318 all_plugins = [] +319 for plugin in load_plugins(): +320 #plugin().configure(None, None) +321 if not hasattr(plugin, method): +322 plugin = None +323 else: +324 all_plugins.append(plugin) +325 return all_plugins +
326 +327 +
328 -def fetch_doap(url, proxy=None): +
329 ''' +330 Fetch DOAP by its URL or filename +331 +332 @param url: URL of DOAP profile in RDF/XML serialization +333 @type url: string +334 +335 @rtype: text +336 @return: DOAP +337 ''' +338 return fetch_file(url, proxy) +
339 +
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-module.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-module.html new file mode 100755 index 00000000..0a4929dc --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-module.html @@ -0,0 +1,154 @@ + + + + + doapfiend.lexers + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module lexers + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Module lexers

source code

+

pygments.lexers.sw

+

Lexers for semantic web languages.

+

:copyright: 2007 by Philip Cooper + <philip.cooper@openvest.com>. :license: BSD, see LICENSE for more + details.

+ + + + + + + + + + + + + +
+ + + + + +
Classes[hide private]
+
+   + + Notation3Lexer
+ Lexer for the N3 / Turtle / NT +
+   + + SparqlLexer
+ Lexer for SPARQL Not Complete +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-pysrc.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-pysrc.html new file mode 100755 index 00000000..83256cca --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers-pysrc.html @@ -0,0 +1,304 @@ + + + + + doapfiend.lexers + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module lexers + + + + + + +
[hide private]
[frames] | no frames]
+
+

Source Code for Module doapfiend.lexers

+
+  1   
+  2  #pylint: disable-msg=C0301 
+  3   
+  4  """ 
+  5      pygments.lexers.sw 
+  6      ================== 
+  7   
+  8      Lexers for semantic web languages. 
+  9   
+ 10      :copyright: 2007 by Philip Cooper <philip.cooper@openvest.com>. 
+ 11      :license: BSD, see LICENSE for more details. 
+ 12  """ 
+ 13   
+ 14  import re 
+ 15   
+ 16  from pygments.lexer import RegexLexer, include, bygroups 
+ 17  from pygments.token import Text, Comment, Operator, Keyword, Name, Literal 
+ 18   
+ 19   
+ 20  __all__ = ['Notation3Lexer', 'SparqlLexer'] 
+ 21   
+ 22   
+
23 -class Notation3Lexer(RegexLexer): +
24 """ + 25 Lexer for the N3 / Turtle / NT + 26 """ + 27 name = 'N3' + 28 aliases = ['n3', 'turtle'] + 29 filenames = ['*.n3', '*.ttl', '*.NT'] + 30 mimetypes = ['text/rdf+n3', 'application/x-turtle', 'application/n3'] + 31 + 32 tokens = { + 33 'comments': [ + 34 (r'(\s*#.*)', Comment) + 35 ], + 36 'root': [ + 37 include('comments'), + 38 (r'(\s*@(?:prefix|base|keywords)\s*)(\w*:\s+)?(<[^> ]*>\s*\.\s*)', + 39 bygroups(Keyword,Name.Variable,Name.Namespace)), + 40 (r'\s*(<[^>]*\>)', Name.Class, ('triple','predObj')), + 41 (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\s)', + 42 Name.Class, ('triple','predObj')), + 43 (r'\s*\[\]\s*', Name.Class, ('triple','predObj')), + 44 ], + 45 'triple' : [ + 46 (r'\s*\.\s*', Text, '#pop') + 47 ], + 48 'predObj': [ + 49 include('comments'), + 50 (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\b\s*)', Operator, 'object'), + 51 (r'\s*(<[^>]*\>)', Operator, 'object'), + 52 (r'\s*\]\s*', Text, '#pop'), + 53 (r'(?=\s*\.\s*)', Keyword, '#pop'), + 54 ], + 55 'objList': [ + 56 (r'\s*\)', Text, '#pop'), + 57 include('object') + 58 ], + 59 'object': [ + 60 (r'\s*\[', Text, 'predObj'), + 61 (r'\s*<[^> ]*>', Name.Attribute), + 62 (r'\s*("""(?:.|\n)*?""")(\@[a-z]{2-4}|\^\^<?[a-zA-Z0-9\-\:_#/\.]*>?)?\s*', + 63 bygroups(Literal.String,Text)), + 64 (r'\s*".*?[^\\]"(?:\@[a-z]{2-4}|\^\^<?[a-zA-Z0-9\-\:_#/\.]*>?)?\s*', + 65 Literal.String), + 66 (r'\s*[a-zA-Z0-9\-_\:]\s*', Name.Attribute), + 67 (r'\s*\(', Text, 'objList'), + 68 (r'\s*;\s*\n?', Text, '#pop'), + 69 (r'(?=\s*\])', Text, '#pop'), + 70 (r'(?=\s*\.)', Text, '#pop'), + 71 ], + 72 } +
73 + 74 +
75 -class SparqlLexer(RegexLexer): +
76 """ + 77 Lexer for SPARQL Not Complete + 78 """ + 79 name = 'SPARQL' + 80 aliases = ['sparql'] + 81 filenames = ['*.sparql'] + 82 mimetypes = ['text/x-sql'] + 83 flags = re.IGNORECASE + 84 tokens = { + 85 'comments': [ + 86 (r'(\s*#.*)', Comment) + 87 ], + 88 'root': [ + 89 include('comments'), + 90 (r'(\s*(?:PREFIX|BASE)\s+)(\w*:\w*)?(\s*<[^> ]*>\s*)', + 91 bygroups(Keyword,Name.Variable,Name.Namespace)), + 92 (r'(\s*#.*)', Comment), + 93 (r'((?:SELECT|ASK|CONSTRUCT|DESCRIBE)\s*(?:DISTINCT|REDUCED)?\s*)((?:\?[a-zA-Z0-9_-]+\s*)+|\*)(\s*)', + 94 bygroups(Keyword,Name.Variable,Text)), + 95 (r'(FROM\s*(?:NAMED)?)(\s*.*)', bygroups(Keyword,Text)), + 96 (r'(WHERE)?\s*({)', bygroups(Keyword,Text), 'graph'), + 97 (r'(LIMIT|OFFSET)(\s*[+-]?[0-9]+)', + 98 bygroups(Keyword,Literal.String)), + 99 ], +100 'graph':[ +101 (r'\s*(<[^>]*\>)', Name.Class, ('triple','predObj')), +102 (r'(\s*[a-zA-Z_0-9\-]*:[a-zA-Z0-9\-_]*\s)', +103 Name.Class, ('triple','predObj')), +104 (r'(\s*\?[a-zA-Z0-9_-]*)', Name.Variable, ('triple','predObj')), +105 (r'\s*\[\]\s*', Name.Class, ('triple','predObj')), +106 (r'\s*(FILTER\s*)((?:regex)?\()',bygroups(Keyword,Text),'filterExp'), +107 (r'\s*}', Text, '#pop'), +108 ], +109 'triple' : [ +110 (r'(?=\s*})', Text, '#pop'), +111 (r'\s*\.\s*', Text, '#pop'), +112 ], +113 'predObj': [ +114 include('comments'), +115 (r'(\s*\?[a-zA-Z0-9_-]*\b\s*)', Name.Variable,'object'), +116 (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\b\s*)', Operator, 'object'), +117 (r'\s*(<[^>]*\>)', Operator, 'object'), +118 (r'\s*\]\s*', Text, '#pop'), +119 (r'(?=\s*\.\s*)', Keyword, '#pop'), +120 ], +121 'objList': [ +122 (r'\s*\)', Text, '#pop'), +123 include('object'), +124 ], +125 'object': [ +126 include('variable'), +127 (r'\s*\[', Text, 'predObj'), +128 (r'\s*<[^> ]*>', Name.Attribute), +129 (r'\s*("""(?:.|\n)*?""")(\@[a-z]{2-4}|\^\^<?[a-zA-Z0-9\-\:_#/\.]*>?)?\s*', bygroups(Literal.String,Text)), +130 (r'\s*".*?[^\\]"(?:\@[a-z]{2-4}|\^\^<?[a-zA-Z0-9\-\:_#/\.]*>?)?\s*', Literal.String), +131 (r'\s*[a-zA-Z0-9\-_\:]\s*', Name.Attribute), +132 (r'\s*\(', Text, 'objList'), +133 (r'\s*;\s*', Text, '#pop'), +134 (r'(?=\])', Text, '#pop'), +135 (r'(?=\.)', Text, '#pop'), +136 ], +137 'variable':[ +138 (r'(\?[a-zA-Z0-9\-_]+\s*)', Name.Variable), +139 ], +140 'filterExp':[ +141 include('variable'), +142 include('object'), +143 (r'\s*[+*/<>=~!%&|-]+\s*', Operator), +144 (r'\s*\)', Text, '#pop'), +145 ], +146 +147 } +
148 +
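The two lexer classes above are ordinary Pygments lexers, so they can be driven through the normal highlight() pipeline; a minimal sketch, assuming Pygments is installed:

    from pygments import highlight
    from pygments.formatters import TerminalFormatter
    from doapfiend.lexers import Notation3Lexer

    n3_text = '@prefix doap: <http://usefulinc.com/ns/doap#> .'
    print(highlight(n3_text, Notation3Lexer(), TerminalFormatter()))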
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.Notation3Lexer-class.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.Notation3Lexer-class.html new file mode 100755 index 00000000..5f1601ae --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.Notation3Lexer-class.html @@ -0,0 +1,394 @@ + + + + + doapfiend.lexers.Notation3Lexer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module lexers :: + Class Notation3Lexer + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Class Notation3Lexer

source code

+
+           object --+        
+                    |        
+ pygments.lexer.Lexer --+    
+                        |    
+pygments.lexer.RegexLexer --+
+                            |
+                           Notation3Lexer
+
+ +
+

Lexer for the N3 / Turtle / NT

+ + + + + + + + + + +
+ + + + + +
Nested Classes[hide private]
+
+

Inherited from pygments.lexer.RegexLexer: + __metaclass__ +

+
+ + + + + + + + + +
+ + + + + +
Instance Methods[hide private]
+
+

Inherited from pygments.lexer.RegexLexer: + get_tokens_unprocessed +

+

Inherited from pygments.lexer.Lexer: + __init__, + __repr__, + add_filter, + get_tokens +

+

Inherited from object: + __delattr__, + __getattribute__, + __hash__, + __new__, + __reduce__, + __reduce_ex__, + __setattr__, + __str__ +

+
+ + + + + + + + + +
+ + + + + +
Static Methods[hide private]
+
+

Inherited from pygments.lexer.Lexer: + analyse_text +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
Class Variables[hide private]
+
+   + + name = 'N3'
+ Name of the lexer +
+   + + aliases = ['n3', 'turtle']
+ Shortcuts for the lexer +
+   + + filenames = ['*.n3', '*.ttl', '*.NT']
+ fn match rules +
+   + + mimetypes = ['text/rdf+n3', 'application/x-turtle', 'applicati...
+ mime types +
+   + + tokens = {'comments': [('(\\s*#.*)', Token.Comment)], 'objList...
+ Dict of ``{'state': [(regex, tokentype, new_state), ...], ... +
+

Inherited from pygments.lexer.RegexLexer: + flags +

+

Inherited from pygments.lexer.Lexer: + alias_filenames +

+
+ + + + + + + + + +
+ + + + + +
Properties[hide private]
+
+

Inherited from object: + __class__ +

+
+ + + + + + +
+ + + + + +
Class Variable Details[hide private]
+
+ +
+ +
+

mimetypes

+

mime types

+
+
+
+
Value:
+
+['text/rdf+n3', 'application/x-turtle', 'application/n3']
+
+
+
+
+
+ +
+ +
+

tokens

+

Dict of ``{'state': [(regex, tokentype, new_state), ...], ...}``

+

The initial state is 'root'. ``new_state`` can be omitted to signify + no state transition. If it is a string, the state is pushed on the stack + and changed. If it is a tuple of strings, all states are pushed on the + stack and the current state will be the topmost. It can also be + ``combined('state1', 'state2', ...)`` to signify a new, anonymous state + combined from the rules of two or more existing ones. Furthermore, it can + be '#pop' to signify going back one step in the state stack, or '#push' + to push the current state on the stack again.

+

The tuple can also be replaced with ``include('state')``, in which + case the rules from the state named by the string are included in the + current one.

+
+
+
+
Value:
+
+{'comments': [('(\\s*#.*)', Token.Comment)],
+ 'objList': [('\\s*\\)', Token.Text, '#pop'), 'object'],
+ 'object': [('\\s*\\[', Token.Text, 'predObj'),
+            ('\\s*<[^> ]*>', Token.Name.Attribute),
+            ('\\s*("""(?:.|\\n)*?""")(\\@[a-z]{2-4}|\\^\\^<?[a-zA-Z0-9\
+\\-\\:_#/\\.]*>?)?\\s*',
+             <function callback at 0x850933c>),
+            ('\\s*".*?[^\\\\]"(?:\\@[a-z]{2-4}|\\^\\^<?[a-zA-Z0-9\\-\\\
+...
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.SparqlLexer-class.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.SparqlLexer-class.html new file mode 100755 index 00000000..c0caf2ee --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.lexers.SparqlLexer-class.html @@ -0,0 +1,381 @@ + + + + + doapfiend.lexers.SparqlLexer + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module lexers :: + Class SparqlLexer + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Class SparqlLexer

source code

+
+           object --+        
+                    |        
+ pygments.lexer.Lexer --+    
+                        |    
+pygments.lexer.RegexLexer --+
+                            |
+                           SparqlLexer
+
+ +
+

Lexer for SPARQL Not Complete

+ + + + + + + + + + +
+ + + + + +
Nested Classes[hide private]
+
+

Inherited from pygments.lexer.RegexLexer: + __metaclass__ +

+
+ + + + + + + + + +
+ + + + + +
Instance Methods[hide private]
+
+

Inherited from pygments.lexer.RegexLexer: + get_tokens_unprocessed +

+

Inherited from pygments.lexer.Lexer: + __init__, + __repr__, + add_filter, + get_tokens +

+

Inherited from object: + __delattr__, + __getattribute__, + __hash__, + __new__, + __reduce__, + __reduce_ex__, + __setattr__, + __str__ +

+
+ + + + + + + + + +
+ + + + + +
Static Methods[hide private]
+
+

Inherited from pygments.lexer.Lexer: + analyse_text +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
Class Variables[hide private]
+
+   + + name = 'SPARQL'
+ Name of the lexer +
+   + + aliases = ['sparql']
+ Shortcuts for the lexer +
+   + + filenames = ['*.sparql']
+ fn match rules +
+   + + mimetypes = ['text/x-sql']
+ mime types +
+   + + flags = 2
+ Flags for compiling the regular expressions. +
+   + + tokens = {'comments': [('(\\s*#.*)', Token.Comment)], 'filterE...
+ Dict of ``{'state': [(regex, tokentype, new_state), ...], ... +
+

Inherited from pygments.lexer.Lexer: + alias_filenames +

+
+ + + + + + + + + +
+ + + + + +
Properties[hide private]
+
+

Inherited from object: + __class__ +

+
+ + + + + + +
+ + + + + +
Class Variable Details[hide private]
+
+ +
+ +
+

tokens

+

Dict of ``{'state': [(regex, tokentype, new_state), ...], ...}``

+

The initial state is 'root'. ``new_state`` can be omitted to signify + no state transition. If it is a string, the state is pushed on the stack + and changed. If it is a tuple of strings, all states are pushed on the + stack and the current state will be the topmost. It can also be + ``combined('state1', 'state2', ...)`` to signify a new, anonymous state + combined from the rules of two or more existing ones. Furthermore, it can + be '#pop' to signify going back one step in the state stack, or '#push' + to push the current state on the stack again.

+

The tuple can also be replaced with ``include('state')``, in which + case the rules from the state named by the string are included in the + current one.

+
+
+
+
Value:
+
+{'comments': [('(\\s*#.*)', Token.Comment)],
+ 'filterExp': ['variable',
+               'object',
+               ('\\s*[+*/<>=~!%&|-]+\\s*', Token.Operator),
+               ('\\s*\\)', Token.Text, '#pop')],
+ 'graph': [('\\s*(<[^>]*\\>)',
+            Token.Name.Class,
+            ('triple', 'predObj')),
+...
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-module.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-module.html new file mode 100755 index 00000000..cb1ce790 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-module.html @@ -0,0 +1,205 @@ + + + + + doapfiend.model + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module model + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Module model

source code

+

Model of a DOAP profile using RDFAlchemy

+ + + + + + + + + + + + + + + + + + + +
+ + + + + +
Classes[hide private]
+
+   + + Project
+ DOAP Project Class +
+   + + Release
+ A release class +
+   + + SVNRepository
+ Subversion repository class +
+   + + CVSRepository
+ CVS repository class +
+ + + + + + + + + + + + + + + +
+ + + + + +
Variables[hide private]
+
+   + + DOAP = rdflib.URIRef('http://usefulinc.com/ns/doap#') +
+   + + FOAF = rdflib.URIRef('http://xmlns.com/foaf/0.1/') +
+   + + DC = rdflib.URIRef('http://purl.org/dc/elements/1.1/') +
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-pysrc.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-pysrc.html new file mode 100755 index 00000000..d00cf7c1 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model-pysrc.html @@ -0,0 +1,305 @@ + + + + + doapfiend.model + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module model + + + + + + +
[hide private]
[frames] | no frames]
+
+

Source Code for Module doapfiend.model

+
+ 1   
+ 2  ''' 
+ 3   
+ 4  Model of a DOAP profile using RDFAlchemy 
+ 5   
+ 6  ''' 
+ 7   
+ 8  from rdfalchemy import rdfSubject, rdfSingle, rdfMultiple 
+ 9  from rdfalchemy.orm import mapper 
+10  from rdflib import Namespace 
+11   
+12  DOAP = Namespace("http://usefulinc.com/ns/doap#") 
+13  FOAF = Namespace("http://xmlns.com/foaf/0.1/") 
+14  DC = Namespace("http://purl.org/dc/elements/1.1/") 
+15   
+16   
+
17 -class Project(rdfSubject): +
18 +19 """ +20 DOAP Project Class +21 """ +22 +23 rdf_type = DOAP.Project +24 +25 category = rdfMultiple(DOAP.category) +26 created = rdfSingle(DOAP.created) +27 shortname = rdfSingle(DOAP.shortname) +28 description = rdfMultiple(DOAP.description) +29 bug_database = rdfSingle(DOAP['bug-database']) +30 developer = rdfMultiple(DOAP.developer, range_type=FOAF.Person) +31 documenter = rdfMultiple(DOAP.documenter, range_type=FOAF.Person) +32 download_mirror = rdfMultiple(DOAP['downoad-mirror']) +33 download_page = rdfSingle(DOAP['download-page']) +34 helper = rdfMultiple(DOAP.helper, range_type=FOAF.Person) +35 homepage = rdfSingle(DOAP.homepage) +36 license = rdfMultiple(DOAP['license']) +37 maintainer = rdfMultiple(DOAP.maintainer, range_type=FOAF.Person) +38 developer = rdfMultiple(DOAP.developer, range_type=FOAF.Person) +39 translator = rdfMultiple(DOAP.translator, range_type=FOAF.Person) +40 helper = rdfMultiple(DOAP.helper, range_type=FOAF.Person) +41 tester = rdfMultiple(DOAP.tester, range_type=FOAF.Person) +42 documenter = rdfMultiple(DOAP.documenter, range_type=FOAF.Person) +43 module = rdfSingle(DOAP.module) +44 name = rdfSingle(DOAP.name) +45 old_homepage = rdfMultiple(DOAP['old-homepage']) +46 programming_language = rdfMultiple(DOAP['programming-language']) +47 releases = rdfMultiple(DOAP.release, range_type=DOAP.Version) +48 svn_repository = rdfSingle(DOAP.repository, 'svn_repository', +49 range_type=DOAP.SVNRepository) +50 cvs_repository = rdfSingle(DOAP.repository, 'cvs_repository', +51 range_type=DOAP.CVSRepository) +52 oper_sys = rdfMultiple(DOAP['os']) +53 screenshots = rdfMultiple(DOAP.screenshots) +54 shortdesc = rdfMultiple(DOAP.shortdesc) +55 tester = rdfMultiple(DOAP.tester, range_type=FOAF.Person) +56 translator = rdfMultiple(DOAP.translator, range_type=FOAF.Person) +57 wiki = rdfMultiple(DOAP.wiki) +
58 +
59 -class Release(rdfSubject): +
60 """A release class""" +61 rdf_type = DOAP.Version +62 revision = rdfSingle(DOAP.revision) +63 name = rdfSingle(DOAP.name) +64 created = rdfSingle(DOAP.created) +65 changelog = rdfSingle(DC.description) +66 file_releases = rdfMultiple(DOAP['file-release']) +
67 +
68 -class SVNRepository(rdfSubject): +
69 """Subversion repository class""" +70 rdf_type = DOAP.SVNRepository +71 location = rdfSingle(DOAP.location) +72 svn_browse = rdfSingle(DOAP.browse) +
73 +
74 -class CVSRepository(rdfSubject): +
75 """CVS repository class""" +76 rdf_type = DOAP.CVSRepository +77 anon_root = rdfSingle(DOAP['anon-root']) +78 cvs_browse = rdfSingle(DOAP.browse) +79 module = rdfSingle(DOAP.module) +
80 +81 +82 mapper(Project, Release, CVSRepository, SVNRepository) +83 +
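Since the model classes are plain RDFAlchemy rdfSubject mappings, a Project loaded through doaplib can be read like an ordinary object; a hedged sketch, with the DOAP file name again purely illustrative:

    from doapfiend.doaplib import load_graph

    rdf_xml = open('project.rdf').read()           # hypothetical DOAP profile, as before
    project = load_graph(rdf_xml)
    print(project.name, project.homepage)          # rdfSingle descriptors yield one value
    for language in project.programming_language:  # rdfMultiple descriptors yield a list
        print(language)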
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.CVSRepository-class.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.CVSRepository-class.html new file mode 100755 index 00000000..98825496 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.CVSRepository-class.html @@ -0,0 +1,320 @@ + + + + + doapfiend.model.CVSRepository + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module model :: + Class CVSRepository + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Class CVSRepository

source code

+
+        object --+    
+                 |    
+doaplib.rdfSubject --+
+                     |
+                    CVSRepository
+
+ +
+

CVS repository class

+ + + + + + + + + + +
+ + + + + +
Instance Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + __cmp__, + __delitem__, + __getitem__, + __hash__, + __init__, + __repr__, + md5_term_hash, + n3 +

+

Inherited from doaplib.rdfSubject (private): + _ppo, + _remove, + _rename, + _set_with_dict +

+

Inherited from object: + __delattr__, + __getattribute__, + __new__, + __reduce__, + __reduce_ex__, + __setattr__, + __str__ +

+
+ + + + + + + + + +
+ + + + + +
Class Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + ClassInstances, + GetRandom, + filter_by, + get_by, + query +

+

Inherited from doaplib.rdfSubject (private): + _getdescriptor +

+
+ + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
Class Variables[hide private]
+
+   + + rdf_type = rdflib.URIRef('http://usefulinc.com/ns/doap#CVSRepo... +
+   + + anon_root = rdfSingle(DOAP ['anon-root']) +
+   + + cvs_browse = rdfSingle(DOAP.browse) +
+   + + module = rdfSingle(DOAP.module) +
+

Inherited from doaplib.rdfSubject: + db +

+
+ + + + + + + + + +
+ + + + + +
Properties[hide private]
+
+

Inherited from object: + __class__ +

+
+ + + + + + +
+ + + + + +
Class Variable Details[hide private]
+
+ +
+ +
+

rdf_type

+ +
+
+
+
Value:
+
+rdflib.URIRef('http://usefulinc.com/ns/doap#CVSRepository')
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Project-class.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Project-class.html new file mode 100755 index 00000000..29d87054 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Project-class.html @@ -0,0 +1,557 @@ + + + + + doapfiend.model.Project + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module model :: + Class Project + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Class Project

source code

+
+        object --+    
+                 |    
+doaplib.rdfSubject --+
+                     |
+                    Project
+
+ +
+

DOAP Project Class

+ + + + + + + + + + +
+ + + + + +
Instance Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + __cmp__, + __delitem__, + __getitem__, + __hash__, + __init__, + __repr__, + md5_term_hash, + n3 +

+

Inherited from doaplib.rdfSubject (private): + _ppo, + _remove, + _rename, + _set_with_dict +

+

Inherited from object: + __delattr__, + __getattribute__, + __new__, + __reduce__, + __reduce_ex__, + __setattr__, + __str__ +

+
+ + + + + + + + + +
+ + + + + +
Class Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + ClassInstances, + GetRandom, + filter_by, + get_by, + query +

+

Inherited from doaplib.rdfSubject (private): + _getdescriptor +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
Class Variables[hide private]
+
+   + + rdf_type = rdflib.URIRef('http://usefulinc.com/ns/doap#Project') +
+   + + category = rdfMultiple(DOAP.category) +
+   + + created = rdfSingle(DOAP.created) +
+   + + shortname = rdfSingle(DOAP.shortname) +
+   + + description = rdfMultiple(DOAP.description) +
+   + + bug_database = rdfSingle(DOAP ['bug-database']) +
+   + + download_mirror = rdfMultiple(DOAP ['downoad-mirror']) +
+   + + download_page = rdfSingle(DOAP ['download-page']) +
+   + + homepage = rdfSingle(DOAP.homepage) +
+   + + license = rdfMultiple(DOAP ['license']) +
+   + + maintainer = rdfMultiple(DOAP.maintainer, range_type= FOAF.Per... +
+   + + developer = rdfMultiple(DOAP.developer, range_type= FOAF.Person) +
+   + + helper = rdfMultiple(DOAP.helper, range_type= FOAF.Person) +
+   + + documenter = rdfMultiple(DOAP.documenter, range_type= FOAF.Per... +
+   + + module = rdfSingle(DOAP.module) +
+   + + name = rdfSingle(DOAP.name) +
+   + + old_homepage = rdfMultiple(DOAP ['old-homepage']) +
+   + + programming_language = rdfMultiple(DOAP ['programming-language']) +
+   + + releases = rdfMultiple(DOAP.release, range_type= DOAP.Version) +
+   + + svn_repository = rdfSingle(DOAP.repository, 'svn_repository', ... +
+   + + cvs_repository = rdfSingle(DOAP.repository, 'cvs_repository', ... +
+   + + oper_sys = rdfMultiple(DOAP ['os']) +
+   + + screenshots = rdfMultiple(DOAP.screenshots) +
+   + + shortdesc = rdfMultiple(DOAP.shortdesc) +
+   + + tester = rdfMultiple(DOAP.tester, range_type= FOAF.Person) +
+   + + translator = rdfMultiple(DOAP.translator, range_type= FOAF.Per... +
+   + + wiki = rdfMultiple(DOAP.wiki) +
+

Inherited from doaplib.rdfSubject: + db +

+
+ + + + + + + + + +
+ + + + + +
Properties[hide private]
+
+

Inherited from object: + __class__ +

+
+ + + + + + +
+ + + + + +
Class Variable Details[hide private]
+
+ +
+ +
+

maintainer

+ +
+
+
+
Value:
+
+rdfMultiple(DOAP.maintainer, range_type= FOAF.Person)
+
+
+
+
+
+ +
+ +
+

documenter

+ +
+
+
+
Value:
+
+rdfMultiple(DOAP.documenter, range_type= FOAF.Person)
+
+
+
+
+
+ +
+ +
+

svn_repository

+ +
+
+
+
Value:
+
+rdfSingle(DOAP.repository, 'svn_repository', range_type= DOAP.SVNRepos\
+itory)
+
+
+
+
+
+ +
+ +
+

cvs_repository

+ +
+
+
+
Value:
+
+rdfSingle(DOAP.repository, 'cvs_repository', range_type= DOAP.CVSRepos\
+itory)
+
+
+
+
+
+ +
+ +
+

translator

+ +
+
+
+
Value:
+
+rdfMultiple(DOAP.translator, range_type= FOAF.Person)
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Release-class.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Release-class.html new file mode 100755 index 00000000..aa80dbb1 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.Release-class.html @@ -0,0 +1,297 @@ + + + + + doapfiend.model.Release + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module model :: + Class Release + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Class Release

source code

+
+        object --+    
+                 |    
+doaplib.rdfSubject --+
+                     |
+                    Release
+
+ +
+

A release class

+ + + + + + + + + + +
+ + + + + +
Instance Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + __cmp__, + __delitem__, + __getitem__, + __hash__, + __init__, + __repr__, + md5_term_hash, + n3 +

+

Inherited from doaplib.rdfSubject (private): + _ppo, + _remove, + _rename, + _set_with_dict +

+

Inherited from object: + __delattr__, + __getattribute__, + __new__, + __reduce__, + __reduce_ex__, + __setattr__, + __str__ +

+
+ + + + + + + + + +
+ + + + + +
Class Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + ClassInstances, + GetRandom, + filter_by, + get_by, + query +

+

Inherited from doaplib.rdfSubject (private): + _getdescriptor +

+
+ + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + + +
Class Variables[hide private]
+
+   + + rdf_type = rdflib.URIRef('http://usefulinc.com/ns/doap#Version') +
+   + + revision = rdfSingle(DOAP.revision) +
+   + + name = rdfSingle(DOAP.name) +
+   + + created = rdfSingle(DOAP.created) +
+   + + changelog = rdfSingle(DC.description) +
+   + + file_releases = rdfMultiple(DOAP ['file-release']) +
+

Inherited from doaplib.rdfSubject: + db +

+
+ + + + + + + + + +
+ + + + + +
Properties[hide private]
+
+

Inherited from object: + __class__ +

+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.SVNRepository-class.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.SVNRepository-class.html new file mode 100755 index 00000000..b1de66d6 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.model.SVNRepository-class.html @@ -0,0 +1,313 @@ + + + + + doapfiend.model.SVNRepository + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Module model :: + Class SVNRepository + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Class SVNRepository

source code

+
+        object --+    
+                 |    
+doaplib.rdfSubject --+
+                     |
+                    SVNRepository
+
+ +
+

Subversion repository class

+ + + + + + + + + + +
+ + + + + +
Instance Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + __cmp__, + __delitem__, + __getitem__, + __hash__, + __init__, + __repr__, + md5_term_hash, + n3 +

+

Inherited from doaplib.rdfSubject (private): + _ppo, + _remove, + _rename, + _set_with_dict +

+

Inherited from object: + __delattr__, + __getattribute__, + __new__, + __reduce__, + __reduce_ex__, + __setattr__, + __str__ +

+
+ + + + + + + + + +
+ + + + + +
Class Methods[hide private]
+
+

Inherited from doaplib.rdfSubject: + ClassInstances, + GetRandom, + filter_by, + get_by, + query +

+

Inherited from doaplib.rdfSubject (private): + _getdescriptor +

+
+ + + + + + + + + + + + + + + + + + +
+ + + + + +
Class Variables[hide private]
+
+   + + rdf_type = rdflib.URIRef('http://usefulinc.com/ns/doap#SVNRepo... +
+   + + location = rdfSingle(DOAP.location) +
+   + + svn_browse = rdfSingle(DOAP.browse) +
+

Inherited from doaplib.rdfSubject: + db +

+
+ + + + + + + + + +
+ + + + + +
Properties[hide private]
+
+

Inherited from object: + __class__ +

+
+ + + + + + +
+ + + + + +
Class Variable Details[hide private]
+
+ +
+ +
+

rdf_type

+ +
+
+
+
Value:
+
+rdflib.URIRef('http://usefulinc.com/ns/doap#SVNRepository')
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-module.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-module.html new file mode 100755 index 00000000..8d8cf10c --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-module.html @@ -0,0 +1,398 @@ + + + + + doapfiend.plugins + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Package plugins + + + + + + +
[hide private]
[frames] | no frames]
+
+ +

Package plugins

source code

+

Writing Plugins

+

doapfiend supports setuptools_ entry point plugins.

+

There are two basic rules for plugins:

+
    +
  • + Plugin classes should subclass `doapfiend.plugins.Plugin`_. +
  • +
  • + Plugins may implement any of the methods described in the class + PluginInterface in doapfiend.plugins.base. Please note that this + class is for documentary purposes only; plugins may not subclass + PluginInterface. +
  • +
+

Setuptools: http://peak.telecommunity.com/DevCenter/setuptools + Doapfiend Plugins: + http://trac.doapspace.org/doapfiend/wiki/DoapfiendPlugins

+

Registering

+

For doapfiend to find a plugin, it must be part of a package that + uses setuptools, and the plugin must be included in the entry points + defined in the setup.py for the package:

+
+ setup(name='Some plugin',
+       ...
+       entry_points = {
+           'doapfiend.plugins': [
+               'someplugin = someplugin:SomePlugin'
+               ]
+           },
+       ...
+       )
+
+

Once the package is installed with install or develop, doapfiend + will be able to load the plugin.

+

Defining options

+

All plugins must implement the methods ``add_options(self, parser, + env)`` and ``configure(self, options, conf)``. Subclasses of + doapfiend.plugins.Plugin that want the standard options should call + the superclass methods.

+

doapfiend uses optparse.OptionParser from the standard library to + parse arguments. A plugin's ``add_options()`` method receives a + parser instance. It's good form for a plugin to use that instance + only to add additional arguments that take only long arguments + (--like-this). Most of doapfiend's built-in arguments get their + default value from an environment variable. This is a good practice + because it allows options to be utilized when run through some other + means than the doapfiendtests script.

+

A plugin's ``configure()`` method receives the parsed + ``OptionParser`` options object, as well as the current config + object. Plugins should configure their behavior based on the + user-selected settings, and may raise exceptions if the configured + behavior is nonsensical.

+

Logging

+

doapfiend uses the logging classes from the standard library. To + enable users to view debug messages easily, plugins should use + ``logging.getLogger()`` to acquire a logger in the + ``doapfiend.plugins`` namespace.
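Putting these points together, a minimal third-party plugin might look roughly like the sketch below; MyIndexPlugin, the --my-option flag and the DOAPFIEND_MY_OPTION variable are all made-up names, and the exact Plugin base-class hooks should be checked against doapfiend.plugins.base:

    import logging
    from doapfiend.plugins.base import Plugin

    LOG = logging.getLogger('doapfiend.plugins')

    class MyIndexPlugin(Plugin):
        '''Illustrative plugin skeleton.'''
        name = 'myindex'
        enabled = False

        def add_options(self, parser, env):
            # Long options only, defaulting from an environment variable.
            parser.add_option('--my-option', action='store_true',
                              default=bool(env.get('DOAPFIEND_MY_OPTION')),
                              help='Enable the (hypothetical) myindex lookup.')

        def configure(self, options, conf):
            self.enabled = getattr(options, 'my_option', False)
            if self.enabled:
                LOG.debug('myindex plugin enabled')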

+ + + + + + + + +
+ + + + + +
Submodules[hide private]
+
+
+ +
+ + + + + + + + + + + + +
+ + + + + +
Functions[hide private]
+
+   + + + + + + +
call_plugins(plugins, + method, + *arg, + **kw)
+ Call `method` on each plugin in the list that defines it, with the provided + arguments.
+ source code + +
+ +
+   + + + + + + +
load_plugins(builtin=True, + others=True)
+ Load plugins, either builtin, others, or both.
+ source code + +
+ +
+ + + + + + + + + + + + +
+ + + + + +
Variables[hide private]
+
+   + + LOG = logging.getLogger('doapfiend') +
+   + + builtin_plugins = ['url', 'homepage', 'n3', 'xml', 'text', 'so... +
+ + + + + + +
+ + + + + +
Function Details[hide private]
+
+ +
+ +
+ + +
+

call_plugins(plugins, + method, + *arg, + **kw) +

+
source code  +
+ +

Call `method` on each plugin in the list that defines it, with the provided + arguments. The first response that is not None is returned.

+
+
+
+
+
+ + + + + + +
+ + + + + +
Variables Details[hide private]
+
+ +
+ +
+

builtin_plugins

+ +
+
+
+
Value:
+
+['url',
+ 'homepage',
+ 'n3',
+ 'xml',
+ 'text',
+ 'sourceforge',
+ 'pypi',
+ 'freshmeat',
+...
+
+
+
+
+
+
+ + + + + + + + + + + + + + + + + + + + + + + + +
+ + + + diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-pysrc.html b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-pysrc.html new file mode 100755 index 00000000..1ae56557 --- /dev/null +++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/docs/api/doapfiend.plugins-pysrc.html @@ -0,0 +1,352 @@ + + + + + doapfiend.plugins + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+ + Package doapfiend :: + Package plugins + + + + + + +
[hide private]
[frames] | no frames]
+
+

Source Code for Package doapfiend.plugins

+
+  1   
+  2  # pylint: disable-msg=W0142,C0103 
+  3   
+  4   
+  5  """ 
+  6  Writing Plugins 
+  7  =============== 
+  8   
+  9  doapfiend supports setuptools_ entry point plugins. 
+ 10   
+ 11  There are two basic rules for plugins: 
+ 12   
+ 13   - Plugin classes should subclass `doapfiend.plugins.Plugin`_. 
+ 14   - Plugins may implement any of the methods described in the class 
+ 15     PluginInterface in doapfiend.plugins.base. Please note that this class is for 
+ 16     documentary purposes only; plugins may not subclass PluginInterface. 
+ 17   
+ 18  Setuptools: http://peak.telecommunity.com/DevCenter/setuptools 
+ 19  Doapfiend Plugins: http://trac.doapspace.org/doapfiend/wiki/DoapfiendPlugins  
+ 20   
+ 21  Registering 
+ 22  ----------- 
+ 23   
+ 24  For doapfiend to find a plugin, it must be part of a package that uses 
+ 25  setuptools, and the plugin must be included in the entry points defined 
+ 26  in the setup.py for the package:: 
+ 27   
+ 28    setup(name='Some plugin', 
+ 29          ... 
+ 30          entry_points = { 
+ 31              'doapfiend.plugins': [ 
+ 32                  'someplugin = someplugin:SomePlugin' 
+ 33                  ] 
+ 34              }, 
+ 35          ... 
+ 36          ) 
+ 37   
+ 38  Once the package is installed with install or develop, doapfiend will be able 
+ 39  to load the plugin. 
+ 40   
+ 41  Defining options 
+ 42  ---------------- 
+ 43   
+ 44  All plugins must implement the methods ``add_options(self, parser, env)`` 
+ 45  and ``configure(self, options, conf)``. Subclasses of doapfiend.plugins.Plugin 
+ 46  that want the standard options should call the superclass methods. 
+ 47   
+ 48  doapfiend uses optparse.OptionParser from the standard library to parse 
+ 49  arguments. A plugin's ``add_options()`` method receives a parser 
+ 50  instance. It's good form for a plugin to use that instance only to add 
+ 51  additional arguments that take only long arguments (--like-this). Most 
+ 52  of doapfiend's built-in arguments get their default value from an envir