.
+ :license: BSD, see LICENSE for more details.
+"""
+
+import re
+
+from pygments.lexer import RegexLexer, include, bygroups
+from pygments.token import Text, Comment, Operator, Keyword, Name, Literal
+
+
+__all__ = ['Notation3Lexer', 'SparqlLexer']
+
+
+class Notation3Lexer(RegexLexer):
+ """
+ Lexer for N3 / Turtle / N-Triples (NT).
+ """
+ name = 'N3'
+ aliases = ['n3', 'turtle']
+ filenames = ['*.n3', '*.ttl', '*.NT']
+ mimetypes = ['text/rdf+n3', 'application/x-turtle', 'application/n3']
+
+ tokens = {
+ 'comments': [
+ (r'(\s*#.*)', Comment)
+ ],
+ 'root': [
+ include('comments'),
+ (r'(\s*@(?:prefix|base|keywords)\s*)(\w*:\s+)?(<[^> ]*>\s*\.\s*)',
+ bygroups(Keyword,Name.Variable,Name.Namespace)),
+ (r'\s*(<[^>]*\>)', Name.Class, ('triple','predObj')),
+ (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\s)',
+ Name.Class, ('triple','predObj')),
+ (r'\s*\[\]\s*', Name.Class, ('triple','predObj')),
+ ],
+ 'triple' : [
+ (r'\s*\.\s*', Text, '#pop')
+ ],
+ 'predObj': [
+ include('comments'),
+ (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\b\s*)', Operator, 'object'),
+ (r'\s*(<[^>]*\>)', Operator, 'object'),
+ (r'\s*\]\s*', Text, '#pop'),
+ (r'(?=\s*\.\s*)', Keyword, '#pop'),
+ ],
+ 'objList': [
+ (r'\s*\)', Text, '#pop'),
+ include('object')
+ ],
+ 'object': [
+ (r'\s*\[', Text, 'predObj'),
+ (r'\s*<[^> ]*>', Name.Attribute),
+ (r'\s*("""(?:.|\n)*?""")(\@[a-z]{2,4}|\^\^[a-zA-Z0-9\-\:_#/\.]*>?)?\s*',
+ bygroups(Literal.String,Text)),
+ (r'\s*".*?[^\\]"(?:\@[a-z]{2,4}|\^\^[a-zA-Z0-9\-\:_#/\.]*>?)?\s*',
+ Literal.String),
+ (r'\s*[a-zA-Z0-9\-_\:]+\s*', Name.Attribute),
+ (r'\s*\(', Text, 'objList'),
+ (r'\s*;\s*\n?', Text, '#pop'),
+ (r'(?=\s*\])', Text, '#pop'),
+ (r'(?=\s*\.)', Text, '#pop'),
+ ],
+ }
+
+
+class SparqlLexer(RegexLexer):
+ """
+ Lexer for SPARQL (incomplete).
+ """
+ name = 'SPARQL'
+ aliases = ['sparql']
+ filenames = ['*.sparql']
+ mimetypes = ['text/x-sql']
+ flags = re.IGNORECASE
+ tokens = {
+ 'comments': [
+ (r'(\s*#.*)', Comment)
+ ],
+ 'root': [
+ include('comments'),
+ (r'(\s*(?:PREFIX|BASE)\s+)(\w*:\w*)?(\s*<[^> ]*>\s*)',
+ bygroups(Keyword,Name.Variable,Name.Namespace)),
+ (r'(\s*#.*)', Comment),
+ (r'((?:SELECT|ASK|CONSTRUCT|DESCRIBE)\s*(?:DISTINCT|REDUCED)?\s*)((?:\?[a-zA-Z0-9_-]+\s*)+|\*)(\s*)',
+ bygroups(Keyword,Name.Variable,Text)),
+ (r'(FROM\s*(?:NAMED)?)(\s*.*)', bygroups(Keyword,Text)),
+ (r'(WHERE)?\s*({)', bygroups(Keyword,Text), 'graph'),
+ (r'(LIMIT|OFFSET)(\s*[+-]?[0-9]+)',
+ bygroups(Keyword,Literal.String)),
+ ],
+ 'graph':[
+ (r'\s*(<[^>]*\>)', Name.Class, ('triple','predObj')),
+ (r'(\s*[a-zA-Z_0-9\-]*:[a-zA-Z0-9\-_]*\s)',
+ Name.Class, ('triple','predObj')),
+ (r'(\s*\?[a-zA-Z0-9_-]*)', Name.Variable, ('triple','predObj')),
+ (r'\s*\[\]\s*', Name.Class, ('triple','predObj')),
+ (r'\s*(FILTER\s*)((?:regex)?\()',bygroups(Keyword,Text),'filterExp'),
+ (r'\s*}', Text, '#pop'),
+ ],
+ 'triple' : [
+ (r'(?=\s*})', Text, '#pop'),
+ (r'\s*\.\s*', Text, '#pop'),
+ ],
+ 'predObj': [
+ include('comments'),
+ (r'(\s*\?[a-zA-Z0-9_-]*\b\s*)', Name.Variable,'object'),
+ (r'(\s*[a-zA-Z_:][a-zA-Z0-9\-_:]*\b\s*)', Operator, 'object'),
+ (r'\s*(<[^>]*\>)', Operator, 'object'),
+ (r'\s*\]\s*', Text, '#pop'),
+ (r'(?=\s*\.\s*)', Keyword, '#pop'),
+ ],
+ 'objList': [
+ (r'\s*\)', Text, '#pop'),
+ include('object'),
+ ],
+ 'object': [
+ include('variable'),
+ (r'\s*\[', Text, 'predObj'),
+ (r'\s*<[^> ]*>', Name.Attribute),
+ (r'\s*("""(?:.|\n)*?""")(\@[a-z]{2,4}|\^\^[a-zA-Z0-9\-\:_#/\.]*>?)?\s*', bygroups(Literal.String,Text)),
+ (r'\s*".*?[^\\]"(?:\@[a-z]{2,4}|\^\^[a-zA-Z0-9\-\:_#/\.]*>?)?\s*', Literal.String),
+ (r'\s*[a-zA-Z0-9\-_\:]+\s*', Name.Attribute),
+ (r'\s*\(', Text, 'objList'),
+ (r'\s*;\s*', Text, '#pop'),
+ (r'(?=\])', Text, '#pop'),
+ (r'(?=\.)', Text, '#pop'),
+ ],
+ 'variable':[
+ (r'(\?[a-zA-Z0-9\-_]+\s*)', Name.Variable),
+ ],
+ 'filterExp':[
+ include('variable'),
+ include('object'),
+ (r'\s*[+*/<>=~!%&|-]+\s*', Operator),
+ (r'\s*\)', Text, '#pop'),
+ ],
+
+ }
+
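+# Illustrative usage sketch (not part of the original module): either lexer
+# can be passed to pygments.highlight(), as doapfiend's n3 output plugin does.
+# The sample N3 text below is hypothetical.
+#
+# from pygments import highlight
+# from pygments.formatters import TerminalFormatter
+# n3_text = '@prefix doap: <http://usefulinc.com/ns/doap#> .'
+# print highlight(n3_text, Notation3Lexer(), TerminalFormatter(full=False))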
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/model.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/model.py
new file mode 100644
index 0000000000000000000000000000000000000000..9b935b51461221f7a942e56d407c20f75a390272
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/model.py
@@ -0,0 +1,83 @@
+
+'''
+
+Model of a DOAP profile using RDFAlchemy
+
+'''
+
+from rdfalchemy import rdfSubject, rdfSingle, rdfMultiple
+from rdfalchemy.orm import mapper
+from rdflib import Namespace
+
+DOAP = Namespace("http://usefulinc.com/ns/doap#")
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+DC = Namespace("http://purl.org/dc/elements/1.1/")
+
+
+class Project(rdfSubject):
+
+ """
+ DOAP Project Class
+ """
+
+ rdf_type = DOAP.Project
+
+ category = rdfMultiple(DOAP.category)
+ created = rdfSingle(DOAP.created)
+ shortname = rdfSingle(DOAP.shortname)
+ description = rdfMultiple(DOAP.description)
+ bug_database = rdfSingle(DOAP['bug-database'])
+ developer = rdfMultiple(DOAP.developer, range_type=FOAF.Person)
+ documenter = rdfMultiple(DOAP.documenter, range_type=FOAF.Person)
+ download_mirror = rdfMultiple(DOAP['download-mirror'])
+ download_page = rdfSingle(DOAP['download-page'])
+ helper = rdfMultiple(DOAP.helper, range_type=FOAF.Person)
+ homepage = rdfSingle(DOAP.homepage)
+ license = rdfMultiple(DOAP['license'])
+ maintainer = rdfMultiple(DOAP.maintainer, range_type=FOAF.Person)
+ translator = rdfMultiple(DOAP.translator, range_type=FOAF.Person)
+ tester = rdfMultiple(DOAP.tester, range_type=FOAF.Person)
+ module = rdfSingle(DOAP.module)
+ name = rdfSingle(DOAP.name)
+ old_homepage = rdfMultiple(DOAP['old-homepage'])
+ programming_language = rdfMultiple(DOAP['programming-language'])
+ releases = rdfMultiple(DOAP.release, range_type=DOAP.Version)
+ svn_repository = rdfSingle(DOAP.repository, 'svn_repository',
+ range_type=DOAP.SVNRepository)
+ cvs_repository = rdfSingle(DOAP.repository, 'cvs_repository',
+ range_type=DOAP.CVSRepository)
+ oper_sys = rdfMultiple(DOAP['os'])
+ screenshots = rdfMultiple(DOAP.screenshots)
+ shortdesc = rdfMultiple(DOAP.shortdesc)
+ wiki = rdfMultiple(DOAP.wiki)
+
+class Release(rdfSubject):
+ """A release class"""
+ rdf_type = DOAP.Version
+ revision = rdfSingle(DOAP.revision)
+ name = rdfSingle(DOAP.name)
+ created = rdfSingle(DOAP.created)
+ changelog = rdfSingle(DC.description)
+ file_releases = rdfMultiple(DOAP['file-release'])
+
+class SVNRepository(rdfSubject):
+ """Subversion repository class"""
+ rdf_type = DOAP.SVNRepository
+ location = rdfSingle(DOAP.location)
+ svn_browse = rdfSingle(DOAP.browse)
+
+class CVSRepository(rdfSubject):
+ """CVS repository class"""
+ rdf_type = DOAP.CVSRepository
+ anon_root = rdfSingle(DOAP['anon-root'])
+ cvs_browse = rdfSingle(DOAP.browse)
+ module = rdfSingle(DOAP.module)
+
+
+mapper(Project, Release, CVSRepository, SVNRepository)
+
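+# Illustrative sketch (assumes doapfiend.doaplib.load_graph, used by the
+# output plugins, returns a mapped Project instance; doap_xml is a
+# hypothetical RDF/XML string):
+#
+# from doapfiend.doaplib import load_graph
+# project = load_graph(doap_xml)
+# print project.name, project.homepage
+# for release in project.releases:
+#     print release.revision, release.created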
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/__init__.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..35ae07c88a703473dd933ed7b2f92b16774366b8
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/__init__.py
@@ -0,0 +1,144 @@
+
+# pylint: disable-msg=W0142,C0103
+
+
+"""
+Writing Plugins
+===============
+
+doapfiend supports setuptools_ entry point plugins.
+
+There are two basic rules for plugins:
+
+ - Plugin classes should subclass `doapfiend.plugins.Plugin`_.
+ - Plugins may implement any of the methods described in the class
+ PluginInterface in doapfiend.plugins.base. Please note that this class is for
+ documentary purposes only; plugins may not subclass PluginInterface.
+
+Setuptools: http://peak.telecommunity.com/DevCenter/setuptools
+Doapfiend Plugins: http://trac.doapspace.org/doapfiend/wiki/DoapfiendPlugins
+
+Registering
+-----------
+
+For doapfiend to find a plugin, it must be part of a package that uses
+setuptools, and the plugin must be included in the entry points defined
+in the setup.py for the package::
+
+ setup(name='Some plugin',
+ ...
+ entry_points = {
+ 'doapfiend.plugins': [
+ 'someplugin = someplugin:SomePlugin'
+ ]
+ },
+ ...
+ )
+
+Once the package is installed with install or develop, doapfiend will be able
+to load the plugin.
+
+Defining options
+----------------
+
+All plugins must implement the methods ``add_options(self, parser, env)``
+and ``configure(self, options, conf)``. Subclasses of doapfiend.plugins.Plugin
+that want the standard options should call the superclass methods.
+
+doapfiend uses optparse.OptionParser from the standard library to parse
+arguments. A plugin's ``add_options()`` method receives a parser
+instance. It's good form for a plugin to use that instance only to add
+long options (--like-this). Most of doapfiend's built-in options get their
+default value from an environment variable. This is a good practice because
+it allows options to be set even when doapfiend is run through some means
+other than its command-line script.
+
+A plugin's ``configure()`` method receives the parsed ``OptionParser`` options
+object, as well as the current config object. Plugins should configure their
+behavior based on the user-selected settings, and may raise exceptions
+if the configured behavior is nonsensical.
+
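+A minimal output plugin, following the conventions of the builtin plugins
+shipped with doapfiend, might look like this (the class name and option are
+illustrative only)::
+
+ from doapfiend.plugins.base import Plugin
+
+ class EchoPlugin(Plugin):
+ '''Output DOAP unchanged'''
+ name = 'echo'
+ enable_opt = name
+
+ def add_options(self, parser, output, search):
+ output.add_option('--%s' % self.name, action='store_true',
+ dest=self.enable_opt,
+ help='Output DOAP unchanged')
+ return parser, output, search
+
+ def serialize(self, doap_xml, color=False):
+ return doap_xml
+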
+Logging
+-------
+
+doapfiend uses the logging classes from the standard library. To enable users
+to view debug messages easily, plugins should use ``logging.getLogger()`` to
+acquire a logger in the ``doapfiend.plugins`` namespace.
+
+"""
+
+import logging
+import pkg_resources
+from warnings import warn
+from inspect import isclass
+from doapfiend.plugins.base import Plugin
+
+LOG = logging.getLogger('doapfiend')
+
+# +==== IMPORTANT ====+
+#If you add any builtin plugins in doapfiend.plugins you must add them
+#to this list for them to be loaded. It's okay to add other Python modules
+#in the doapfiend.plugins namespace, but they won't be recognized as a plugin
+#unless listed here:
+
+builtin_plugins = ['url', 'homepage', 'n3', 'xml', 'text', 'sourceforge',
+ 'pypi', 'freshmeat', 'ohloh', 'fields']
+
+def call_plugins(plugins, method, *arg, **kw):
+ """Call the given method on every plugin in the list that defines it,
+ with the provided arguments. The first response that is not None is
+ returned.
+ """
+ for plug in plugins:
+ func = getattr(plug, method, None)
+ if func is None:
+ continue
+ LOG.debug("call plugin %s: %s", plug.name, method)
+ result = func(*arg, **kw)
+ if result is not None:
+ return result
+ return None
+
+def load_plugins(builtin=True, others=True):
+ """Load plugins, either builtin, others, or both.
+ """
+ loaded = []
+ if builtin:
+ for name in builtin_plugins:
+ try:
+ parent = __import__(__name__, globals(), locals(), [name])
+ #print name
+ pmod = getattr(parent, name)
+ for entry in dir(pmod):
+ obj = getattr(pmod, entry)
+ if (isclass(obj)
+ and issubclass(obj, Plugin)
+ and obj is not Plugin
+ and not obj in loaded):
+ #LOG.debug("load builtin plugin %s (%s)" % (name, obj))
+ #print "load builtin plugin %s (%s)" % (name, obj)
+ yield obj
+ loaded.append(obj)
+ except KeyboardInterrupt:
+ raise
+ except Exception, e:
+ warn("Unable to load builtin plugin %s: %s" % (name, e),
+ RuntimeWarning)
+ for entry_point in pkg_resources.iter_entry_points('doapfiend.plugins'):
+ LOG.debug("load plugin %s" % entry_point)
+ try:
+ plugin = entry_point.load()
+ except KeyboardInterrupt:
+ raise
+ except Exception, err_msg:
+ # never want a plugin load to exit doapfiend
+ # but we can't log here because the logger is not yet
+ # configured
+ warn("Unable to load plugin %s: %s" % \
+ (entry_point, err_msg), RuntimeWarning)
+ continue
+ if plugin.__module__.startswith('doapfiend.plugins'):
+ if builtin:
+ yield plugin
+ elif others:
+ yield plugin
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/base.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/base.py
new file mode 100644
index 0000000000000000000000000000000000000000..3cd03d4a08557f80b6897580043f346d68e4db7a
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/base.py
@@ -0,0 +1,78 @@
+
+# pylint: disable-msg=W0201,W0511
+#XXX Attribute 'conf' defined outside __init__
+
+"""
+
+Base plugin class
+=================
+
+All plugins should inherit doapfiend.plugins.Plugin
+
+"""
+
+
+import textwrap
+
+class Plugin(object):
+ """Base class for doapfiend plugins. It's not necessary to subclass this
+ class to create a plugin; however, all plugins must implement
+ `add_options(self, parser)` and `configure(self, options,
+ conf)`, and must have the attributes `enabled` and `name`.
+
+ Plugins should not be enabled by default.
+
+ Subclassing Plugin will give your plugin some friendly default
+ behavior:
+
+ - A --with-$name option will be added to the command line
+ interface to enable the plugin. The plugin class's docstring
+ will be used as the help for this option.
+ - The plugin will not be enabled unless this option is selected by
+ the user.
+ """
+ enabled = False
+ enable_opt = None
+ name = None
+
+ def __init__(self):
+ self.conf = None
+ if self.name is None:
+ self.name = self.__class__.__name__.lower()
+ if self.enable_opt is None:
+ self.enable_opt = "enable_plugin_%s" % self.name
+
+ def add_options(self, parser):
+ """Add command-line options for this plugin.
+
+ The base plugin class adds --with-$name by default, used to enable the
+ plugin.
+ """
+ parser.add_option("--with-%s" % self.name,
+ action="store_true",
+ dest=self.enable_opt,
+ help="Enable plugin %s: %s" %
+ (self.__class__.__name__, self.help())
+ )
+
+ def configure(self, options, conf):
+ """Configure the plugin and system, based on selected options.
+
+ The base plugin class sets the plugin to enabled if the enable option
+ for the plugin (self.enable_opt) is true.
+ """
+ self.conf = conf
+ self.options = options
+ if hasattr(options, self.enable_opt):
+ self.enabled = getattr(options, self.enable_opt)
+
+ def help(self):
+ """Return help for this plugin. This will be output as the help
+ section of the --with-$name option that enables the plugin.
+ """
+ if self.__class__.__doc__:
+ # docstrings are often indented; strip the common leading whitespace
+ return textwrap.dedent(self.__class__.__doc__)
+ return "(no help available)"
+
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/fields.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/fields.py
new file mode 100644
index 0000000000000000000000000000000000000000..d462437a14a4e43c753571a6f0783aa429bc001e
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/fields.py
@@ -0,0 +1,215 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+"""
+
+Plain text serializer
+=====================
+
+This plugin outputs DOAP in human-readable plain text
+
+"""
+
+__docformat__ = 'epytext'
+
+import logging
+
+from rdflib import Namespace
+from rdfalchemy import rdfSubject
+
+from doapfiend.plugins.base import Plugin
+from doapfiend.utils import COLOR
+from doapfiend.doaplib import load_graph
+
+
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+
+LOG = logging.getLogger('doapfiend')
+
+
+class OutputPlugin(Plugin):
+
+ """Class for formatting DOAP output"""
+
+ #This will be the opt_parser option (--fields)
+ name = "fields"
+ enabled = False
+ enable_opt = name
+
+ def __init__(self):
+ '''Setup Plain Text OutputPlugin class'''
+ super(OutputPlugin, self).__init__()
+ self.options = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ output.add_option('--%s' % self.name,
+ action='store',
+ dest=self.enable_opt,
+ help='Output specific DOAP fields as plain text')
+ return parser, output, search
+
+ def serialize(self, doap_xml, color=False):
+ '''
+ Serialize selected fields of RDF/XML DOAP as plain text
+
+ @param doap_xml: DOAP in RDF/XML serialization
+ @type doap_xml: string
+
+ @rtype: unicode
+ @return: DOAP in plain text
+ '''
+ if hasattr(self.options, 'no_color'):
+ color = not self.options.no_color
+ if not color:
+ #This has already been done if we're called from cli.py
+ #Fix me: Need to think on this.
+ for this in COLOR:
+ COLOR[this] = '\x1b[0m'
+
+ if hasattr(self.options, 'quiet'):
+ brief = self.options.quiet
+ else:
+ brief = False
+
+ doap = load_graph(doap_xml)
+ fields = self.options.fields.split(',')
+ #print fields
+ out = ''
+ for field in fields:
+ if '-' in field:
+ field = field.replace('-', '_')
+ field = field.strip()
+ if '.' in field:
+ repo, field = field.split('.')
+ text = print_repos(doap, repo, field)
+ elif field == 'releases':
+ text = get_releases(doap, brief)
+ elif field in ['maintainer', 'developer', 'documenter', 'helper',
+ 'tester', 'translator']:
+ text = get_people(doap, field)
+ else:
+ try:
+ text = getattr(doap, field)
+ except AttributeError:
+ LOG.warn("No such attribute: %s" % field)
+ text = None
+ if not text:
+ continue
+ if isinstance(text, list):
+ text = print_list(doap, field)
+ else:
+ text = print_field(doap, field)
+ out += text + '\n'
+ return out.rstrip()
+
+def print_list(doap, field):
+ '''
+ Print list of DOAP attributes
+
+ @param doap: DOAP in RDF/XML
+ @type doap: text
+
+ @param field: DOAP attribute to be printed
+ @type field: text
+
+ @rtype: text
+ @returns: Field to be printed
+ '''
+ #Can have multiple values per attribute
+ text = ""
+ for thing in getattr(doap, field):
+ if isinstance(thing, rdfSubject):
+ text += thing.resUri
+ else:
+ #unicode object
+ thing = thing.strip()
+ text += thing
+ return text
+
+def print_field(doap, field):
+ '''
+ Print single field
+
+ @param doap: DOAP in RDF/XML
+ @type doap: text
+
+ @param field: DOAP attribute to be printed
+ @type field: text
+
+ @rtype: text
+ @returns: Field to be printed
+ '''
+ text = getattr(doap, field)
+ if isinstance(text, rdfSubject):
+ return text.resUri.strip()
+ else:
+ return text.strip()
+
+def print_repos(doap, repo, field):
+ '''Prints DOAP repository metadata'''
+ if repo == 'cvs':
+ if hasattr(doap.cvs_repository, field):
+ return getattr(doap.cvs_repository, field)
+
+ if repo == 'svn':
+ if field == 'browse':
+ field = 'svn_browse'
+ if hasattr(doap.svn_repository, field):
+ text = getattr(doap.svn_repository, field)
+ if text:
+ if isinstance(text, rdfSubject):
+ return text.resUri
+ else:
+ return text.strip()
+ return ''
+
+def get_people(doap, job):
+ '''Print people for a particular job '''
+ out = ''
+ if hasattr(doap, job):
+ attribs = getattr(doap, job)
+ if len(attribs) > 0:
+ peeps = []
+ for attr in attribs:
+ if attr[FOAF.mbox] is None:
+ person = "%s" % attr[FOAF.name]
+ else:
+ mbox = attr[FOAF.mbox].resUri
+ if mbox.startswith('mailto:'):
+ mbox = mbox[7:]
+ person = "%s <%s>" % (attr[FOAF.name], mbox)
+ else:
+ LOG.debug("mbox is invalid: %s" % mbox)
+ person = "%s" % attr[FOAF.name]
+ peeps.append(person)
+ out += ", ".join([p for p in peeps])
+ return out
+
+
+def get_releases(doap, brief=False):
+ '''Print DOAP package release metadata'''
+ out = ''
+ if hasattr(doap, 'releases') and len(doap.releases) != 0:
+ if not brief:
+ out += COLOR['bold'] + "Releases:" + COLOR['normal'] + '\n'
+ for release in doap.releases:
+ if release.name:
+ out += COLOR['bold'] + COLOR['cyan'] + release.name + \
+ COLOR['normal'] + '\n'
+ if hasattr(release, 'created') and release.created is not None:
+ created = release.created
+ else:
+ created = ''
+ out += COLOR['cyan'] + ' ' + release.revision + ' ' + \
+ COLOR['normal'] + created + '\n'
+ if not brief:
+ if hasattr(release, 'changelog'):
+ if release.changelog:
+ out += COLOR['yellow'] + release.changelog + \
+ COLOR['normal'] + '\n'
+
+ for frel in release.file_releases:
+ out += ' %s' % frel.resUri + '\n'
+ return out
+
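+# Illustrative sketch of the field syntax handled by serialize() above
+# (the values are examples; the option plumbing is provided by doapfiend's CLI):
+#
+#   name,shortdesc,homepage     plain DOAP fields
+#   bug-database                dashes are mapped to underscores
+#   maintainer,developer        people fields, printed as "Name <email>"
+#   svn.location,cvs.module     repository fields in repo.field form
+#   releases                    release metadata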
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/freshmeat.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/freshmeat.py
new file mode 100644
index 0000000000000000000000000000000000000000..d79a768c9fda03ed83c22be8d4a778211bafac27
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/freshmeat.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+
+freshmeat
+=========
+
+Currently this plugin uses http://doapspace.org/ to fetch DOAP for Freshmeat
+
+
+"""
+
+__docformat__ = 'epytext'
+
+
+from doapfiend.utils import NotFoundError
+from doapfiend.plugins.base import Plugin
+from doapfiend.plugins.pkg_index import get_by_pkg_index
+
+
+class FreshmeatPlugin(Plugin):
+
+ """Get DOAP from Freshmeat package index"""
+
+ #This will be the opt_parser option (--fm) in the search group
+ name = "fm"
+ enabled = False
+ enable_opt = name
+
+ def __init__(self):
+ '''Setup FreshmeatPlugin class'''
+ super(FreshmeatPlugin, self).__init__()
+ self.options = None
+ self.query = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ search.add_option('--%s' % self.name,
+ action='store',
+ dest=self.enable_opt,
+ help='Get DOAP by its Freshmeat project name.',
+ metavar='PROJECT_NAME')
+ return parser, output, search
+
+ def search(self, proxy=None):
+ '''
+ Get Freshmeat DOAP
+
+ @param proxy: URL of optional HTTP proxy
+ @type proxy: string
+
+ @rtype: unicode
+ @returns: Single DOAP
+
+ '''
+ if hasattr(self.options, self.name):
+ self.query = getattr(self.options, self.name)
+ #Else self.query was set directly, someone not using the CLI
+ try:
+ return get_by_pkg_index(self.name, self.query, proxy)
+ except NotFoundError:
+ print "Not found: %s" % self.query
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/homepage.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/homepage.py
new file mode 100644
index 0000000000000000000000000000000000000000..91b710288b5be9b5e56017f6c99e020e34692495
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/homepage.py
@@ -0,0 +1,84 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+
+homepage
+========
+
+Fetches DOAP by searching doapspace.org by a project's homepage.
+
+"""
+
+__docformat__ = 'epytext'
+
+import logging
+
+from doapfiend.plugins.base import Plugin
+from doapfiend.doaplib import fetch_doap, query_by_homepage
+
+LOG = logging.getLogger("doapfiend")
+
+class OutputPlugin(Plugin):
+
+ """Search for DOAP by a project's homepage"""
+
+ #This will be the opt_parser option (-o/--homepage) in the search group
+ name = "homepage"
+ enabled = False
+ enable_opt = name
+
+ def __init__(self):
+ '''Setup homepage OutputPlugin class'''
+ super(OutputPlugin, self).__init__()
+ self.options = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ search.add_option('-o', '--%s' % self.name,
+ action='store',
+ dest=self.enable_opt,
+ help="Search for DOAP by a project's homepage",
+ metavar='HOMEPAGE_URL')
+ return parser, output, search
+
+ def search(self):
+ '''
+ Get DOAP given a project's homepage
+
+ @rtype: unicode
+ @return: DOAP
+ '''
+ return do_search(self.options.homepage)
+
+def do_search(homepage):
+ '''
+ Get DOAP given a project's homepage
+
+ @param homepage: Project homepage URL
+
+ @rtype: unicode
+ @return: DOAP
+ '''
+ resp = query_by_homepage(homepage)
+ LOG.debug(resp)
+ if len(resp) == 0:
+ LOG.error("Not found: %s" % homepage)
+ return
+ elif len(resp) == 1:
+ url = resp[0][1]
+ else:
+ #Multiple, send warning and use first 'external' if any
+ LOG.warn("Warning: Multiple DOAP found.")
+ url = None
+ for this in resp:
+ LOG.warn(this)
+ if not url:
+ #Keep first one if there is no external DOAP
+ url = this[1]
+ if this[0] == 'ex':
+ url = this[1]
+ LOG.warn("Using %s" % url)
+ return fetch_doap(url)
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/n3.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/n3.py
new file mode 100644
index 0000000000000000000000000000000000000000..88e25a2f129dbf6e1611b4ee4a80ec04bb6fb6fc
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/n3.py
@@ -0,0 +1,91 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+
+Serializer for N3 (Notation 3)
+==============================
+
+This is a plugin for formatting DOAP output as N3 (Notation 3) syntax.
+
+"""
+
+__docformat__ = 'epytext'
+
+import logging
+from cStringIO import StringIO
+
+from rdflib import ConjunctiveGraph
+
+from doapfiend.plugins.base import Plugin
+
+LOG = logging.getLogger(__name__)
+
+
+def get_n3(xml_text, color=False):
+ '''
+ Return N3 (Notation 3) text
+ Note: Returns string for non-color and unicode for colored text
+
+ @param xml_text: XML/RDF
+ @type xml_text: string
+
+ @rtype: unicode or string
+ @return: DOAP in Notation 3
+ '''
+ store = ConjunctiveGraph()
+ graph = store.parse(StringIO(xml_text), publicID=None, format="xml")
+ notation3 = graph.serialize(format="n3")
+
+ if color:
+ #pygments plugin fools pylint
+ # pylint: disable-msg=E0611
+ try:
+ from pygments import highlight
+ from doapfiend.lexers import Notation3Lexer
+ from pygments.formatters import TerminalFormatter
+ except ImportError:
+ return notation3
+ return highlight(notation3,
+ Notation3Lexer(),
+ TerminalFormatter(full=False))
+ else:
+ return notation3
+
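+# Illustrative usage sketch (doap_xml is a hypothetical RDF/XML string):
+#
+# print get_n3(doap_xml)              # plain Notation 3
+# print get_n3(doap_xml, color=True)  # syntax-highlighted for terminals
+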
+class OutputPlugin(Plugin):
+
+ """Class for formatting DOAP output"""
+
+ #This will be the opt_parser option (--n3)
+ name = "n3"
+ enabled = False
+ enable_opt = None
+
+ def __init__(self):
+ '''Setup N3 OutputPlugin class'''
+ super(OutputPlugin, self).__init__()
+ self.options = None
+
+ def serialize(self, doap_xml, color=False):
+ '''
+ Serialize RDF/XML DOAP as N3 syntax
+
+ @param doap_xml: DOAP in RDF/XML serialization
+ @type doap_xml: string
+
+ @rtype: unicode
+ @return: DOAP in Notation 3
+ '''
+ if hasattr(self, 'options') and hasattr(self.options, 'no_color'):
+ color = not self.options.no_color
+ return get_n3(doap_xml, color)
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ output.add_option('-n', '--%s' % self.name,
+ action='store_true',
+ dest=self.enable_opt,
+ help='Output DOAP as Notation 3')
+ return parser, output, search
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/ohloh.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/ohloh.py
new file mode 100644
index 0000000000000000000000000000000000000000..64a25f9985ecfe73303acfd2cc14863073355223
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/ohloh.py
@@ -0,0 +1,66 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+
+ohloh
+=====
+
+This plugin uses http://rdfohloh.wikier.org/ to fetch DOAP for
+projects listed on Ohloh.
+
+
+"""
+
+__docformat__ = 'epytext'
+
+
+from doapfiend.utils import NotFoundError
+from doapfiend.plugins.base import Plugin
+from doapfiend.plugins.pkg_index import get_by_pkg_index
+
+
+class OhlohPlugin(Plugin):
+
+ """Get DOAP from Ohloh package index"""
+
+ #This will be the opt_parser option (--oh) in the search group
+ name = "oh"
+ enabled = False
+ enable_opt = name
+
+ def __init__(self):
+ '''Setup OhlohPlugin class'''
+ super(OhlohPlugin, self).__init__()
+ self.options = None
+ self.query = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ search.add_option('--%s' % self.name,
+ action='store',
+ dest=self.enable_opt,
+ help='Get DOAP by its Ohloh project name or id #.',
+ metavar='PROJECT_NAME')
+ return parser, output, search
+
+ def search(self, proxy=None):
+ '''
+ Get Ohloh DOAP
+
+ @param proxy: Optional HTTP proxy URL
+ @type proxy: string
+
+ @rtype: unicode
+ @returns: Single DOAP
+
+ '''
+ if hasattr(self.options, self.name):
+ self.query = getattr(self.options, self.name)
+ #Else self.query was set directly, someone not using the CLI
+ try:
+ return get_by_pkg_index(self.name, self.query, proxy)
+ except NotFoundError:
+ print "Not found: %s" % self.query
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/pkg_index.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/pkg_index.py
new file mode 100644
index 0000000000000000000000000000000000000000..49050e38b07a84b1f9dc2181ff43517ebb7947a8
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/pkg_index.py
@@ -0,0 +1,46 @@
+
+'''
+
+Plugin helper to fetch a single DOAP file from doapspace.org
+by Package Index
+
+'''
+
+from doapfiend.utils import fetch_file
+
+PKG_INDEX_URI = 'http://doapspace.org/doap'
+OHLOH_URI = 'http://rdfohloh.wikier.org/project/'
+
+
+def get_by_pkg_index(index, project_name, proxy=None):
+ '''
+ Get DOAP for a package index project name from doapspace.org
+
+ Builtin indexes:
+
+ - 'sf' SourceForge
+ - 'fm' Freshmeat
+ - 'py' Python Package Index
+ - 'oh' Project listed on Ohloh
+
+ Raises doapfiend.utils.NotFoundError on HTTP 404 error
+
+ @param index: Package index two letter abbreviation
+ @type index: string
+
+ @param project_name: project name
+ @type project_name: string
+
+ @param proxy: Optional HTTP proxy URL
+ @type proxy: string
+
+ @rtype: string
+ @return: text of file retrieved
+
+ '''
+ if index == 'oh':
+ url = '%s/%s/rdf' % (OHLOH_URI, project_name)
+ else:
+ url = '%s/%s/%s' % (PKG_INDEX_URI, index, project_name)
+ return fetch_file(url, proxy)
+
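+# Illustrative usage sketch (network access required; the project names are
+# hypothetical):
+#
+# doap_xml = get_by_pkg_index('sf', 'someproject')
+# doap_xml = get_by_pkg_index('oh', 'someproject',
+#                             proxy='http://proxy.example.org:3128')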
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/pypi.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/pypi.py
new file mode 100644
index 0000000000000000000000000000000000000000..5f0f4b17c5f1722cb5e29a9eaf0dbd7856b6dd83
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/pypi.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+pypi
+====
+
+Currently this plugin uses http://doapspace.org/ to fetch DOAP for PyPI
+(The Python Package Index)
+
+"""
+
+__docformat__ = 'epytext'
+
+
+from doapfiend.utils import NotFoundError
+from doapfiend.plugins.base import Plugin
+from doapfiend.plugins.pkg_index import get_by_pkg_index
+
+
+class PyPIPlugin(Plugin):
+
+ """Get DOAP from PyPI package index"""
+
+ #This will be the opt_parser option (--py) in the search group
+ name = 'py'
+ enabled = False
+ enable_opt = name
+
+ def __init__(self):
+ '''Setup PyPIPlugin class'''
+ super(PyPIPlugin, self).__init__()
+ self.options = None
+ self.query = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ search.add_option('--%s' % self.name,
+ action='store',
+ dest=self.enable_opt,
+ help='Get DOAP by its PyPI project name.',
+ metavar='PROJECT_NAME')
+ return parser, output, search
+
+ def search(self, proxy=None):
+ '''
+ Get PyPI DOAP
+
+ @param proxy: URL of optional HTTP proxy
+ @type proxy: string
+
+ @rtype: unicode
+ @returns: Single DOAP
+
+ '''
+ if hasattr(self.options, self.name):
+ self.query = getattr(self.options, self.name)
+ #Else self.query was set directly, someone not using the CLI
+ try:
+ return get_by_pkg_index(self.name, self.query, proxy)
+ except NotFoundError:
+ print "Not found: %s" % self.query
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/sourceforge.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/sourceforge.py
new file mode 100644
index 0000000000000000000000000000000000000000..09b063287126af1fa3ae8d43d0fec80ab04b6937
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/sourceforge.py
@@ -0,0 +1,65 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+
+sourceforge
+===========
+
+Currently this plugin uses http://doapspace.org/ to fetch DOAP for SourceForge
+
+
+"""
+
+__docformat__ = 'epytext'
+
+
+from doapfiend.utils import NotFoundError
+from doapfiend.plugins.base import Plugin
+from doapfiend.plugins.pkg_index import get_by_pkg_index
+
+
+class SourceForgePlugin(Plugin):
+
+ """Get DOAP from SourceForge package index"""
+
+ #This will be the opt_parser option (--sf) in the search group
+ name = "sf"
+ enabled = False
+ enable_opt = name
+
+ def __init__(self):
+ '''Setup SourceForgePlugin class'''
+ super(SourceForgePlugin, self).__init__()
+ self.options = None
+ self.query = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ search.add_option('--%s' % self.name,
+ action='store',
+ dest=self.enable_opt,
+ help='Get DOAP by its SourceForge project name.',
+ metavar='PROJECT_NAME')
+ return parser, output, search
+
+ def search(self, proxy=None):
+ '''
+ Get SourceForge DOAP
+
+ @param proxy: Optional HTTP proxy URL
+ @type proxy: string
+
+ @rtype: unicode
+ @returns: Single DOAP
+
+ '''
+ if hasattr(self.options, self.name):
+ self.query = getattr(self.options, self.name)
+ #Else self.query was set directly, someone not using the CLI
+ try:
+ return get_by_pkg_index(self.name, self.query, proxy)
+ except NotFoundError:
+ print "Not found: %s" % self.query
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/text.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/text.py
new file mode 100644
index 0000000000000000000000000000000000000000..83e6b48cdec45360fd97aaf99319e549514b41c3
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/text.py
@@ -0,0 +1,290 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+"""
+
+Plain text serializer
+=====================
+
+This plugin outputs DOAP in human-readable plain text
+
+"""
+
+__docformat__ = 'epytext'
+
+import logging
+import textwrap
+from cStringIO import StringIO
+
+from rdflib import Namespace
+from rdfalchemy import rdfSubject
+
+from doapfiend.plugins.base import Plugin
+from doapfiend.utils import COLOR
+from doapfiend.doaplib import load_graph
+
+
+FOAF = Namespace("http://xmlns.com/foaf/0.1/")
+
+LOG = logging.getLogger(__name__)
+
+
+class OutputPlugin(Plugin):
+
+ """Class for formatting DOAP output"""
+
+ #This will be the opt_parser option (--text)
+ name = "text"
+ enabled = False
+ enable_opt = None
+
+ def __init__(self):
+ '''Setup Plain Text OutputPlugin class'''
+ super(OutputPlugin, self).__init__()
+ self.options = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ output.add_option('--%s' % self.name,
+ action='store_true',
+ dest=self.enable_opt,
+ help='Output DOAP as plain text (Default)')
+ return parser, output, search
+
+ def serialize(self, doap_xml, color=False):
+ '''
+ Serialize RDF/XML DOAP as plain text
+
+ @param doap_xml: DOAP in RDF/XML serialization
+ @type doap_xml: string
+
+ @rtype: unicode
+ @return: DOAP in plain text
+ '''
+ if hasattr(self.options, 'no_color'):
+ color = not self.options.no_color
+ if not color:
+ #This has already been done if we're called from cli.py
+ #Fix me: Need to think on this.
+ for this in COLOR:
+ COLOR[this] = '\x1b[0m'
+ if hasattr(self.options, 'quiet'):
+ brief = self.options.quiet
+ else:
+ brief = False
+
+ printer = DoapPrinter(load_graph(doap_xml, get_list=True), brief, color)
+ return printer.print_doap()
+
+
+class DoapPrinter(object):
+
+ '''Prints DOAP in human readable text'''
+
+ def __init__(self, doap, brief=False, color=False):
+ '''Initialize attributes'''
+ self.brief = brief
+ self.doap_list = doap
+ self.doap = None
+ self.text = StringIO()
+ self.color = color
+
+ def write(self, text):
+ '''
+ Write to DOAP output file object
+ '''
+ self.text.write(text.encode('utf-8') + '\n')
+
+ def print_doap(self):
+ '''
+ Serialize DOAP in human readable text, optionally colorized
+
+ @rtype: unicode
+ @return: DOAP as plain text
+ '''
+ for doap in self.doap_list:
+ self.doap = doap
+ self.print_misc()
+ if self.brief:
+ return
+ self.print_people()
+ self.print_repos()
+ self.print_releases()
+ doap = self.text.getvalue()
+ self.text.close()
+ return doap
+
+ def print_misc(self):
+ '''Prints basic DOAP metadata'''
+ #We should be able to get this from model.py automatically,
+ #but this lets us print in the order we like.
+ #Maybe move this to that model.py so we don't forget to sync
+ #when the DOAP schema changes.
+ fields = ('name', 'shortname', 'homepage', 'shortdesc',
+ 'description', 'old_homepage', 'created',
+ 'download_mirror')
+
+ fields_verbose = ('license', 'programming_language',
+ 'bug_database', 'screenshots', 'oper_sys',
+ 'wiki', 'download_page', 'mailing_list')
+
+ for fld in fields:
+ self.print_field(fld)
+ if not self.brief:
+ for fld in fields_verbose:
+ self.print_field(fld)
+
+ def print_repos(self):
+ '''Prints DOAP repository metadata'''
+ if hasattr(self.doap.cvs_repository, 'module') and \
+ self.doap.cvs_repository.module is not None:
+ self.write(misc_field('CVS Module:',
+ self.doap.cvs_repository.module))
+ self.write(misc_field('CVS Anon:',
+ self.doap.cvs_repository.anon_root))
+ self.write(misc_field('CVS Browse:',
+ self.doap.cvs_repository.cvs_browse.resUri))
+
+ if hasattr(self.doap.svn_repository, 'location') and \
+ self.doap.svn_repository.location is not None:
+ self.write(misc_field('SVN Location:',
+ self.doap.svn_repository.location.resUri))
+
+ if hasattr(self.doap.svn_repository, 'svn_browse') and \
+ self.doap.svn_repository.svn_browse is not None:
+ self.write(misc_field('SVN Browse:',
+ self.doap.svn_repository.svn_browse.resUri))
+
+ def print_releases(self):
+ '''Print DOAP package release metadata'''
+ if hasattr(self.doap, 'releases') and len(self.doap.releases) != 0:
+ self.write(COLOR['bold'] + "Releases:" + COLOR['normal'])
+ for release in self.doap.releases:
+ if release.name:
+ self.write(COLOR['bold'] + COLOR['cyan'] + release.name + \
+ COLOR['normal'])
+ if hasattr(release, 'created') and release.created is not None:
+ created = release.created
+ else:
+ created = ''
+ self.write(COLOR['cyan'] + ' ' + release.revision + ' ' + \
+ COLOR['normal'] + created)
+ if hasattr(release, 'changelog'):
+ if release.changelog:
+ self.write(COLOR['yellow'] + \
+ release.changelog +
+ COLOR['normal']
+ )
+ for frel in release.file_releases:
+ self.write(' %s' % frel.resUri)
+
+ def print_people(self):
+ '''Print all people involved in the project'''
+ people = ['maintainer', 'developer', 'documenter', 'helper',
+ 'tester', 'translator']
+ for job in people:
+ if hasattr(self.doap, job):
+ attribs = getattr(self.doap, job)
+ if len(attribs) > 0:
+ peeps = []
+ for attr in attribs:
+ if attr[FOAF.mbox] is None:
+ person = "%s" % attr[FOAF.name]
+ else:
+ mbox = attr[FOAF.mbox].resUri
+ if mbox.startswith('mailto:'):
+ mbox = mbox[7:]
+ person = "%s <%s>" % (attr[FOAF.name], mbox)
+ else:
+ LOG.debug("mbox is invalid: %s" % mbox)
+ person = "%s" % attr[FOAF.name]
+ peeps.append(person)
+ label = job.capitalize() + "s:"
+ #label = label.ljust(13)
+ self.write(misc_field(label,
+ ", ".join([p for p in peeps])))
+
+ def print_field(self, name):
+ '''
+ Print a DOAP element
+
+ @param name: A misc DOAP element
+ @type name: string, list or RDFSubject
+
+ @rtype: None
+ @return: Nothing
+ '''
+ if not hasattr(self.doap, name):
+ return
+ attr = getattr(self.doap, name)
+ if attr == [] or attr is None:
+ return
+
+ label = '%s' % COLOR['bold'] + pretty_name(name) + \
+ COLOR['normal'] + ':'
+ label = label.ljust(21)
+ if isinstance(attr, list):
+ #Can have multiple values per attribute
+ text = ""
+ for thing in getattr(self.doap, name):
+ if isinstance(thing, rdfSubject):
+ text += thing.resUri + "\n"
+ else:
+ #unicode object
+ thing = thing.strip()
+ text += thing + "\n"
+ else:
+ text = getattr(self.doap, name)
+ if isinstance(text, rdfSubject):
+ text = text.resUri
+ else:
+ text = text.strip()
+ if text:
+ if text.startswith('http://'):
+ self.write('%s %s' % (label, text.strip()))
+ else:
+ self.write(textwrap.fill('%s %s' % (label, text),
+ initial_indent='',
+ subsequent_indent = ' '))
+
+
+def pretty_name(field):
+ """
+ Convert DOAP element name to pretty printable label
+ Shorten some labels for formatting purposes
+
+ @param field: Text to be formatted
+ @type field: C{string}
+
+ @return: formatted string
+ @rtype: string
+ """
+ if field == 'programming_language':
+ field = 'Prog. Lang.'
+ elif field == 'created':
+ field = 'DOAP Created'
+ else:
+ field = field.capitalize()
+ field = field.replace('_', ' ')
+ field = field.replace('-', ' ')
+ return field
+
+
+def misc_field(label, text):
+ '''
+ Print colorized and justified single label value pair
+
+ @param label: A label
+ @type label: string
+
+ @param text: Text to print
+ @type text: string
+
+ @rtype: string
+ @return: Colorized, left-justified text with label
+ '''
+ label = label.ljust(13)
+ label = COLOR['bold'] + label + COLOR['normal']
+ return '%s %s' % (label, text)
+
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/url.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/url.py
new file mode 100644
index 0000000000000000000000000000000000000000..43b55b919828c0be955a3c9d4345e4e00596d10b
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/url.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+
+url.py
+======
+
+This plugin loads DOAP by its URL or path to a filename.
+
+
+"""
+
+__docformat__ = 'epytext'
+
+
+from doapfiend.plugins.base import Plugin
+from doapfiend.utils import NotFoundError
+from doapfiend.doaplib import fetch_doap
+
+
+class UrlPlugin(Plugin):
+
+ """Fetch DOAP by URL or file path"""
+
+ #This will be the opt_parser option (--url) in the 'search' group
+ name = 'url'
+ enabled = False
+ enable_opt = name
+
+ def __init__(self):
+ '''Setup UrlPlugin class'''
+ super(UrlPlugin, self).__init__()
+ self.options = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ search.add_option('-u', '--%s' % self.name,
+ action='store',
+ dest=self.enable_opt,
+ help='Get DOAP by its URL or by filename.',
+ metavar='URL')
+ return parser, output, search
+
+ def search(self):
+ '''
+ Get DOAP by its URL or file path
+ This can be any RDF as long as it has the DOAP namespace.
+
+ @rtype: unicode
+ @return: DOAP
+ '''
+ try:
+ return fetch_doap(self.options.url, self.options.proxy)
+ except NotFoundError:
+ print "Not found: %s" % self.options.url
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/xml.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/xml.py
new file mode 100644
index 0000000000000000000000000000000000000000..4d084fb5c930df2c2b9592043439cbc9da260d84
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/plugins/xml.py
@@ -0,0 +1,80 @@
+#!/usr/bin/env python
+
+# pylint: disable-msg=W0221,R0201
+
+"""
+
+Serialize DOAP as XML/RDF
+=========================
+
+This plugin outputs DOAP in RDF/XML
+It basically does nothing because all DOAP today is in RDF/XML.
+In the future this may take N3, Turtle, RDFa etc. and convert it to RDF/XML.
+
+"""
+
+__docformat__ = 'epytext'
+
+from elementtree import ElementTree
+
+from doapfiend.plugins.base import Plugin
+
+
+class OutputPlugin(Plugin):
+
+ """Class for formatting DOAP output"""
+
+ #This will be the opt_parser option (--xml) in the output group
+ name = "xml"
+ enabled = False
+ enable_opt = None
+
+ def __init__(self):
+ '''Setup RDF/XML OutputPlugin class'''
+ super(OutputPlugin, self).__init__()
+ self.options = None
+
+ def add_options(self, parser, output, search):
+ """Add plugin's options to doapfiend's opt parser"""
+ output.add_option('-x', '--%s' % self.name,
+ action='store_true',
+ dest=self.enable_opt,
+ help='Output DOAP as RDF/XML')
+ return parser, output, search
+
+ def serialize(self, doap_xml, color=False):
+ '''
+ Serialize DOAP as RDF/XML
+
+ Since the only input we currently have is XML, all this really does
+ is parse the XML and raise an exception if it's invalid.
+ When we do content negotiation/accept N3 etc., this will serialize.
+
+ @param doap_xml: DOAP in RDF/XML serialization
+ @type doap_xml: string
+
+ @rtype: unicode
+ @returns: DOAP
+ '''
+ #This will raise ExpatError if we have invalid XML
+ #(from xml.parsers.expat import ExpatError)
+ #We could format/pretty print here but we just return exactly what
+ #was fetched.
+ ElementTree.fromstring(doap_xml)
+ if hasattr(self.options, 'no_color'):
+ color = not self.options.no_color
+ if color:
+ #pygments plugin fools pylint
+ # pylint: disable-msg=E0611
+ try:
+ from pygments import highlight
+ from pygments.lexers import XmlLexer
+ from pygments.formatters import TerminalFormatter
+ except ImportError:
+ return doap_xml
+ return highlight(doap_xml,
+ XmlLexer(),
+ TerminalFormatter(full=False))
+ else:
+ return doap_xml
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/utils.py b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..b727ae1a95fa533cd66910bbbea3d795b3c8cd52
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/build/lib.linux-x86_64-2.7/doapfiend/utils.py
@@ -0,0 +1,216 @@
+
+"""
+
+utils.py
+========
+
+Misc utilities for doapfiend
+----------------------------
+
+General purpose helper functions and classes for doapfiend
+You'll probably want to use doaplib for most cases.
+
+License: BSD-2
+
+"""
+
+#pylint: disable-msg=C0103
+
+import urllib
+import logging
+import urlparse
+from httplib import HTTPConnection
+from urllib2 import build_opener, HTTPError, ProxyHandler, URLError
+
+
+__docformat__ = 'epytext'
+
+LOG = logging.getLogger('doapfiend')
+
+COLOR = {'normal': "\033[0m",
+ 'bold': "\033[1m",
+ 'underline': "\033[4m",
+ 'blink': "\033[5m",
+ 'reverse': "\033[7m",
+ 'black': "\033[30m",
+ 'red': "\033[31m",
+ 'green': "\033[32m",
+ 'yellow': "\033[33m",
+ 'blue': "\033[34m",
+ 'magenta': "\033[35m",
+ 'cyan': "\033[36m",
+ 'white': "\033[37m"}
+
+
+class NotFoundError(Exception):
+
+ '''DOAP not found'''
+
+ #pylint: disable-msg=W0231
+ def __init__(self, err_msg):
+ '''Initialize attributes'''
+ self.err_msg = err_msg
+
+ def __str__(self):
+ return repr(self.err_msg)
+
+
+def http_filesize(url):
+ """
+ Get the size of a file without downloading it.
+
+ @param url: URL of file
+ @type url: string
+
+ @rtype: string
+ @return: Size of file
+
+ Usage:
+
+ >>> http_filesize('http://trac.doapspace.org/test_file.txt')
+ '160'
+ """
+
+ host, path = urlparse.urlsplit(url)[1:3]
+ if ':' in host:
+ # port specified, try to use it
+ host, port = host.split(':', 1)
+ try:
+ port = int(port)
+ except ValueError:
+ LOG.error('invalid port number %r' % port)
+ return False
+ else:
+ # no port specified, use default port
+ port = None
+ connection = HTTPConnection(host, port=port)
+ connection.request("HEAD", path)
+ resp = connection.getresponse()
+ return resp.getheader('content-length')
+
+
+def http_exists(url):
+ """
+ A quick way to check if a file exists on the web.
+
+ @param url: URL of the document
+ @type url: string
+ @rtype: boolean
+ @return: True or False
+
+ Usage:
+
+ >>> http_exists('http://www.python.org/')
+ True
+ >>> http_exists('http://www.python.org/PenguinOnTheTelly')
+ False
+ """
+
+ host, path = urlparse.urlsplit(url)[1:3]
+ if ':' in host:
+ #port specified, try to use it
+ host, port = host.split(':', 1)
+ try:
+ port = int(port)
+ except ValueError:
+ LOG.error('invalid port number %r' % port)
+ return False
+ else:
+ #no port specified, use default port
+ port = None
+ connection = HTTPConnection(host, port=port)
+ connection.request("HEAD", path)
+ resp = connection.getresponse()
+ if resp.status == 200: # normal 'found' status
+ found = True
+ elif resp.status == 302: # recurse on temporary redirect
+ found = http_exists(urlparse.urljoin(url,
+ resp.getheader('location', '')))
+ else: # everything else -> not found
+ LOG.info("Status %d %s : %s" % (resp.status, resp.reason, url))
+ found = False
+ return found
+
+
+def is_content_type(url_or_file, content_type):
+ """
+ Tells whether the URL or pseudofile from urllib.urlopen is of
+ the required content type.
+
+ @param url_or_file: URL or file path
+ @type url_or_file: string
+ @param content_type: Content type we're looking for
+ @type content_type: string
+
+ @rtype: boolean
+ @returns: True if it can return the Content type we want
+
+ Usage:
+
+ >>> is_content_type('http://doapspace.org/doap/sf/nlyrics.rdf', \
+ 'application/rdf+xml')
+ True
+ >>> is_content_type('http://doapspace.org/', 'application/rdf+xml')
+ False
+ """
+ try:
+ if isinstance(url_or_file, str):
+ thefile = urllib.urlopen(url_or_file)
+ else:
+ thefile = url_or_file
+ result = thefile.info().gettype() == content_type.lower()
+ if thefile is not url_or_file:
+ thefile.close()
+ except IOError:
+ result = False
+ return result
+
+
+def fetch_file(url, proxy=None):
+ '''
+ Download file by URL
+
+ @param url: URL of a file
+ @type url: string
+
+ @param proxy: URL of HTTP Proxy
+ @type proxy: string
+
+ @return: File
+ @rtype: string
+
+ '''
+ if not url.startswith('http://') and not url.startswith('ftp://'):
+ try:
+ return open(url, 'r').read()
+ except IOError, errmsg:
+ LOG.error(errmsg)
+ return ''
+ LOG.debug('Fetching ' + url)
+ if proxy:
+ opener = build_opener(ProxyHandler({'http': proxy}))
+ else:
+ opener = build_opener()
+ opener.addheaders = [('Accept', 'application/rdf+xml'),
+ ('User-agent',
+ 'Mozilla/5.0 (compatible; doapfiend ' +
+ 'http://trac.doapspace.org/doapfiend)')]
+ try:
+ result = opener.open(url)
+ except HTTPError, err_msg:
+ if err_msg.code == 404:
+ raise NotFoundError('Not found: %s' % url)
+ else:
+ LOG.error(err_msg)
+ return
+ except URLError, err_msg:
+ LOG.error(err_msg)
+ return
+ return result.read()
+
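+# Illustrative usage sketch (URLs are hypothetical):
+#
+# doap_xml = fetch_file('http://example.org/projects/foo.rdf')
+# doap_xml = fetch_file('http://example.org/projects/foo.rdf',
+#                       proxy='http://proxy.example.org:3128')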
+
+if __name__ == '__main__':
+ import doctest
+ doctest.testmod()
+
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/dist/doapfiend-0.3.3-py2.7.egg b/web-crawler/lib/doapfiend/doapfiend-0.3.3/dist/doapfiend-0.3.3-py2.7.egg
new file mode 100644
index 0000000000000000000000000000000000000000..e6532291024c3befc60d3ffacae276c5bc8785c4
Binary files /dev/null and b/web-crawler/lib/doapfiend/doapfiend-0.3.3/dist/doapfiend-0.3.3-py2.7.egg differ
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/.gitignore b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..0479089c7f1d10e0a5b276b6db2b02727a040f8d
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/doapfiend/.gitignore
@@ -0,0 +1 @@
+/__init__.pyc
diff --git a/web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/.gitignore b/web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..ebf21bc02f58bfc3dec82b3338e37118937d527d
--- /dev/null
+++ b/web-crawler/lib/doapfiend/doapfiend-0.3.3/tests/.gitignore
@@ -0,0 +1,5 @@
+/test_cli.pyc
+/test_doaplib.pyc
+/test_n3.pyc
+/test_utils.pyc
+/test_xml.pyc
diff --git a/web-crawler/r_forge_data_fetcher.py b/web-crawler/r_forge_data_fetcher.py
index 122ece45ccbd124bf3f0f189294c5ea4792f40b1..d6433dae4c2f931ab25ac3f1c40836c816eb679c 100755
--- a/web-crawler/r_forge_data_fetcher.py
+++ b/web-crawler/r_forge_data_fetcher.py
@@ -2,24 +2,45 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
#
-# Copyright 2014 Bitergium SLL
-# Author: Maria Di Girolamo
+# Copyright 2014 Bitergium SLL
+
+# New Data Fetcher configured for MORPHEMIC Project release 2.0
+# Ownership: Engineering Ingegneria Informatica S.p.A.
+# Author: Maria Antonietta Di Girolamo
+# Year: 2020/2021
+# Maintainer: Maria Antonietta Di Girolamo
from doapfiend.doaplib import load_graph
from xml.dom.minidom import parse, parseString
from Utils import Logger, Sources
from pip import req
+from bs4 import BeautifulSoup
+from lxml.etree import tostring
+from lxml import etree as et
+from lxml import html, etree
-import urllib2
+import urllib.request, urllib.error, urllib.parse
import base64
+import ast
+import re
+import requests
+import time
+from elasticsearch import Elasticsearch
+# create a timestamp using the time() method
+start_time = time.time()
+# declare globals for the Elasticsearch client host
+DOMAIN = "localhost"
+PORT = 9200
+INDEX = "knowledgebase"
+# concatenate a string for the client's host parameter
+host = str(DOMAIN) + ":" + str(PORT)
+# declare an instance of the Elasticsearch library
+client = Elasticsearch(host)
+# es_client used to create the knowledgebase DOAP model
+es_client = Elasticsearch([{'host': str(DOMAIN), 'port': str(PORT)}])
-from bs4 import BeautifulSoup
-from lxml.etree import tostring
-from lxml import html
class R_Forge_DataFetcher():
-#I progetti sono catalogati per :
+# The projects are catalogued by:
#Browse By:
#- Development Status
#- Environment
@@ -30,10 +51,6 @@ class R_Forge_DataFetcher():
#- Operating System
#- Programming Language
#- Topic
form_cat=55 per Topic
-#Creo un array con tutti i cataloghi della lista.
-
-#As repoterd in the link:
-#Exports Available
#R-Forge data is exported in a variety of standard formats. Many of the export URLs can also accept form/get data to customize the output. All data generated by these pages is realtime.
#RSS/XML Exports
#News Data
@@ -45,62 +62,74 @@ class R_Forge_DataFetcher():
#R-Forge Full Project Listing (RSS 0.91, )
#R-Forge Full Project Listing (RSS 2.0)
#R-Forge Trove Categories Tree (XML,)
-#noi useremo R-Forge Trove Categories Tree
+# We use the R-Forge Trove Categories Tree
def __init__(self):
- print "Starting R_Forge....."
- #response = urllib2.urlopen('https://r-forge.r-project.org/export/trove_tree.php')
+ #print("Starting R_Forge.....")
+ response = urllib.request.urlopen('https://r-forge.r-project.org/export/trove_tree.php')
- #print "after reading list of doap files "
- #doc = response.read()
+ #print "after reading list of doap files "
+ doc = response.read()
+ '''
url_link = 'https://r-forge.r-project.org/softwaremap/trove_list.php?cat=c&form_cat=0'
- print url_link
r_html = urllib2.urlopen(url_link)
- #print "after reading list of doap files "
self.tree = html.fromstring(r_html.read())
r_category = self.tree.xpath('//li[@class="current-cat"]/text()')
- r_project = self.tree.xpath('//ul/li/a/text()')
- r_em = self.tree.xpath('//li/em/text()')
- print r_category
- print r_project
- print r_em
-
+ r_project = self.tree.xpath('//ul/li/a/text()')
+ r_subproject_forcategory = self.tree.xpath('//tr[@class="top"]/td[@id="project-tree-col1"]/ul/li/a')
+ #print("Project for Category ",r_category, "is :")
+ i = 0
+ for i in r_category:
+ print("Project for Category ",i, "is :")
+ for child in r_subproject_forcategory:
+ cat_link = child.attrib['href']
+ print(child.text, " with link :", "https://r-forge.r-project.org/softwaremap/" + cat_link)
+ '''
+
- #print doc
-
-"""
- soup = BeautifulSoup(doc,"lxml")
- #print(soup.prettify())
- #content = soup.find(id="0")
- #print content
+ cat = []
id_parent = []
- print "id_child[ "
+ #print "id_child[ "
+ id_child = []
+ '''
for c in soup.find_all('category'):
+ k = c.get('id')
id_parent.append(c.get('id'))
- id_child = []
- for child in id_parent:
- id_child.append(child)
- print id_child
- print " ] "
- print "id parent "
- print id_parent
- print "id child "
- print id_child
- """
- # for l in id:
+ id_child.append(c.get('name'))
+ #print c.get('id')
+ '''
+ url_link = 'https://r-forge.r-project.org/softwaremap/trove_list.php?cat=c&form_cat=0' #+ k
+ #print k
+ r_html = urllib.request.urlopen(url_link)
+ #print "after reading list of doap files "
+ self.tree = html.fromstring(r_html.read())
+ r_subproject_forcategory = self.tree.xpath('//tr[@class="top"]/td[@id="project-tree-col1"]/ul/li/a/text()')
+ print(r_subproject_forcategory)
+ r_forcategory = self.tree.xpath('//tr[@class="top"]/td[@id="project-tree-col1"]/ul/li/a')
+ for child in r_forcategory:
+ cat_link = child.attrib['href']
+ print(cat_link)
+ print('Child of Topics', child.text, " with link :", "https://r-forge.r-project.org/softwaremap/" + cat_link)
+
+ #id_child.append(c.get('id'))
+ #print cat
+ #print id_parent
+ #print id_child
+ #for l in id_child:
#print l
#for k in l:
- #td id="project-tree-col1"
- #class=project-tree-branches
- # url_link = 'https://r-forge.r-project.org/softwaremap/trove_list.php?cat=c&form_cat=' + k
- # print url_link
- # r_html = urllib2.urlopen(url_link)
- #print "after reading list of doap files "
- # self.tree = html.fromstring(r_html.read())
- # r_category = self.tree.xpath('//p/strong/text()')
- # print r_category
- # r_project = self.tree.xpath('//li[@class="current-cat"]/text()')
- # print r_project
-
-
+ '''
+ for k in c.get('id'):
+ id="project-tree-col1"
+ url_link = 'https://r-forge.r-project.org/softwaremap/trove_list.php?cat=c&form_cat=' + k
+ #print k
+ r_html = urllib2.urlopen(url_link)
+ #print "after reading list of doap files "
+ self.tree = html.fromstring(r_html.read())
+ r_subproject_forcategory = self.tree.xpath('//tr[@class="top"]/td[@id="project-tree-col1"]/ul/li/a')
+ #for child in r_subproject_forcategory:
+ #print child.text
+ #cat_link = child.attrib['href']
+ #print('k ', k ,' Child ' ,child.text, " with link :", "https://r-forge.r-project.org/softwaremap/" + cat_link)
+ '''
R_Forge_DataFetcher()
\ No newline at end of file
diff --git a/web-crawler/startCrawler b/web-crawler/startCrawler
index c4d38d4825f758aff2ea385eb355210bb8ec062b..dfcd3dbc88fa3e100bba5fbdb12c9b7f7def7e07 100755
--- a/web-crawler/startCrawler
+++ b/web-crawler/startCrawler
@@ -5,9 +5,5 @@
# Copyright 2014 Bitergium SLL
-python Orchestrator.py &
-#python RepositoryCrawlerThread.py &
-python NotifierThread.py &
+python3 Orchestrator.py
echo Starting Crawler: find log path and other configuration in ./config
-#cd api
-#pserve api.ini --reload
\ No newline at end of file
diff --git a/web-crawler/stopCrawler b/web-crawler/stopCrawler
index 1fdb6c4187edc448b43d006286ad781c0b133187..af81f56e573071efc75f9dfa59916fc1842a3b6b 100755
--- a/web-crawler/stopCrawler
+++ b/web-crawler/stopCrawler
@@ -1,6 +1,5 @@
#/bin/bash
-for proc in "pserve api.ini --reload" "python Orchestrator.py" "python NotifierThread.py"; do
-#for proc in "pserve api.ini --reload" "python Orchestrator.py"; do
+for proc in "python3 Orchestrator.py"; do
ppid=$(pgrep -f "$proc")
if [[ "$ppid" -ne "" ]]; then
echo "killing process $proc with pid $ppid"
|