1
2
3
4 """
5 @var JSON: to be used to set the return format to JSON
6 @var XML: to be used to set the return format to XML (SPARQL XML format or RDF/XML, depending on the query type). This is the default.
7 @var TURTLE: to be used to set the return format to Turtle
8 @var N3: to be used to set the return format to N3 (for most of the SPARQL services this is equivalent to Turtle)
9 @var RDF: to be used to set the return format to an RDF Graph
10
11 @var POST: to be used to set HTTP POST
12 @var GET: to be used to set HTTP GET. This is the default.
13
14 @var SELECT: to be used to set the query type to SELECT. This is, usually, determined automatically.
15 @var CONSTRUCT: to be used to set the query type to CONSTRUCT. This is, usually, determined automatically.
16 @var ASK: to be used to set the query type to ASK. This is, usually, determined automatically.
17 @var DESCRIBE: to be used to set the query type to DESCRIBE. This is, usually, determined automatically.
18
19 @see: U{SPARQL Specification<http://www.w3.org/TR/rdf-sparql-query/>}
20 @authors: U{Ivan Herman<http://www.ivan-herman.net>}, U{Sergio Fernández<http://www.wikier.org>}, U{Carlos Tejo Alonso<http://www.dayures.net>}
21 @organization: U{World Wide Web Consortium<http://www.w3.org>}, U{Salzburg Research<http://www.salzburgresearch.at>} and U{Foundation CTIC<http://www.fundacionctic.org/>}.
22 @license: U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/copyright-software>}
23 @requires: U{RDFLib<http://rdflib.net>} package.
24 """
25
26 import urllib
27 import urllib2
28 from urllib2 import urlopen as urlopener
29 import socket
30 import base64
31 import re
32 import warnings
33
34 import jsonlayer
35 from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict
36 from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError
37 from SPARQLUtils import deprecated
38 from SPARQLWrapper import __agent__
39
40
41 JSON = "json"
42 JSONLD = "json-ld"
43 XML = "xml"
44 TURTLE = "n3"
45 N3 = "n3"
46 RDF = "rdf"
47 _allowedFormats = [JSON, XML, TURTLE, N3, RDF]
48
49
50 POST = "POST"
51 GET = "GET"
52 _allowedRequests = [POST, GET]
53
54
55 SELECT = "SELECT"
56 CONSTRUCT = "CONSTRUCT"
57 ASK = "ASK"
58 DESCRIBE = "DESCRIBE"
59 INSERT = "INSERT"
60 DELETE = "DELETE"
61 CREATE = "CREATE"
62 CLEAR = "CLEAR"
63 DROP = "DROP"
64 LOAD = "LOAD"
65 COPY = "COPY"
66 MOVE = "MOVE"
67 ADD = "ADD"
68 _allowedQueryTypes = [SELECT, CONSTRUCT, ASK, DESCRIBE, INSERT, DELETE, CREATE, CLEAR, DROP,
69 LOAD, COPY, MOVE, ADD]
70
71
72 URLENCODED = "urlencoded"
73 POSTDIRECTLY = "postdirectly"
74 _REQUEST_METHODS = [URLENCODED, POSTDIRECTLY]
75
76
77
78
79
80
81
82
83
84 _SPARQL_DEFAULT = ["application/sparql-results+xml", "application/rdf+xml", "*/*"]
85 _SPARQL_XML = ["application/sparql-results+xml"]
86 _SPARQL_JSON = ["application/sparql-results+json", "text/javascript", "application/json"]
87 _RDF_XML = ["application/rdf+xml"]
88 _RDF_N3 = ["text/rdf+n3", "application/n-triples", "application/turtle", "application/n3", "text/n3", "text/turtle"]
89 _RDF_JSONLD = ["application/x-json+ld", "application/ld+json"]
90 _ALL = ["*/*"]
91 _RDF_POSSIBLE = _RDF_XML + _RDF_N3
92 _SPARQL_POSSIBLE = _SPARQL_XML + _SPARQL_JSON + _RDF_XML + _RDF_N3
93 _SPARQL_PARAMS = ["query"]
94
95 try:
96 import rdflib_jsonld
97 _allowedFormats.append(JSONLD)
98 _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD
99 except ImportError:
100 warnings.warn("JSON-LD disabled because no suitable support has been found", RuntimeWarning)
101
102
103
104
105
106
107
108 _returnFormatSetting = ["format", "output", "results"]
114 """
115 Wrapper around an online access to a SPARQL Web entry point.
116
117 The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc,
118 are retained from one query to the next (in other words, only the query string changes). The instance can also be
119 reset to its initial values using the L{resetQuery} method.
120
121 @cvar pattern: regular expression used to determine whether a query is of type L{CONSTRUCT}, L{SELECT}, L{ASK}, or L{DESCRIBE}.
122 @type pattern: compiled regular expression (see the C{re} module of Python)
123 @ivar baseURI: the URI of the SPARQL service
124 """
125 pattern = re.compile(r"""
126 ((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))*
127 (?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))
128 """, re.VERBOSE | re.IGNORECASE)
129
def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
    """
    Set up a wrapper bound to one SPARQL endpoint and reset it to its initial query state.

    @param endpoint: string of the SPARQL endpoint's URI
    @type endpoint: string
    @param updateEndpoint: string of the SPARQL endpoint's URI for update operations. When it is
    omitted (or otherwise false) the value of C{endpoint} is used for updates as well.
    @type updateEndpoint: string
    @keyword returnFormat: Default: L{XML}. A value that is not listed in C{_allowedFormats} is
    replaced by L{XML} without any notice. Whether the format makes sense for the query that is
    issued later is not checked here; the value is simply kept as the instance default.
    @type returnFormat: string
    @keyword defaultGraph: URI for the default graph. Default is None, the value can be set either via an L{explicit call<addDefaultGraph>} or as part of the query string.
    @type defaultGraph: string
    @keyword agent: value stored in C{self.agent}
    @type agent: string
    """
    # Connection settings.
    self.endpoint = endpoint
    self.updateEndpoint = updateEndpoint or endpoint
    self.agent = agent

    # No credentials until they are set explicitly.
    self.user = None
    self.passwd = None

    # Instance-wide defaults that resetQuery() falls back to.
    self._defaultGraph = defaultGraph
    self._defaultReturnFormat = returnFormat if returnFormat in _allowedFormats else XML

    self.resetQuery()
164
166 """Reset the query, ie, return format, query, default or named graph settings, etc,
167 are reset to their default values."""
168 self.parameters = {}
169 if self._defaultGraph:
170 self.addParameter("default-graph-uri", self._defaultGraph)
171 self.returnFormat = self._defaultReturnFormat
172 self.method = GET
173 self.queryType = SELECT
174 self.queryString = """SELECT * WHERE{ ?s ?p ?o }"""
175 self.timeout = None
176 self.requestMethod = URLENCODED
177
186
188 """Set the timeout (in seconds) to use for querying the endpoint.
189
190 @param timeout: Timeout in seconds.
191 @type timeout: int
192 """
193 self.timeout = int(timeout)
194
196 """Set the internal method to use to perform the request for query or
197 update operations, either URL-encoded (C{SPARQLWrapper.URLENCODED}) or
198 POST directly (C{SPARQLWrapper.POSTDIRECTLY}).
199 Further details at U{http://www.w3.org/TR/sparql11-protocol/#query-operation}
200 and U{http://www.w3.org/TR/sparql11-protocol/#update-operation}.
201
202 @param method: method
203 @type method: str
204 """
205 if method in _REQUEST_METHODS:
206 self.requestMethod = method
207 else:
208 warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
209
@deprecated
def setUpdateMethod(self, method):
    """
    Deprecated way of selecting how the request is sent; forwards to C{setRequestMethod}.

    @param method: value handed over unchanged to C{setRequestMethod}
    @type method: str
    @deprecated: use setRequestMethod(method) instead of this method
    """
    warnings.warn("deprecated method, use setRequestMethod() instead", RuntimeWarning)
    # The call has to go through the instance: a bare setRequestMethod(...)
    # is looked up as a global name and fails with NameError.
    self.setRequestMethod(method)
214
215 @deprecated
217 """
218 Add a default graph URI.
219 @param uri: URI of the graph
220 @type uri: string
221 @deprecated: use addParameter("default-graph-uri", uri) instead of this method
222 """
223 self.addParameter("default-graph-uri", uri)
224
225 @deprecated
227 """
228 Add a named graph URI.
229 @param uri: URI of the graph
230 @type uri: string
231 @deprecated: use addParameter("named-graph-uri", uri) instead of this method
232 """
233 self.addParameter("named-graph-uri", uri)
234
235 @deprecated
237 """
238 Some SPARQL endpoints require extra key value pairs.
239 E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
240 virtuoso to retrieve graphs that are not stored in its local database.
241 @param key: key of the query part
242 @type key: string
243 @param value: value of the query part
244 @type value: string
245 @deprecated: use addParameter(key, value) instead of this method
246 """
247 self.addParameter(key, value)
248
249 @deprecated
251 """
252 Method is kept for backwards compatibility. Historically, it "replaces" parameters instead of adding
253 @param name: name
254 @type name: string
255 @param value: value
256 @type value: string
257 @rtype: bool
258 @deprecated: use addParameter(name, value) instead of this method
259 """
260 self.clearParameter(name)
261 return self.addParameter(name, value)
262
264 """
265 Some SPARQL endpoints allow extra key value pairs.
266 E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
267 virtuoso to retrieve graphs that are not stored in its local database.
268 @param name: name
269 @type name: string
270 @param value: value
271 @type value: string
272 @rtype: bool
273 """
274 if name in _SPARQL_PARAMS:
275 return False
276 else:
277 if name not in self.parameters:
278 self.parameters[name] = []
279 self.parameters[name].append(value)
280 return True
281
283 """
284 Clear the values of a concrete parameter.
285 @param name: name
286 @type name: string
287 @rtype: bool
288 """
289 if name in _SPARQL_PARAMS:
290 return False
291 else:
292 try:
293 del self.parameters[name]
294 return True
295 except KeyError:
296 return False
297
299 """
300 Set the credentials for querying the current endpoint
301 @param user: username
302 @type user: string
303 @param passwd: password
304 @type passwd: string
305 """
306 self.user = user
307 self.passwd = passwd
308
310 """
311 Set the SPARQL query text. Note: no check is done on the validity of the query
312 (syntax or otherwise) by this module, except for testing the query type (SELECT,
313 ASK, etc). Syntax and validity checking is done by the SPARQL service itself.
314 @param query: query text
315 @type query: string
316 @bug: #2320024
317 """
318 self.queryString = query
319 self.queryType = self._parseQueryType(query)
320
322 """
323 Parse the SPARQL query and return its type (ie, L{SELECT}, L{ASK}, etc).
324
325 Note that the method returns L{SELECT} if nothing is specified. This is just to get all other
326 methods running; in fact, this means that the query is erroneous, because the query must be,
327 according to the SPARQL specification, one of Select, Ask, Describe, or Construct. The
328 SPARQL endpoint should raise an exception (via urllib) for such syntax error.
329
330 @param query: query text
331 @type query: string
332 @rtype: string
333 """
334 try:
335 r_queryType = self.pattern.search(query).group("queryType").upper()
336 except AttributeError:
337 warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning)
338 r_queryType = None
339
340 if r_queryType in _allowedQueryTypes :
341 return r_queryType
342 else :
343
344 warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning)
345 return SELECT
346
348 """Set the invocation method. By default, this is L{GET}, but can be set to L{POST}.
349 @param method: should be either L{GET} or L{POST}. Other cases are ignored.
350 """
351 if method in _allowedRequests : self.method = method
352
354 """Make urllib2 use keep-alive.
355 @note: if C{urlgrabber.keepalive.HTTPHandler} cannot be imported, a warning is issued and the call has no effect (no exception is raised).
356 """
357 try:
358 from urlgrabber.keepalive import HTTPHandler
359 keepalive_handler = HTTPHandler()
360 opener = urllib2.build_opener(keepalive_handler)
361 urllib2.install_opener(opener)
362 except ImportError:
363 warnings.warn("urlgrabber not installed in the system. The execution of this method has no effect.")
364
370
372 """ Returns TRUE if SPARQLWrapper is configured for executing SPARQL Query request
373 @return: bool
374 """
375 return not self.isSparqlUpdateRequest()
376
378 queryParameters = self.parameters.copy()
379
380
381
382
383
384 for f in _returnFormatSetting:
385 queryParameters[f] = [self.returnFormat]
386
387 utfQueryParameters = {}
388
389 for k, vs in queryParameters.items():
390 encodedValues = []
391
392 for v in vs:
393 if isinstance(v, unicode):
394 encodedValues.append(v.encode('utf-8'))
395 else:
396 encodedValues.append(v)
397
398 utfQueryParameters[k] = encodedValues
399
400 return utfQueryParameters
401
403 if self.queryType in [SELECT, ASK]:
404 if self.returnFormat == XML:
405 acceptHeader = ",".join(_SPARQL_XML)
406 elif self.returnFormat == JSON:
407 acceptHeader = ",".join(_SPARQL_JSON)
408 else:
409 acceptHeader = ",".join(_ALL)
410 elif self.queryType in [INSERT, DELETE]:
411 acceptHeader = "*/*"
412 else:
413 if self.returnFormat == N3 or self.returnFormat == TURTLE:
414 acceptHeader = ",".join(_RDF_N3)
415 elif self.returnFormat == XML:
416 acceptHeader = ",".join(_RDF_XML)
417 elif self.returnFormat == JSONLD and JSONLD in _allowedFormats:
418 acceptHeader = ",".join(_RDF_JSONLD)
419 else:
420 acceptHeader = ",".join(_ALL)
421 return acceptHeader
422
def _createRequest(self):
    """Internal method to create the request according to the HTTP method and
    the request method currently configured. Returns a C{urllib2.Request}
    object of the urllib2 Python library.

    Update requests are always built with a request body, whatever the value
    of C{self.method} is (a warning is issued when it is not L{POST}). Query
    requests carry a body only when C{self.method} is L{POST}; otherwise the
    query is appended to the URI.

    @return: request
    @rtype: C{urllib2.Request}
    """
    parameters = self._getRequestParameters()

    # Bring the query text to UTF-8 bytes once. urllib.urlencode() cannot
    # serialise unicode objects holding non-ASCII characters, and calling
    # .encode() on a byte string that already holds non-ASCII bytes fails too.
    queryBytes = self.queryString
    if isinstance(queryBytes, unicode):
        queryBytes = queryBytes.encode('utf-8')

    if self.isSparqlUpdateRequest():
        uri = self.updateEndpoint
        queryParam = "update"
        directContentType = "application/sparql-update"
        if self.method != POST:
            warnings.warn("update operations MUST be done by POST")
        # Updates are sent with a body even if self.method says otherwise.
        withBody = True
    else:
        uri = self.endpoint
        queryParam = "query"
        directContentType = "application/sparql-query"
        withBody = (self.method == POST)

    if not withBody:
        # Everything, the query included, goes into the URI.
        parameters[queryParam] = [queryBytes]
        request = urllib2.Request(uri + "?" + urllib.urlencode(parameters, True))
    elif self.requestMethod == POSTDIRECTLY:
        # Extra parameters stay in the URI, the raw query text is the body.
        request = urllib2.Request(uri + "?" + urllib.urlencode(parameters, True))
        request.add_header("Content-Type", directContentType)
        request.data = queryBytes
    else:
        # Query and extra parameters are sent together as a form body.
        parameters[queryParam] = [queryBytes]
        request = urllib2.Request(uri)
        request.add_header("Content-Type", "application/x-www-form-urlencoded")
        request.data = urllib.urlencode(parameters, True)

    request.add_header("User-Agent", self.agent)
    request.add_header("Accept", self._getAcceptHeader())
    if self.user and self.passwd:
        credentials = "%s:%s" % (self.user, self.passwd)
        # b64encode() is used instead of encodestring(): the latter appends a
        # newline (and wraps long values), which corrupts the header value.
        request.add_header("Authorization", "Basic %s" % base64.b64encode(credentials.encode('utf-8')))

    return request
474
def _query(self):
    """Internal method to execute the query. Returns the output of the
    C{urllib2.urlopen} method of the standard Python library

    @return: tuple with the raw response plus the requested return format
    @raise QueryBadFormed: when the endpoint answers with HTTP status 400
    @raise EndPointNotFound: when the endpoint answers with HTTP status 404
    @raise EndPointInternalError: when the endpoint answers with HTTP status 500
    @raise urllib2.HTTPError: for every other HTTP error status
    """
    if self.timeout:
        # NOTE: this sets the default timeout of the socket module, i.e. it is
        # not limited to the request issued below.
        socket.setdefaulttimeout(self.timeout)

    request = self._createRequest()
    try:
        response = urlopener(request)
    except urllib2.HTTPError as e:
        # Map the well-known status codes to the module's own exceptions;
        # the body of the error response becomes the exception message.
        if e.code == 400:
            raise QueryBadFormed(e.read())
        elif e.code == 404:
            raise EndPointNotFound(e.read())
        elif e.code == 500:
            raise EndPointInternalError(e.read())
        # Anything else is passed on untouched, traceback included.
        raise
    return response, self.returnFormat
495
497 """
498 Execute the query.
499 Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
500 case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint).
501 The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too.
502
503 Note that some combinations of return formats and query types may not make sense. For example,
504 a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph), or a CONSTRUCT
505 query with JSON output may be a problem because, at the moment, there is no accepted JSON serialization
506 of RDF (let alone one implemented by SPARQL endpoints). In such cases the returned media type of the result is
507 unpredictable and may differ from one SPARQL endpoint implementation to the other. (Endpoints usually fall
508 back to one of the "meaningful" formats, but it is up to the specific implementation to choose which
509 one that is.)
510
511 @return: query result
512 @rtype: L{QueryResult} instance
513 """
514 return QueryResult(self._query())
515
517 """Macro like method: issue a query and return the converted results.
518 @return: the converted query result. See the conversion methods for more details.
519 """
520 res = self.query()
521 return res.convert()
522
527 """
528 Wrapper around a query result. Users should not create instances of this class, it is
529 generated by a L{SPARQLWrapper.query} call. The results can be
530 converted to various formats, or used directly.
531
532 If used directly: the class gives access to the direct http request results
533 L{self.response}: it is a file-like object with two additional methods: C{geturl()} to
534 return the URL of the resource retrieved and
535 C{info()} that returns the meta-information of the HTTP result as a dictionary-like object
536 (see the urllib2 standard library module of Python).
537
538 For convenience, these methods are also available on the instance. The C{__iter__} and
539 C{next} methods are also implemented (by mapping them to L{self.response}). This means that the
540 common idiom::
541 for l in obj : do_something_with_line(l)
542 would work, too.
543
544 @ivar response: the direct HTTP response; a file-like object, as return by the C{urllib2.urlopen} library call.
545 """
def __init__(self, result):
    """
    @param result: HTTP response stemming from a L{SPARQLWrapper.query} call, or a tuple with the expected format: (response,format)
    """
    if isinstance(result, tuple):
        # (response, format) pair: keep both parts.
        self.response = result[0]
        self.requestedFormat = result[1]
    else:
        # Direct response, see class comments for details.
        self.response = result
        # No format came with a bare response; define the attribute anyway
        # so that reading it later does not fail with AttributeError.
        self.requestedFormat = None
556
558 """Return the URI of the original call.
559 @return: URI
560 @rtype: string
561 """
562 return self.response.geturl()
563
565 """Return the meta-information of the HTTP result.
566 @return: meta information
567 @rtype: dictionary
568 """
569 return KeyCaseInsensitiveDict(self.response.info())
570
572 """Return an iterator object. This method is expected for the inclusion
573 of the object in a standard C{for} loop.
574 """
575 return self.response.__iter__()
576
578 """Method for the standard iterator."""
579 return self.response.next()
580
581 @staticmethod
583 """Set the Python module for encoding JSON data. If not an allowed value, the setting is ignored.
584 JSON modules supported:
585 - ``simplejson``: http://code.google.com/p/simplejson/
586 - ``cjson``: http://pypi.python.org/pypi/python-cjson
587 - ``json``: This is the version of ``simplejson`` that is bundled with the
588 Python standard library since version 2.6
589 (see http://docs.python.org/library/json.html)
590 @param module: Possible values: are L{simplejson}, L{cjson}, L{json}. All other cases raise a ValueError exception.
591 @type module: string
592 """
593 jsonlayer.use(module)
594
596 """
597 Convert a JSON result into a Python dict. This method can be overwritten in a subclass
598 for a different conversion method.
599 @return: converted result
600 @rtype: Python dictionary
601 """
602 return jsonlayer.decode(self.response.read().decode("utf-8"))
603
605 """
606 Convert an XML result into a Python dom tree. This method can be overwritten in a
607 subclass for a different conversion method.
608 @return: converted result
609 @rtype: DOM document, as returned by C{xml.dom.minidom.parse}
610 """
611 from xml.dom.minidom import parse
612 return parse(self.response)
613
615 """
616 Convert a RDF/XML result into an RDFLib triple store. This method can be overwritten
617 in a subclass for a different conversion method.
618 @return: converted result
619 @rtype: RDFLib Graph
620 """
621 try:
622 from rdflib.graph import ConjunctiveGraph
623 except ImportError:
624 from rdflib import ConjunctiveGraph
625 retval = ConjunctiveGraph()
626
627
628 retval.load(self.response, publicID=' ')
629 return retval
630
632 """
633 Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass
634 for a different conversion method.
635 @return: converted result
636 @rtype: string
637 """
638 return self.response.read()
639
641 """
642 Convert an RDF JSON-LD result into an RDFLib triple store. This method can be overwritten
643 in a subclass for a different conversion method.
644 @return: converted result
645 @rtype: RDFLib Graph
646 """
647 from rdflib import ConjunctiveGraph
648 retval = ConjunctiveGraph()
649 retval.load(self.response, format='json-ld', publicID=' ')
650 return retval
651
def convert(self):
    """
    Encode the return value depending on the content type of the response.
    The content type is matched, in this order, against the media types of:
     - C{_SPARQL_XML}: the result of C{_convertXML} is returned;
     - C{_SPARQL_JSON}: the result of C{_convertJSON} is returned;
     - C{_RDF_XML}: the result of C{_convertRDF} is returned;
     - C{_RDF_N3}: the result of C{_convertN3} is returned;
     - C{_RDF_JSONLD}: the result of C{_convertJSONLD} is returned.
    In all other cases (including a response without content type) a warning is
    issued and the raw content of the response is returned.

    A warning is also issued when the returned format is not the one that was
    requested; no such check is made when the requested format is not known.

    @return: the converted query result. See the conversion methods for more details.
    """
    def _content_type_in_list(real, expected):
        # Substring match, so parameters such as "; charset=..." do not matter.
        return any(real.find(mime) != -1 for mime in expected)

    def _validate_format(format_name, allowed, mime, requested):
        # Nothing to compare against when no format was recorded.
        if requested is None:
            return
        if requested not in allowed:
            message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
            warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)

    # The attribute may be missing or None when the instance was not built
    # from a (response, format) tuple.
    requested = getattr(self, "requestedFormat", None)

    # info() builds a new dictionary on every call; fetch it once.
    headers = self.info()
    if "content-type" in headers:
        ct = headers["content-type"]

        # (media types, label for the warning, accepted requested formats, converter);
        # the first matching entry wins.
        dispatch = (
            (_SPARQL_XML, "XML", [XML], self._convertXML),
            (_SPARQL_JSON, "JSON", [JSON], self._convertJSON),
            (_RDF_XML, "RDF/XML", [RDF, XML], self._convertRDF),
            (_RDF_N3, "N3", [N3, TURTLE], self._convertN3),
            (_RDF_JSONLD, "JSON(-LD)", [JSONLD, JSON], self._convertJSONLD),
        )
        for mimes, label, allowed, converter in dispatch:
            if _content_type_in_list(ct, mimes):
                _validate_format(label, allowed, ct, requested)
                return converter()

    warnings.warn("unknown response content type, returning raw response...", RuntimeWarning)
    return self.response.read()
691
def print_results(self, minWidth=None):
    """
    Print the JSON conversion of the result as a plain text table: one header
    line with the variable names (prefixed by "?"), one line of "=" characters
    and one line per binding. Every cell is followed by " |".

    A variable that has no entry in a binding is printed as an empty cell.

    @param minWidth: minimum width of a column; when omitted (or otherwise
    false) the default of C{__get_results_width} applies
    @type minWidth: int
    """
    results = self._convertJSON()
    if minWidth:
        width = self.__get_results_width(results, minWidth)
    else:
        width = self.__get_results_width(results)

    variables = results["head"]["vars"]

    def _row(cells):
        # Same layout the former "print cell, '|'," sequence produced:
        # "cell | cell |", and an empty line when there is no cell at all.
        return " ".join(cell + " |" for cell in cells)

    # print(...) with one parenthesised argument is valid on Python 2 and 3.
    print(_row([("?" + var).ljust(width[index]) for index, var in enumerate(variables)]))
    print("=" * (sum(width) + 3 * len(width)))
    for result in results["results"]["bindings"]:
        cells = []
        for index, var in enumerate(variables):
            # A binding may lack a variable (unbound); show it as empty.
            value = result.get(var, {}).get("value", "")
            cells.append(value.ljust(width[index]))
        print(_row(cells))
710
def __get_results_width(self, results, minWidth=2):
    """
    Compute the width of every column of a result table.

    The width of a column is the largest of: C{minWidth}, the length of the
    variable name plus one (room for the leading "?"), and the length of the
    longest value bound to that variable. A binding that has no entry for a
    variable does not contribute to that column.

    @param results: JSON result structure; C{results["head"]["vars"]} and
    C{results["results"]["bindings"]} are read
    @type results: Python dictionary
    @param minWidth: minimum width of a column
    @type minWidth: int
    @return: one width per variable, in the order of C{results["head"]["vars"]}
    @rtype: list
    """
    variables = results["head"]["vars"]
    width = [max(minWidth, len(var) + 1) for var in variables]
    for result in results["results"]["bindings"]:
        # enumerate() keeps the column index in step with the variable; the
        # former "index =+ 1" assigned +1 instead of incrementing.
        for index, var in enumerate(variables):
            value = result.get(var, {}).get("value", "")
            width[index] = max(width[index], len(value))
    return width
721