1
2
3
4 """
5 @var JSON: to be used to set the return format to JSON
6 @var XML: to be used to set the return format to XML (SPARQL XML format or RDF/XML, depending on the query type). This is the default.
7 @var RDFXML: to be used to set the return format to RDF/XML explicitly.
8 @var TURTLE: to be used to set the return format to Turtle
9 @var N3: to be used to set the return format to N3 (for most of the SPARQL services this is equivalent to Turtle)
10 @var RDF: to be used to set the return RDF Graph
11 @var CSV: to be used to set the return format to CSV
12 @var TSV: to be used to set the return format to TSV
13 @var JSONLD: to be used to set the return format to JSON-LD
14
15 @var POST: to be used to set HTTP POST
16 @var GET: to be used to set HTTP GET. This is the default.
17
18 @var SELECT: to be used to set the query type to SELECT. This is, usually, determined automatically.
19 @var CONSTRUCT: to be used to set the query type to CONSTRUCT. This is, usually, determined automatically.
20 @var ASK: to be used to set the query type to ASK. This is, usually, determined automatically.
21 @var DESCRIBE: to be used to set the query type to DESCRIBE. This is, usually, determined automatically.
22
23 @var INSERT: to be used to set the query type to INSERT.
24 @var DELETE: to be used to set the query type to DELETE.
25 @var CREATE: to be used to set the query type to CREATE.
26 @var CLEAR: to be used to set the query type to CLEAR.
27 @var DROP: to be used to set the query type to DROP.
28 @var LOAD: to be used to set the query type to LOAD.
29 @var COPY: to be used to set the query type to COPY.
30 @var MOVE: to be used to set the query type to MOVE.
31 @var ADD: to be used to set the query type to ADD.
32
33
34 @var BASIC: BASIC HTTP Authentication method
35 @var DIGEST: DIGEST HTTP Authentication method
36
37 @see: U{SPARQL Specification<http://www.w3.org/TR/rdf-sparql-query/>}
38 @authors: U{Ivan Herman<http://www.ivan-herman.net>}, U{Sergio Fernández<http://www.wikier.org>}, U{Carlos Tejo Alonso<http://www.dayures.net>}
39 @organization: U{World Wide Web Consortium<http://www.w3.org>}, U{Salzburg Research<http://www.salzburgresearch.at>} and U{Foundation CTIC<http://www.fundacionctic.org/>}.
40 @license: U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/copyright-software>}
41 @requires: U{RDFLib<http://rdflib.net>} package.
42 """
43
44 import urllib
45 import urllib2
46 from urllib2 import urlopen as urlopener
47 import base64
48 import re
49 import sys
50 import warnings
51
52 import json
53 from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict
54 from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError, Unauthorized, URITooLong
55 from SPARQLWrapper import __agent__
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
# Return-format identifiers. JSONLD is intentionally left out of
# _allowedFormats here: it is appended later in this module only when the
# rdflib_jsonld package can be imported.
229 JSON = "json"
230 JSONLD = "json-ld"
231 XML = "xml"
232 TURTLE = "turtle"
233 N3 = "n3"
234 RDF = "rdf"
235 RDFXML = "rdf+xml"
236 CSV = "csv"
237 TSV = "tsv"
238 _allowedFormats = [JSON, XML, TURTLE, N3, RDF, RDFXML, CSV, TSV]
239
240
# HTTP verbs; _allowedRequests is the whitelist checked before self.method is changed.
241 POST = "POST"
242 GET = "GET"
243 _allowedRequests = [POST, GET]
244
245
# HTTP authentication schemes; values are compared upper-cased against _allowedAuth.
246 BASIC = "BASIC"
247 DIGEST = "DIGEST"
248 _allowedAuth = [BASIC, DIGEST]
249
250
# SPARQL query forms. SELECT/CONSTRUCT/ASK/DESCRIBE are read queries; the
# remaining ones (INSERT ... ADD) are treated as SPARQL Update operations.
251 SELECT = "SELECT"
252 CONSTRUCT = "CONSTRUCT"
253 ASK = "ASK"
254 DESCRIBE = "DESCRIBE"
255 INSERT = "INSERT"
256 DELETE = "DELETE"
257 CREATE = "CREATE"
258 CLEAR = "CLEAR"
259 DROP = "DROP"
260 LOAD = "LOAD"
261 COPY = "COPY"
262 MOVE = "MOVE"
263 ADD = "ADD"
264 _allowedQueryTypes = [SELECT, CONSTRUCT, ASK, DESCRIBE, INSERT, DELETE, CREATE, CLEAR, DROP,
265 LOAD, COPY, MOVE, ADD]
266
267
# How the query/update text is transmitted: as URL-encoded parameters
# (URLENCODED) or as the raw request body (POSTDIRECTLY).
268 URLENCODED = "urlencoded"
269 POSTDIRECTLY = "postdirectly"
270 _REQUEST_METHODS = [URLENCODED, POSTDIRECTLY]
271
272
273
274
275
276
277
278
279
# MIME types, grouped by result format. The lists are comma-joined to build
# the HTTP Accept header sent with each request.
280 _SPARQL_DEFAULT = ["application/sparql-results+xml", "application/rdf+xml", "*/*"]
281 _SPARQL_XML = ["application/sparql-results+xml"]
282 _SPARQL_JSON = ["application/sparql-results+json", "application/json", "text/javascript", "application/javascript"]
283 _RDF_XML = ["application/rdf+xml"]
284 _RDF_TURTLE = ["application/turtle", "text/turtle"]
# N3 requests also advertise the Turtle types (the list starts with _RDF_TURTLE).
285 _RDF_N3 = _RDF_TURTLE + ["text/rdf+n3", "application/n-triples", "application/n3", "text/n3"]
286 _RDF_JSONLD = ["application/ld+json", "application/x-json+ld"]
287 _CSV = ["text/csv"]
288 _TSV = ["text/tab-separated-values"]
289 _XML = ["application/xml"]
290 _ALL = ["*/*"]
# NOTE(review): _RDF_POSSIBLE is not read in the visible part of the file;
# presumably it is used when converting responses -- confirm.
291 _RDF_POSSIBLE = _RDF_XML + _RDF_N3 + _XML
292
# Parameter names reserved for the query itself: they can neither be added
# nor cleared as extra request parameters.
293 _SPARQL_PARAMS = ["query"]
294
# JSON-LD support is optional: the format is only registered as allowed when
# the rdflib_jsonld package is importable.
295 try:
296 import rdflib_jsonld
297 _allowedFormats.append(JSONLD)
298 _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD
299 except ImportError:
300
# Package missing: JSON-LD simply stays out of _allowedFormats.
301 pass
302
303
304
305
306
307
308
# Request parameter names that are each set to the requested return format
# for non-update requests, unless only content negotiation is enabled.
# Presumably several names are sent because endpoints differ in which one
# they honour -- confirm.
309 _returnFormatSetting = ["format", "output", "results"]
310
311
312
313
315 """
316 Wrapper around an online access to a SPARQL Web entry point.
317
318 The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc,
319 are retained from one query to the next (in other words, only the query string changes). The instance can also be
320 reset to its initial values using the L{resetQuery} method.
321
322 @cvar prefix_pattern: regular expression used to remove base/prefixes in the process of determining the query type.
323 @type prefix_pattern: compiled regular expression (see the C{re} module of Python)
324 @cvar pattern: regular expression used to determine whether a query (without base/prefixes) is of type L{CONSTRUCT}, L{SELECT}, L{ASK}, L{DESCRIBE}, L{INSERT}, L{DELETE}, L{CREATE}, L{CLEAR}, L{DROP}, L{LOAD}, L{COPY}, L{MOVE} or L{ADD}.
325 @type pattern: compiled regular expression (see the C{re} module of Python)
326 @cvar comments_pattern: regular expression used to remove comments from a query.
327 @type comments_pattern: compiled regular expression (see the C{re} module of Python)
328 @ivar endpoint: SPARQL endpoint's URI.
329 @type endpoint: string
330 @ivar updateEndpoint: SPARQL endpoint's URI for update operations (if it's a different one). Default is C{None}
331 @type updateEndpoint: string
332 @ivar agent: The User-Agent for the HTTP request header.
333 @type agent: string
334 @ivar _defaultGraph: URI for the default graph. Default is C{None}, the value can be set either via an L{explicit call<addParameter>}("default-graph-uri", uri) or as part of the query string.
335 @type _defaultGraph: string
336 @ivar user: The username of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}.
337 @type user: string
338 @ivar passwd: The password of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}.
339 @type passwd: string
340 @ivar http_auth: HTTP Authentication type. The default value is L{BASIC}. Possible values are L{BASIC} or L{DIGEST}
341 @type http_auth: string
342 @ivar onlyConneg: Option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters). The default value is L{False}.
343 @type onlyConneg: boolean
344 @ivar customHttpHeaders: Custom HTTP Headers to be included in the request. Important: These headers override previous values (including C{Content-Type}, C{User-Agent}, C{Accept} and C{Authorization} if they are present). It is a dictionary where keys are the header field names and values are the header values.
345 @type customHttpHeaders: dict
346 @ivar timeout: The timeout (in seconds) to use for querying the endpoint.
347 @type timeout: int
348 @ivar queryString: The SPARQL query text.
349 @type queryString: string
350 @ivar queryType: The type of SPARQL query (aka SPARQL query form), like L{CONSTRUCT}, L{SELECT}, L{ASK}, L{DESCRIBE}, L{INSERT}, L{DELETE}, L{CREATE}, L{CLEAR}, L{DROP}, L{LOAD}, L{COPY}, L{MOVE} or L{ADD} (constants in this module).
351 @type queryType: string
352 @ivar returnFormat: The return format. The possible values are L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF}, L{RDFXML}, L{CSV}, L{TSV}, L{JSONLD} (constants in this module).
353 @type returnFormat: string
354 @ivar requestMethod: The request method for query or update operations. The possible values are URL-encoded (L{URLENCODED}) or POST directly (L{POSTDIRECTLY}).
355 @type requestMethod: string
356 @ivar method: The invocation method. By default, this is L{GET}, but can be set to L{POST}.
357 @type method: string
358 @ivar parameters: The parameters of the request (key/value pairs in a dictionary).
359 @type parameters: dict
360 @ivar _defaultReturnFormat: The default return format.
361 @type _defaultReturnFormat: string
362
363
364 """
365 prefix_pattern = re.compile(r"((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))*")
366
367 pattern = re.compile(r"(?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))", re.VERBOSE | re.IGNORECASE)
368 comments_pattern = re.compile(r"(^|\n)\s*#.*?\n")
369
def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
    """
    Class encapsulating a full SPARQL call.

    @param endpoint: string of the SPARQL endpoint's URI
    @type endpoint: string
    @param updateEndpoint: string of the SPARQL endpoint's URI for update operations (if it's a different one).
    When omitted (or empty), C{endpoint} is used for updates as well.
    @type updateEndpoint: string
    @param returnFormat: Default: L{XML}.
    Can be set to JSON or Turtle/N3.

    No check is done on whether the format makes sense for the query that is issued later: the value
    is simply sent to the endpoint. Eg, if the value is set to JSON and a construct query is issued, it
    is up to the endpoint to react or not, this wrapper does not check.

    Possible values:
    L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF}, L{RDFXML}, L{CSV}, L{TSV} (constants in this module), plus
    L{JSONLD} when JSON-LD support is available. Any other value is silently replaced by L{XML}.
    @type returnFormat: string
    @param defaultGraph: URI for the default graph. Default is C{None}, the value can be set either via an
    L{explicit call<addParameter>}("default-graph-uri", uri) or as part of the query string.
    @type defaultGraph: string
    @param agent: The User-Agent for the HTTP request header.
    @type agent: string
    """
    self.endpoint = endpoint
    self.updateEndpoint = updateEndpoint if updateEndpoint else endpoint
    self.agent = agent
    self.user = None
    self.passwd = None
    # The DIGEST branch of the request builder reads self.realm. Give it the
    # documented default here so that assigning user/passwd/http_auth
    # directly (without going through the credentials setter) does not end
    # in an AttributeError.
    self.realm = "SPARQL"
    self.http_auth = BASIC
    self._defaultGraph = defaultGraph
    self.onlyConneg = False
    self.customHttpHeaders = {}

    # Unknown formats fall back to XML instead of raising.
    if returnFormat in _allowedFormats:
        self._defaultReturnFormat = returnFormat
    else:
        self._defaultReturnFormat = XML

    # Populates the per-query state (parameters, method, query text, ...).
    self.resetQuery()
409
411 """Reset the query, ie, return format, method, query, default or named graph settings, etc,
412 are reset to their default values.
413 """
414 self.parameters = {}
415 if self._defaultGraph:
416 self.addParameter("default-graph-uri", self._defaultGraph)
417 self.returnFormat = self._defaultReturnFormat
418 self.method = GET
419 self.setQuery("""SELECT * WHERE{ ?s ?p ?o }""")
420 self.timeout = None
421 self.requestMethod = URLENCODED
422
423
437
447
449 """Set the timeout (in seconds) to use for querying the endpoint.
450
451 @param timeout: Timeout in seconds.
452 @type timeout: int
453 """
454 self.timeout = int(timeout)
455
457 """Set this option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters).
458 @since: 1.8.1
459
460 @param onlyConneg: True if only HTTP Content Negotiation is allowed; False if HTTP parameters are also allowed.
461 @type onlyConneg: bool
462 """
463 self.onlyConneg = onlyConneg
464
466 """Set the internal method to use to perform the request for query or
467 update operations, either URL-encoded (L{SPARQLWrapper.URLENCODED}) or
468 POST directly (L{SPARQLWrapper.POSTDIRECTLY}).
469 Further details at U{http://www.w3.org/TR/sparql11-protocol/#query-operation}
470 and U{http://www.w3.org/TR/sparql11-protocol/#update-operation}.
471
472 @param method: Possible values are L{SPARQLWrapper.URLENCODED} (URL-encoded) or L{SPARQLWrapper.POSTDIRECTLY} (POST directly). All other cases are ignored.
473 @type method: string
474 """
475 if method in _REQUEST_METHODS:
476 self.requestMethod = method
477 else:
478 warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
479
481 """
482 Add a default graph URI.
483 @param uri: URI of the graph
484 @type uri: string
485 @deprecated: use addParameter("default-graph-uri", uri) instead of this method
486 """
487 self.addParameter("default-graph-uri", uri)
488
490 """
491 Add a named graph URI.
492 @param uri: URI of the graph
493 @type uri: string
494 @deprecated: use addParameter("named-graph-uri", uri) instead of this method
495 """
496 self.addParameter("named-graph-uri", uri)
497
499 """
500 Some SPARQL endpoints require extra key value pairs.
501 E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
502 virtuoso to retrieve graphs that are not stored in its local database.
503 Alias of L{SPARQLWrapper.addParameter} method.
504 @param key: key of the query part
505 @type key: string
506 @param value: value of the query part
507 @type value: string
508 @deprecated: use addParameter(key, value) instead of this method
509 """
510 self.addParameter(key, value)
511
513 """
514 Method is kept for backwards compatibility. Historically, it "replaces" parameters instead of adding.
515 @param name: name
516 @type name: string
517 @param value: value
518 @type value: string
519 @return: Returns a boolean indicating if the adding has been accomplished.
520 @rtype: bool
521 @deprecated: use addParameter(name, value) instead of this method
522 """
523 self.clearParameter(name)
524 return self.addParameter(name, value)
525
527 """
528 Some SPARQL endpoints allow extra key value pairs.
529 E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
530 virtuoso to retrieve graphs that are not stored in its local database.
531 If the param C{query} is tried to be set, this intent is dismissed.
532 Returns a boolean indicating if the set has been accomplished.
533 @param name: name
534 @type name: string
535 @param value: value
536 @type value: string
537 @return: Returns a boolean indicating if the adding has been accomplished.
538 @rtype: bool
539 """
540 if name in _SPARQL_PARAMS:
541 return False
542 else:
543 if name not in self.parameters:
544 self.parameters[name] = []
545 self.parameters[name].append(value)
546 return True
547
549 """
550 Add a custom HTTP header (this method can override all HTTP headers).
551 IMPORTANT: Take into account that each previous value for the header field names
552 C{Content-Type}, C{User-Agent}, C{Accept} and C{Authorization} would be overridden
553 if the header field name is present as value of the parameter C{httpHeaderName}.
554 @since: 1.8.2
555
556 @param httpHeaderName: The header field name.
557 @type httpHeaderName: string
558 @param httpHeaderValue: The header field value.
559 @type httpHeaderValue: string
560 """
561 self.customHttpHeaders[httpHeaderName] = httpHeaderValue
562
564 """
565 Clear the values of a custom HTTP header previously set.
566 Returns a boolean indicating if the clearing has been accomplished.
567 @since: 1.8.2
568
569 @param httpHeaderName: name
570 @type httpHeaderName: string
571 @return: Returns a boolean indicating if the clearing has been accomplished.
572 @rtype: bool
573 """
574 try:
575 del self.customHttpHeaders[httpHeaderName]
576 return True
577 except KeyError:
578 return False
579
581 """
582 Clear the values of a concrete parameter.
583 Returns a boolean indicating if the clearing has been accomplished.
584 @param name: name
585 @type name: string
586 @return: Returns a boolean indicating if the clearing has been accomplished.
587 @rtype: bool
588 """
589 if name in _SPARQL_PARAMS:
590 return False
591 else:
592 try:
593 del self.parameters[name]
594 return True
595 except KeyError:
596 return False
597
599 """
600 Set the credentials for querying the current endpoint.
601 @param user: username
602 @type user: string
603 @param passwd: password
604 @type passwd: string
605 @param realm: realm. Only used for L{DIGEST} authentication. Default is C{SPARQL}
606 @type realm: string
607 @change: Added C{realm} parameter since version C{1.8.3}.
608 """
609 self.user = user
610 self.passwd = passwd
611 self.realm = realm
612
614 """
615 Set the HTTP Authentication type. Possible values are L{BASIC} or L{DIGEST}.
616 @param auth: auth type
617 @type auth: string
618 @raise TypeError: If the C{auth} parameter is not a string.
619 @raise ValueError: If the C{auth} parameter is not one of the valid values: L{BASIC} or L{DIGEST}.
620 """
621 if not isinstance(auth, str):
622 raise TypeError('setHTTPAuth takes a string')
623 elif auth.upper() in _allowedAuth:
624 self.http_auth = auth.upper()
625 else:
626 valid_types = ", ".join(_allowedAuth)
627 raise ValueError("Value should be one of {0}".format(valid_types))
628
630 """
631 Set the SPARQL query text. Note: no check is done on the validity of the query
632 (syntax or otherwise) by this module, except for testing the query type (SELECT,
633 ASK, etc). Syntax and validity checking is done by the SPARQL service itself.
634 @param query: query text
635 @type query: string
636 @raise TypeError: If the C{query} parameter is not a unicode-string or utf-8 encoded byte-string.
637 """
638 if sys.version < '3':
639 if isinstance(query, unicode):
640 pass
641 elif isinstance(query, str):
642 query = query.decode('utf-8')
643 else:
644 raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')
645 else:
646 if isinstance(query, str):
647 pass
648 elif isinstance(query, bytes):
649 query = query.decode('utf-8')
650 else:
651 raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')
652
653 self.queryString = query
654 self.queryType = self._parseQueryType(query)
655
657 """
658 Internal method for parsing the SPARQL query and return its type (ie, L{SELECT}, L{ASK}, etc).
659
660 Note that the method returns L{SELECT} if nothing is specified. This is just to get all other
661 methods running; in fact, this means that the query is erroneous, because the query must be,
662 according to the SPARQL specification, one of Select, Ask, Describe, or Construct. The
663 SPARQL endpoint should raise an exception (via urllib) for such syntax error.
664
665 @param query: query text
666 @type query: string
667 @return: the type of SPARQL query (aka SPARQL query form)
668 @rtype: string
669 """
670 try:
671 query = query if (isinstance(query, str)) else query.encode('ascii', 'ignore')
672 query = self._cleanComments(query)
673 query_for_queryType = re.sub(self.prefix_pattern, "", query.strip())
674 r_queryType = self.pattern.search(query_for_queryType).group("queryType").upper()
675 except AttributeError:
676 warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning)
677 r_queryType = None
678
679 if r_queryType in _allowedQueryTypes:
680 return r_queryType
681 else:
682
683 warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning)
684 return SELECT
685
687 """Set the invocation method. By default, this is L{GET}, but can be set to L{POST}.
688 @param method: should be either L{GET} or L{POST}. Other cases are ignored.
689 @type method: string
690 """
691 if method in _allowedRequests:
692 self.method = method
693
695 """Make urllib2 use keep-alive.
696 @raise ImportError: when could not be imported keepalive.HTTPHandler
697 """
698 try:
699 from keepalive import HTTPHandler
700
701 if urllib2._opener and any(isinstance(h, HTTPHandler) for h in urllib2._opener.handlers):
702
703 return
704
705 keepalive_handler = HTTPHandler()
706 opener = urllib2.build_opener(keepalive_handler)
707 urllib2.install_opener(opener)
708 except ImportError:
709 warnings.warn("keepalive support not available, so the execution of this method has no effect")
710
712 """ Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Update request.
713 @return: Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Update request
714 @rtype: bool
715 """
716 return self.queryType in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]
717
719 """ Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Query request.
720 @return: Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Query request.
721 @rtype: bool
722 """
723 return not self.isSparqlUpdateRequest()
724
733
735 """ Internal method for getting the request encoded parameters.
736 @param query: a tuple of two items. The first item can be the string
737 C{query} (for L{SELECT}, L{DESCRIBE}, L{ASK}, L{CONSTRUCT} query) or the string C{update}
738 (for SPARQL Update queries, like L{DELETE} or L{INSERT}). The second item of the tuple
739 is the query string itself.
740 @type query: tuple
741 @return: the request encoded parameters.
742 @rtype: string
743 """
744 query_parameters = self.parameters.copy()
745
746
747 if query and (isinstance(query, tuple)) and len(query) == 2:
748 query_parameters[query[0]] = [query[1]]
749
750 if not self.isSparqlUpdateRequest():
751
752
753
754
755 if not self.onlyConneg:
756 for f in _returnFormatSetting:
757 query_parameters[f] = [self.returnFormat]
758
759
760
761 if self.returnFormat in [TSV, JSONLD, RDFXML]:
762 acceptHeader = self._getAcceptHeader()
763 if "*/*" in acceptHeader:
764 acceptHeader = ""
765 query_parameters[f] += [acceptHeader]
766
767 pairs = (
768 "%s=%s" % (
769 urllib.quote_plus(param.encode('UTF-8'), safe='/'),
770 urllib.quote_plus(value.encode('UTF-8'), safe='/')
771 )
772 for param, values in query_parameters.items() for value in values
773 )
774 return '&'.join(pairs)
775
777 """ Internal method for getting the HTTP Accept Header.
778 @see: U{Hypertext Transfer Protocol -- HTTP/1.1 - Header Field Definitions<https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1>}
779 """
780 if self.queryType in [SELECT, ASK]:
781 if self.returnFormat == XML:
782 acceptHeader = ",".join(_SPARQL_XML)
783 elif self.returnFormat == JSON:
784 acceptHeader = ",".join(_SPARQL_JSON)
785 elif self.returnFormat == CSV:
786 acceptHeader = ",".join(_CSV)
787 elif self.returnFormat == TSV:
788 acceptHeader = ",".join(_TSV)
789 else:
790 acceptHeader = ",".join(_ALL)
791 warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
792 elif self.queryType in [CONSTRUCT, DESCRIBE]:
793 if self.returnFormat == N3 or self.returnFormat == TURTLE:
794 acceptHeader = ",".join(_RDF_N3)
795 elif self.returnFormat == XML or self.returnFormat == RDFXML:
796 acceptHeader = ",".join(_RDF_XML)
797 elif self.returnFormat == JSONLD and JSONLD in _allowedFormats:
798 acceptHeader = ",".join(_RDF_JSONLD)
799 else:
800 acceptHeader = ",".join(_ALL)
801 warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
802 elif self.queryType in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]:
803 if self.returnFormat == XML:
804 acceptHeader = ",".join(_SPARQL_XML)
805 elif self.returnFormat == JSON:
806 acceptHeader = ",".join(_SPARQL_JSON)
807 else:
808 acceptHeader = ",".join(_ALL)
809 else:
810 acceptHeader = "*/*"
811 return acceptHeader
812
814 """Internal method to create request according a HTTP method. Returns a
815 C{urllib2.Request} object of the urllib2 Python library
816 @raise NotImplementedError: If the C{HTTP authentication} method is not one of the valid values: L{BASIC} or L{DIGEST}.
817 @return: request a C{urllib2.Request} object of the urllib2 Python library
818 """
819 request = None
820
821 if self.isSparqlUpdateRequest():
822
823 uri = self.updateEndpoint
824
825 if self.method != POST:
826 warnings.warn("update operations MUST be done by POST")
827
828 if self.requestMethod == POSTDIRECTLY:
829 request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
830 request.add_header("Content-Type", "application/sparql-update")
831 request.data = self.queryString.encode('UTF-8')
832 else:
833 request = urllib2.Request(uri)
834 request.add_header("Content-Type", "application/x-www-form-urlencoded")
835 request.data = self._getRequestEncodedParameters(("update", self.queryString)).encode('ascii')
836 else:
837
838 uri = self.endpoint
839
840 if self.method == POST:
841 if self.requestMethod == POSTDIRECTLY:
842 request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
843 request.add_header("Content-Type", "application/sparql-query")
844 request.data = self.queryString.encode('UTF-8')
845 else:
846 request = urllib2.Request(uri)
847 request.add_header("Content-Type", "application/x-www-form-urlencoded")
848 request.data = self._getRequestEncodedParameters(("query", self.queryString)).encode('ascii')
849 else:
850 request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters(("query", self.queryString)))
851
852 request.add_header("User-Agent", self.agent)
853 request.add_header("Accept", self._getAcceptHeader())
854 if self.user and self.passwd:
855 if self.http_auth == BASIC:
856 credentials = "%s:%s" % (self.user, self.passwd)
857 request.add_header("Authorization", "Basic %s" % base64.b64encode(credentials.encode('utf-8')).decode('utf-8'))
858 elif self.http_auth == DIGEST:
859 realm = self.realm
860 pwd_mgr = urllib2.HTTPPasswordMgr()
861 pwd_mgr.add_password(realm, uri, self.user, self.passwd)
862 opener = urllib2.build_opener()
863 opener.add_handler(urllib2.HTTPDigestAuthHandler(pwd_mgr))
864 urllib2.install_opener(opener)
865 else:
866 valid_types = ", ".join(_allowedAuth)
867 raise NotImplementedError("Expecting one of: {0}, but received: {1}".format(valid_types,
868 self.http_auth))
869
870
871 for customHttpHeader in self.customHttpHeaders:
872 request.add_header(customHttpHeader, self.customHttpHeaders[customHttpHeader])
873
874 return request
875
877 """Internal method to execute the query. Returns the output of the
878 C{urllib2.urlopen} method of the standard Python library
879
880 @return: tuples with the raw request plus the expected format.
881 @raise QueryBadFormed: If the C{HTTP return code} is C{400}.
882 @raise Unauthorized: If the C{HTTP return code} is C{401}.
883 @raise EndPointNotFound: If the C{HTTP return code} is C{404}.
884 @raise URITooLong: If the C{HTTP return code} is C{414}.
885 @raise EndPointInternalError: If the C{HTTP return code} is C{500}.
886 """
887 request = self._createRequest()
888
889 try:
890 if self.timeout:
891 response = urlopener(request, timeout=self.timeout)
892 else:
893 response = urlopener(request)
894 return response, self.returnFormat
895 except urllib2.HTTPError, e:
896 if e.code == 400:
897 raise QueryBadFormed(e.read())
898 elif e.code == 404:
899 raise EndPointNotFound(e.read())
900 elif e.code == 401:
901 raise Unauthorized(e.read())
902 elif e.code == 414:
903 raise URITooLong(e.read())
904 elif e.code == 500:
905 raise EndPointInternalError(e.read())
906 else:
907 raise e
908
910 """
911 Execute the query.
912 Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
913 case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint).
914 The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too.
915
916 Note that some combinations of return formats and query types may not make sense. For example,
917 a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph), or a CONSTRUCT
918 query with JSON output may be a problem because, at the moment, there is no accepted JSON serialization
919 of RDF (let alone one implemented by SPARQL endpoints). In such cases the returned media type of the result is
920 unpredictable and may differ from one SPARQL endpoint implementation to the other. (Endpoints usually fall
921 back to one of the "meaningful" formats, but it is up to the specific implementation to choose which
922 one that is.)
923
924 @return: query result
925 @rtype: L{QueryResult} instance
926 """
927 return QueryResult(self._query())
928
930 """Macro like method: issue a query and return the converted results.
931 @return: the converted query result. See the conversion methods for more details.
932 """
933 res = self.query()
934 return res.convert()
935
937 """This method returns the string representation of a L{SPARQLWrapper} object.
938 @return: A human-readable string of the object.
939 @rtype: string
940 @since: 1.8.3
941 """
942 fullname = self.__module__ + "." + self.__class__.__name__
943 items = ('"%s" : %r' % (k, v) for k, v in sorted(self.__dict__.items()))
944 str_dict_items = "{%s}" % (',\n'.join(items))
945 return "<%s object at 0x%016X>\n%s" % (fullname, id(self), str_dict_items)
946
947
948
949
950
952 """
953 Wrapper around a query result. Users should not create instances of this class, it is
954 generated by a L{SPARQLWrapper.query} call. The results can be
955 converted to various formats, or used directly.
956
957 If used directly: the class gives access to the direct http request results
958 L{self.response}: it is a file-like object with two additional methods: C{geturl()} to
959 return the URL of the resource retrieved and
960 C{info()} that returns the meta-information of the HTTP result as a dictionary-like object
961 (see the urllib2 standard library module of Python).
962
963 For convenience, these methods are also available on the instance. The C{__iter__} and
964 C{next} methods are also implemented (by mapping them to L{self.response}). This means that the
965 common idiom::
966 for l in obj : do_something_with_line(l)
967 would work, too.
968
969 @ivar response: the direct HTTP response; a file-like object, as returned by the C{urllib2.urlopen} library call.
970 @ivar requestedFormat: The requested format. The possible values are: L{JSON}, L{XML}, L{RDFXML}, L{TURTLE}, L{N3}, L{RDF}, L{CSV}, L{TSV}, L{JSONLD}.
971 @type requestedFormat: string
972 """
974 """
975 @param result: HTTP response stemming from a L{SPARQLWrapper.query} call, or a tuple with the expected format: (response,format)
976 """
977 if isinstance(result, tuple):
978 self.response = result[0]
979 self.requestedFormat = result[1]
980 else:
981 self.response = result
982
984 """Return the URL of the original call.
985 @return: URL of the original call
986 @rtype: string
987 """
988 return self.response.geturl()
989
991 """Return the meta-information of the HTTP result.
992 @return: meta information of the HTTP result
993 @rtype: dict
994 """
995 return KeyCaseInsensitiveDict(self.response.info())
996
998 """Return an iterator object. This method is expected for the inclusion
999 of the object in a standard C{for} loop.
1000 """
1001 return self.response.__iter__()
1002
1004 """Method for the standard iterator."""
1005 return self.response.next()
1006
1008 """
1009 Convert a JSON result into a Python dict. This method can be overwritten in a subclass
1010 for a different conversion method.
1011 @return: converted result
1012 @rtype: dict
1013 """
1014 return json.loads(self.response.read().decode("utf-8"))
1015
def _convertXML(self):
    """
    Convert an XML result into a Python dom tree. This method can be overwritten in a
    subclass for a different conversion method.
    @return: converted result
    @rtype: C{xml.dom.minidom.Document}
    """
    # Imported lazily so the XML machinery is only loaded when needed.
    from xml.dom.minidom import parse
    return parse(self.response)
1025
def _convertRDF(self):
    """
    Convert a RDF/XML result into an RDFLib triple store. This method can be overwritten
    in a subclass for a different conversion method.
    @return: converted result
    @rtype: RDFLib C{Graph}
    """
    try:
        from rdflib.graph import ConjunctiveGraph
    except ImportError:
        from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # C{Graph.load} was only a thin alias of C{parse} defaulting to RDF/XML
    # and is no longer available in recent RDFLib releases; call C{parse}
    # with the explicit format instead.
    retval.parse(self.response, format="xml")
    return retval
1042
def _convertN3(self):
    """
    Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass
    for a different conversion method.

    The payload is returned exactly as read from the HTTP response.
    @return: converted result
    @rtype: string
    """
    return self.response.read()
1051
def _convertCSV(self):
    """
    Convert a CSV result into a string. This method can be overwritten in a subclass
    for a different conversion method.

    The payload is returned exactly as read from the HTTP response.
    @return: converted result
    @rtype: string
    """
    return self.response.read()
1060
def _convertTSV(self):
    """
    Convert a TSV result into a string. This method can be overwritten in a subclass
    for a different conversion method.

    The payload is returned exactly as read from the HTTP response.
    @return: converted result
    @rtype: string
    """
    return self.response.read()
1069
def _convertJSONLD(self):
    """
    Convert a RDF JSON-LD result into an RDFLib triple store. This method can be overwritten
    in a subclass for a different conversion method.
    @return: converted result
    @rtype: RDFLib Graph
    """
    from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # C{Graph.load} is no longer available in recent RDFLib releases;
    # C{parse} accepts the same source and format arguments.
    retval.parse(self.response, format='json-ld')
    return retval
1081
def convert(self):
    """
    Encode the return value depending on the return format:
        - in the case of XML, a DOM top element is returned;
        - in the case of JSON, a simplejson conversion will return a dictionary;
        - in the case of RDF/XML, the value is converted via RDFLib into a C{Graph} instance;
        - in the case of JSON-LD, the value is converted via RDFLib into a C{Graph} instance;
        - in the case of RDF Turtle/N3, a string is returned;
        - in the case of CSV/TSV, a string is returned.
    In all other cases the input simply returned.

    A C{RuntimeWarning} is issued when the content type returned by the endpoint
    does not correspond to the requested format, or when it is unknown.

    @return: the converted query result. See the conversion methods for more details.
    """
    def _content_type_in_list(real, expected):
        """ Internal method for checking if the content-type header received matches any of the content types of the expected list.
        @param real: The content-type header received.
        @type real: string
        @param expected: A list of expected content types.
        @type expected: list
        @return: Returns a boolean after checking if the content-type header received matches any of the content types of the expected list.
        @rtype: boolean
        """
        return any(mime in real for mime in expected)

    def _validate_format(format_name, allowed, mime, requested):
        """ Internal method for validating if the requested format is one of the allowed formats.
        @param format_name: The format name (to be used in the warning message).
        @type format_name: string
        @param allowed: A list of allowed content types.
        @type allowed: list
        @param mime: The content-type header received (to be used in the warning message).
        @type mime: string
        @param requested: the requested format.
        @type requested: string
        """
        if requested not in allowed:
            message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
            # str() keeps the warning usable when no format was recorded
            # (requested is None) instead of failing on None.upper().
            warnings.warn(message % (str(requested).upper(), format_name, mime), RuntimeWarning)

    # Build the header mapping once; every call to info() creates a new copy.
    headers = self.info()
    # The attribute may be absent when the instance wraps a bare response.
    requested = getattr(self, "requestedFormat", None)

    if "content-type" in headers:
        ct = headers["content-type"]

        # (accepted mime types, label used in warnings,
        #  requested formats considered consistent, converter).
        # Order matters: the first matching entry wins.
        dispatch = (
            (_SPARQL_XML, "XML", [XML], self._convertXML),
            (_XML, "XML", [XML], self._convertXML),
            (_SPARQL_JSON, "JSON", [JSON], self._convertJSON),
            (_RDF_XML, "RDF/XML", [RDF, XML, RDFXML], self._convertRDF),
            (_RDF_N3, "N3", [N3, TURTLE], self._convertN3),
            (_CSV, "CSV", [CSV], self._convertCSV),
            (_TSV, "TSV", [TSV], self._convertTSV),
            (_RDF_JSONLD, "JSON(-LD)", [JSONLD, JSON], self._convertJSONLD),
        )
        for mimes, format_name, allowed, converter in dispatch:
            if _content_type_in_list(ct, mimes):
                _validate_format(format_name, allowed, ct, requested)
                return converter()

        warnings.warn("unknown response content type '%s' returning raw response..." %(ct), RuntimeWarning)
    # No (or unknown) content type: hand back the raw payload.
    return self.response.read()
1152
1173
def _get_responseFormat(self):
    """
    Get the response (return) format, derived from the C{Content-Type} header of the HTTP response.
    The possible values are: L{JSON}, L{XML}, L{RDFXML}, L{TURTLE}, L{N3}, L{CSV}, L{TSV}, L{JSONLD}.
    When the content type is not recognised, a C{RuntimeWarning} is issued and the raw
    content type is returned. When there is no C{Content-Type} header, C{None} is returned.

    @return: the response format, the raw content type, or C{None}.
    @rtype: string
    """
    # Build the header mapping once; every call to info() creates a new copy.
    headers = self.info()
    if "content-type" not in headers:
        return None
    ct = headers["content-type"]

    # (accepted mime types, format constant). Order matters: the first
    # entry with a mime type contained in the header wins.
    known_formats = (
        (_SPARQL_XML, XML),
        (_XML, XML),
        (_SPARQL_JSON, JSON),
        (_RDF_XML, RDFXML),
        (_RDF_TURTLE, TURTLE),
        (_RDF_N3, N3),
        (_CSV, CSV),
        (_TSV, TSV),
        (_RDF_JSONLD, JSONLD),
    )
    for mimes, response_format in known_formats:
        # Substring match: the header may carry parameters such as a charset.
        if any(mime in ct for mime in mimes):
            return response_format

    warnings.warn("Unknown response content type. Returning raw content-type ('%s')." %(ct), RuntimeWarning)
    return ct
1199
def print_results(self, minWidth=None):
    """This method prints a representation of a L{QueryResult} object that MUST have L{JSON} as response format.

    One header line with the variable names is printed, followed by a separator line and
    one line per solution. Variables that are unbound in a solution are printed as empty cells.
    If the response format is not L{JSON}, a C{RuntimeWarning} is issued and nothing is printed.

    @param minWidth: The minimum width of each column, counted in characters. The default value is C{None}.
    @type minWidth: int
    """
    responseFormat = self._get_responseFormat()
    if responseFormat != JSON:
        message = "Format return was %s, but JSON was expected. No printing."
        warnings.warn(message % (responseFormat), RuntimeWarning)
        return

    results = self._convertJSON()
    if minWidth:
        width = self.__get_results_width(results, minWidth)
    else:
        width = self.__get_results_width(results)
    variables = results["head"]["vars"]

    def _format_row(cells):
        # Each cell is padded to its column width and followed by " |";
        # cells are separated by a single space.
        return " ".join(cell.ljust(size) + " |" for cell, size in zip(cells, width))

    # Single-argument print(...) behaves the same as a statement (Python 2)
    # and as a function (Python 3).
    print(_format_row(["?" + var for var in variables]))
    print("=" * (sum(width) + 3 * len(width)))
    for result in results["results"]["bindings"]:
        cells = []
        for var in variables:
            if var in result:
                cells.append(self.__get_prettyprint_string_sparql_var_result(result[var]))
            else:
                # Unbound variables are omitted from the binding.
                cells.append("")
        print(_format_row(cells))

def __get_results_width(self, results, minWidth=2):
    """Compute the width of every column of the table printed by L{print_results}.

    @param results: the JSON results, already converted into a Python dict.
    @type results: dict
    @param minWidth: The minimum width of each column, counted in characters.
    @type minWidth: int
    @return: the width of each column, in the order of the variables of the result head.
    @rtype: list
    """
    variables = results["head"]["vars"]
    # The extra character accounts for the "?" prefix of the header cells.
    width = [max(minWidth, len(var) + 1) for var in variables]
    for result in results["results"]["bindings"]:
        for index, var in enumerate(variables):
            if var not in result:
                # Unbound variable: printed as an empty cell.
                continue
            result_value = self.__get_prettyprint_string_sparql_var_result(result[var])
            width[index] = max(width[index], len(result_value))
    return width
1243
def __get_prettyprint_string_sparql_var_result(self, result):
    """Build the printable text of a single bound value.

    The language tag (key C{xml:lang}), if any, is appended as C{@lang}; the datatype
    (key C{datatype}), if any, is appended between square brackets.

    @param result: one binding, as a dict with at least the key C{value}.
    @type result: dict
    @return: the printable representation of the binding.
    @rtype: string
    """
    value = result["value"]
    lang = result.get("xml:lang", None)
    datatype = result.get("datatype", None)
    if lang is not None:
        value += "@"+lang
    if datatype is not None:
        value += " ["+datatype+"]"
    return value
1253
def __str__(self):
    """This method returns the string representation of a L{QueryResult} object.

    The representation contains the requested format and the URL, status code and headers
    of the wrapped HTTP response.

    @return: A human-readable string of the object.
    @rtype: string
    @since: 1.8.3
    """
    fullname = self.__module__ + "." + self.__class__.__name__
    # The attribute may be absent when the instance wraps a bare response.
    str_requestedFormat = '"requestedFormat" : '+repr(getattr(self, "requestedFormat", None))
    str_url = self.response.url
    str_code = self.response.code
    str_headers = self.response.info()
    str_response = '"response (a file-like object, as return by the urllib2.urlopen library call)" : {\n\t"url" : "%s",\n\t"code" : "%s",\n\t"headers" : %s}' % (str_url, str_code, str_headers)
    return "<%s object at 0x%016X>\n{%s,\n%s}" % (fullname, id(self), str_requestedFormat, str_response)
1267