Package SPARQLWrapper :: Module Wrapper
[hide private]
[frames] | no frames]

Source Code for Module SPARQLWrapper.Wrapper

  1  # -*- coding: utf-8 -*- 
  2  # epydoc 
  3  # 
  4  """ 
  5  @var JSON: to be used to set the return format to JSON 
  6  @var XML: to be used to set the return format to XML (SPARQL XML format or RDF/XML, depending on the query type). This is the default. 
  7  @var TURTLE: to be used to set the return format to Turtle 
  8  @var N3: to be used to set the return format to N3 (for most of the SPARQL services this is equivalent to Turtle) 
  9  @var RDF: to be used to set the return RDF Graph 
 10   
 11  @var POST: to be used to set HTTP POST 
 12  @var GET: to be used to set HTTP GET. This is the default. 
 13   
 14  @var SELECT: to be used to set the query type to SELECT. This is, usually, determined automatically. 
 15  @var CONSTRUCT: to be used to set the query type to CONSTRUCT. This is, usually, determined automatically. 
 16  @var ASK: to be used to set the query type to ASK. This is, usually, determined automatically. 
 17  @var DESCRIBE: to be used to set the query type to DESCRIBE. This is, usually, determined automatically. 
 18   
 19  @see: U{SPARQL Specification<http://www.w3.org/TR/rdf-sparql-query/>} 
 20  @authors: U{Ivan Herman<http://www.ivan-herman.net>}, U{Sergio Fernández<http://www.wikier.org>}, U{Carlos Tejo Alonso<http://www.dayures.net>} 
 21  @organization: U{World Wide Web Consortium<http://www.w3.org>}, U{Salzburg Research<http://www.salzburgresearch.at>} and U{Foundation CTIC<http://www.fundacionctic.org/>}. 
 22  @license: U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/copyright-software>} 
 23  @requires: U{RDFLib<http://rdflib.net>} package. 
 24  """ 
 25   
 26  import urllib 
 27  import urllib2 
 28  from urllib2 import urlopen as urlopener  # don't change the name: tests override it 
 29  import socket 
 30  import base64 
 31  import re 
 32  import sys 
 33  import warnings 
 34   
 35  import json 
 36  from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict 
 37  from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError 
 38  from SPARQLUtils import deprecated 
 39  from SPARQLWrapper import __agent__ 
 40   
# Possible output format keys. These are the values accepted by
# SPARQLWrapper.setReturnFormat() and by the constructor's returnFormat keyword.
JSON   = "json"
JSONLD = "json-ld"
XML    = "xml"
TURTLE = "turtle"
N3     = "n3"
RDF    = "rdf"
# JSONLD is deliberately absent here: it is appended below only when the
# optional rdflib_jsonld module can be imported.
_allowedFormats = [JSON, XML, TURTLE, N3, RDF]

# Possible HTTP methods
POST = "POST"
GET  = "GET"
_allowedRequests = [POST, GET]

# Possible HTTP Authentication methods
BASIC = "BASIC"
DIGEST = "DIGEST"
_allowedAuth = [BASIC, DIGEST]

# Possible SPARQL/SPARUL query types
SELECT     = "SELECT"
CONSTRUCT  = "CONSTRUCT"
ASK        = "ASK"
DESCRIBE   = "DESCRIBE"
INSERT     = "INSERT"
DELETE     = "DELETE"
CREATE     = "CREATE"
CLEAR      = "CLEAR"
DROP       = "DROP"
LOAD       = "LOAD"
COPY       = "COPY"
MOVE       = "MOVE"
ADD        = "ADD"
_allowedQueryTypes = [SELECT, CONSTRUCT, ASK, DESCRIBE, INSERT, DELETE, CREATE, CLEAR, DROP,
                      LOAD, COPY, MOVE, ADD]

# Possible methods to perform requests: either as URL-encoded parameters or by
# sending the query text directly as the request body.
URLENCODED = "urlencoded"
POSTDIRECTLY = "postdirectly"
_REQUEST_METHODS  = [URLENCODED, POSTDIRECTLY]

# Possible output formats (mime types) that can be converted by the local script. Unfortunately,
# it does not work by simply setting the return format, because there is still a certain level of confusion
# among implementations.
# For example, Joseki returns application/javascript and not the sparql-results+json thing that is required...
# I.e., alternatives should be given...
# Andy Seaborne told me (June 2007) that the right return format is now added to his CVS, i.e., future releases of
# Joseki will be o.k., too. The situation with turtle and n3 is even more confusing because the text/n3 and text/turtle
# mime types have just been proposed and are not yet widely used...
_SPARQL_DEFAULT  = ["application/sparql-results+xml", "application/rdf+xml", "*/*"]
_SPARQL_XML      = ["application/sparql-results+xml"]
_SPARQL_JSON     = ["application/sparql-results+json", "text/javascript", "application/json"]
_RDF_XML         = ["application/rdf+xml"]
_RDF_N3          = ["text/rdf+n3", "application/n-triples", "application/turtle", "application/n3", "text/n3", "text/turtle"]
_RDF_JSONLD      = ["application/x-json+ld", "application/ld+json"]
_ALL             = ["*/*"]
_RDF_POSSIBLE    = _RDF_XML + _RDF_N3
_SPARQL_POSSIBLE = _SPARQL_XML + _SPARQL_JSON + _RDF_XML + _RDF_N3
# Parameter names reserved for the query itself; addParameter()/clearParameter()
# refuse to touch them.
_SPARQL_PARAMS   = ["query"]

# JSON-LD support is optional: it is enabled only when rdflib_jsonld is importable.
try:
    import rdflib_jsonld
    _allowedFormats.append(JSONLD)
    _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD
except ImportError:
    #warnings.warn("JSON-LD disabled because no suitable support has been found", RuntimeWarning)
    pass

# This is very ugly. The fact is that the key for the choice of the output format is not defined.
# Virtuoso uses 'format', joseki uses 'output', rasqal seems to use "results", etc. Lee Feigenbaum
# told me that virtuoso also understands 'output' these days, so I removed 'format'. I do not have
# info about the others yet, i.e., for the time being I keep the general mechanism. Hopefully, in a
# future release, I can get rid of that. However, these processors are (hopefully) oblivious to the
# parameters they do not understand. So: just repeat all possibilities in the final URI. UGLY!!!!!!!
_returnFormatSetting = ["format", "output", "results"]
#######################################################################################################


class SPARQLWrapper(object):
    """
    Wrapper around an online access to a SPARQL Web entry point.

    The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc,
    are retained from one query to the next (in other words, only the query string changes). The instance can also be
    reset to its initial values using the L{resetQuery} method.

    @cvar pattern: regular expression used to determine whether a query is of type L{CONSTRUCT}, L{SELECT}, L{ASK}, or L{DESCRIBE}
        (or one of the update operations).
    @type pattern: compiled regular expression (see the C{re} module of Python)
    @ivar endpoint: the URI of the SPARQL service
    @ivar updateEndpoint: the URI used for update operations (same as C{endpoint} unless given explicitly)
    """
    pattern = re.compile(r"""
        ((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))*
        (?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))
    """, re.VERBOSE | re.IGNORECASE)

    def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
        """
        Class encapsulating a full SPARQL call.
        @param endpoint: string of the SPARQL endpoint's URI
        @type endpoint: string
        @param updateEndpoint: string of the SPARQL endpoint's URI for update operations (if it's a different one)
        @type updateEndpoint: string
        @keyword returnFormat: Default: L{XML}.
        Can be set to JSON or Turtle/N3

        No local check is done, the parameter is simply
        sent to the endpoint. Eg, if the value is set to JSON and a construct query is issued, it
        is up to the endpoint to react or not, this wrapper does not check.

        Possible values:
        L{JSON}, L{XML}, L{TURTLE}, L{N3} (constants in this module). The value can also be set via explicit
        call, see below. A value that is not an allowed format silently falls back to L{XML}.
        @type returnFormat: string
        @keyword defaultGraph: URI for the default graph. Default is None, the value can be set either via an L{explicit call<addDefaultGraph>} or as part of the query string.
        @type defaultGraph: string
        @keyword agent: value sent in the C{User-Agent} HTTP header
        @type agent: string
        """
        self.endpoint = endpoint
        self.updateEndpoint = updateEndpoint if updateEndpoint else endpoint
        self.agent = agent
        self.user = None
        self.passwd = None
        self.http_auth = BASIC
        self._defaultGraph = defaultGraph

        if returnFormat in _allowedFormats:
            self._defaultReturnFormat = returnFormat
        else:
            self._defaultReturnFormat = XML

        self.resetQuery()

    def resetQuery(self):
        """Reset the query, ie, return format, query, default or named graph settings, etc,
        are reset to their default values."""
        self.parameters = {}
        if self._defaultGraph:
            self.addParameter("default-graph-uri", self._defaultGraph)
        self.returnFormat = self._defaultReturnFormat
        self.method = GET
        self.setQuery("""SELECT * WHERE{ ?s ?p ?o }""")
        self.timeout = None
        self.requestMethod = URLENCODED

    def setReturnFormat(self, format):
        """Set the return format.

        @param format: Possible values: are L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF} (constants in this module),
            plus L{JSONLD} when JSON-LD support is available.
        @type format: str
        @raise ValueError: if the format is not one of the currently allowed values
        """
        if format in _allowedFormats:
            self.returnFormat = format
        elif format == JSONLD:
            raise ValueError("Current instance does not support JSON-LD; you might want to install the rdflib-jsonld package.")
        else:
            # The arguments must be interpolated with '%'; passing them as a second
            # positional argument would leave the placeholders unformatted.
            raise ValueError("Invalid format '%s'; current instance supports: %s." % (format, ", ".join(_allowedFormats)))

    def supportsReturnFormat(self, format):
        """Check if a return format is supported.

        @param format: Possible values: are L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF} (constants in this module).
        @type format: str
        @return: whether the format is among the currently allowed formats
        @rtype: bool
        """
        return (format in _allowedFormats)

    def setTimeout(self, timeout):
        """Set the timeout (in seconds) to use for querying the endpoint.

        @param timeout: Timeout in seconds.
        @type timeout: int
        """
        self.timeout = int(timeout)

    def setRequestMethod(self, method):
        """Set the internal method to use to perform the request for query or
        update operations, either URL-encoded (C{SPARQLWrapper.URLENCODED}) or
        POST directly (C{SPARQLWrapper.POSTDIRECTLY}).
        Further details at U{http://www.w3.org/TR/sparql11-protocol/#query-operation}
        and U{http://www.w3.org/TR/sparql11-protocol/#update-operation}.

        An unknown value is not applied; a C{RuntimeWarning} is issued instead.

        @param method: method
        @type method: str
        """
        if method in _REQUEST_METHODS:
            self.requestMethod = method
        else:
            warnings.warn("invalid update method '%s'" % method, RuntimeWarning)

    @deprecated
    def addDefaultGraph(self, uri):
        """
        Add a default graph URI.
        @param uri: URI of the graph
        @type uri: string
        @deprecated: use addParameter("default-graph-uri", uri) instead of this method
        """
        self.addParameter("default-graph-uri", uri)

    @deprecated
    def addNamedGraph(self, uri):
        """
        Add a named graph URI.
        @param uri: URI of the graph
        @type uri: string
        @deprecated: use addParameter("named-graph-uri", uri) instead of this method
        """
        self.addParameter("named-graph-uri", uri)

    @deprecated
    def addExtraURITag(self, key, value):
        """
        Some SPARQL endpoints require extra key value pairs.
        E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
        virtuoso to retrieve graphs that are not stored in its local database.
        @param key: key of the query part
        @type key: string
        @param value: value of the query part
        @type value: string
        @deprecated: use addParameter(key, value) instead of this method
        """
        self.addParameter(key, value)

    @deprecated
    def addCustomParameter(self, name, value):
        """
        Method is kept for backwards compatibility. Historically, it "replaces" parameters instead of adding
        @param name: name
        @type name: string
        @param value: value
        @type value: string
        @rtype: bool
        @deprecated: use addParameter(name, value) instead of this method
        """
        self.clearParameter(name)
        return self.addParameter(name, value)

    def addParameter(self, name, value):
        """
        Some SPARQL endpoints allow extra key value pairs.
        E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
        virtuoso to retrieve graphs that are not stored in its local database.
        @param name: name
        @type name: string
        @param value: value
        @type value: string
        @return: C{False} if the name is reserved for the query itself, C{True} once the value was added
        @rtype: bool
        """
        if name in _SPARQL_PARAMS:
            return False
        else:
            if name not in self.parameters:
                self.parameters[name] = []
            self.parameters[name].append(value)
            return True

    def clearParameter(self, name):
        """
        Clear the values of a concrete parameter.
        @param name: name
        @type name: string
        @return: C{True} if the parameter existed and was removed, C{False} otherwise
        @rtype: bool
        """
        if name in _SPARQL_PARAMS:
            return False
        else:
            try:
                del self.parameters[name]
                return True
            except KeyError:
                return False

    def setCredentials(self, user, passwd):
        """
        Set the credentials for querying the current endpoint
        @param user: username
        @type user: string
        @param passwd: password
        @type passwd: string
        """
        self.user = user
        self.passwd = passwd

    def setHTTPAuth(self, auth):
        """
        Set the HTTP Authentication type (Basic or Digest)
        @param auth: auth type (case-insensitive)
        @type auth: string
        @raise TypeError: if C{auth} is not a string
        @raise ValueError: if C{auth} is not one of the supported types
        """
        if not isinstance(auth, str):
            raise TypeError('setHTTPAuth takes a string')
        elif auth.upper() in _allowedAuth:
            self.http_auth = auth.upper()
        else:
            valid_types = ", ".join(_allowedAuth)
            raise ValueError("Value should be one of {0}".format(valid_types))

    def setQuery(self, query):
        """
        Set the SPARQL query text. Note: no check is done on the validity of the query
        (syntax or otherwise) by this module, except for testing the query type (SELECT,
        ASK, etc). Syntax and validity checking is done by the SPARQL service itself.
        @param query: query text
        @type query: string
        @raise TypeError: if the query is neither a unicode string nor a byte string
        @bug: #2320024
        """
        if sys.version < '3':  # have to write it like this, for 2to3 compatibility
            if isinstance(query, unicode):
                pass
            elif isinstance(query, str):
                query = query.decode('utf-8')
            else:
                raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')
        else:
            if isinstance(query, str):
                pass
            elif isinstance(query, bytes):
                query = query.decode('utf-8')
            else:
                raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')

        self.queryString = query
        self.queryType = self._parseQueryType(query)

    def _parseQueryType(self, query):
        """
        Parse the SPARQL query and return its type (ie, L{SELECT}, L{ASK}, etc).

        Note that the method returns L{SELECT} if nothing is specified. This is just to get all other
        methods running; in fact, this means that the query is erroneous, because the query must be,
        according to the SPARQL specification, one of Select, Ask, Describe, or Construct. The
        SPARQL endpoint should raise an exception (via urllib) for such syntax error.

        @param query: query text
        @type query: string
        @rtype: string
        """
        try:
            query = query if type(query)==str else query.encode('ascii', 'ignore')
            query = re.sub(re.compile("#.*?\n" ), "" , query)  # remove all occurrences of single-line comments (issue #32)
            # search() returns None when nothing matches; the resulting AttributeError is handled below
            r_queryType = self.pattern.search(query).group("queryType").upper()
        except AttributeError:
            warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning)
            r_queryType = None

        if r_queryType in _allowedQueryTypes:
            return r_queryType
        else:
            #raise Exception("Illegal SPARQL Query; must be one of SELECT, ASK, DESCRIBE, or CONSTRUCT")
            warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning)
            return SELECT

    def setMethod(self, method):
        """Set the invocation method. By default, this is L{GET}, but can be set to L{POST}.
        @param method: should be either L{GET} or L{POST}. Other cases are ignored.
        """
        if method in _allowedRequests:
            self.method = method

    def setUseKeepAlive(self):
        """Make urllib2 use keep-alive.

        If C{keepalive.HTTPHandler} cannot be imported, a warning is issued and
        the call has no other effect.
        """
        try:
            from keepalive import HTTPHandler
            keepalive_handler = HTTPHandler()
            opener = urllib2.build_opener(keepalive_handler)
            urllib2.install_opener(opener)
        except ImportError:
            warnings.warn("keepalive support not available, so the execution of this method has no effect")

    def isSparqlUpdateRequest(self):
        """ Returns TRUE if SPARQLWrapper is configured for executing SPARQL Update request
        @return: bool
        """
        return self.queryType in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]

    def isSparqlQueryRequest(self):
        """ Returns TRUE if SPARQLWrapper is configured for executing SPARQL Query request
        @return: bool
        """
        return not self.isSparqlUpdateRequest()

    def _getRequestEncodedParameters(self, query=None):
        """Build the URL-encoded parameter string for the request.

        @param query: optional C{(name, text)} pair, e.g. C{("query", queryString)} or
            C{("update", queryString)}, added to the stored parameters
        @type query: tuple
        @return: C{name=value} pairs joined by C{&}
        @rtype: string
        """
        # Shallow copy: entries are replaced below, the stored value lists are never mutated
        query_parameters = self.parameters.copy()

        if query and isinstance(query, tuple) and len(query) == 2:
            #tuple ("query"/"update", queryString)
            query_parameters[query[0]] = [query[1]]

        # This is very ugly. The fact is that the key for the choice of the output format is not defined.
        # Virtuoso uses 'format',sparqler uses 'output'
        # However, these processors are (hopefully) oblivious to the parameters they do not understand.
        # So: just repeat all possibilities in the final URI. UGLY!!!!!!!
        for f in _returnFormatSetting:
            query_parameters[f] = [self.returnFormat]

        pairs = (
            "%s=%s" % (
                urllib.quote_plus(param.encode('UTF-8'), safe='/'),
                urllib.quote_plus(value.encode('UTF-8'), safe='/')
            )
            for param, values in query_parameters.items() for value in values
        )

        return '&'.join(pairs)

    def _getAcceptHeader(self):
        """Compute the C{Accept} header value from the query type and the return format.

        @return: comma separated list of mime types
        @rtype: string
        """
        if self.queryType in [SELECT, ASK]:
            if self.returnFormat == XML:
                acceptHeader = ",".join(_SPARQL_XML)
            elif self.returnFormat == JSON:
                acceptHeader = ",".join(_SPARQL_JSON)
            else:
                acceptHeader = ",".join(_ALL)
        elif self.queryType in [INSERT, DELETE]:
            acceptHeader = "*/*"
        else:
            if self.returnFormat == N3 or self.returnFormat == TURTLE:
                acceptHeader = ",".join(_RDF_N3)
            elif self.returnFormat == XML:
                acceptHeader = ",".join(_RDF_XML)
            elif self.returnFormat == JSONLD and JSONLD in _allowedFormats:
                acceptHeader = ",".join(_RDF_JSONLD)
            else:
                acceptHeader = ",".join(_ALL)
        return acceptHeader

    def _createRequest(self):
        """Internal method to create request according a HTTP method. Returns a
        C{urllib2.Request} object of the urllib2 Python library
        @return: request
        @raise NotImplementedError: if credentials are set and the authentication type is unknown
        """
        request = None

        if self.isSparqlUpdateRequest():
            #protocol details at http://www.w3.org/TR/sparql11-protocol/#update-operation
            uri = self.updateEndpoint

            if self.method != POST:
                warnings.warn("update operations MUST be done by POST")

            if self.requestMethod == POSTDIRECTLY:
                request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
                request.add_header("Content-Type", "application/sparql-update")
                request.data = self.queryString.encode('UTF-8')
            else:  # URL-encoded
                request = urllib2.Request(uri)
                request.add_header("Content-Type", "application/x-www-form-urlencoded")
                request.data = self._getRequestEncodedParameters(("update", self.queryString)).encode('ascii')
        else:
            #protocol details at http://www.w3.org/TR/sparql11-protocol/#query-operation
            uri = self.endpoint

            if self.method == POST:
                if self.requestMethod == POSTDIRECTLY:
                    request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
                    request.add_header("Content-Type", "application/sparql-query")
                    request.data = self.queryString.encode('UTF-8')
                else:  # URL-encoded
                    request = urllib2.Request(uri)
                    request.add_header("Content-Type", "application/x-www-form-urlencoded")
                    request.data = self._getRequestEncodedParameters(("query", self.queryString)).encode('ascii')
            else:  # GET
                request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters(("query", self.queryString)))

        request.add_header("User-Agent", self.agent)
        request.add_header("Accept", self._getAcceptHeader())
        if self.user and self.passwd:
            if self.http_auth == BASIC:
                credentials = "%s:%s" % (self.user, self.passwd)
                token = base64.b64encode(credentials.encode('utf-8'))
                if not isinstance(token, str):
                    # Python 3: b64encode returns bytes, and "%s" % bytes would put
                    # "b'...'" into the header. On Python 2 the token is already a str.
                    token = token.decode('ascii')
                request.add_header("Authorization", "Basic %s" % token)
            elif self.http_auth == DIGEST:
                realm = "SPARQL"
                pwd_mgr = urllib2.HTTPPasswordMgr()
                pwd_mgr.add_password(realm, uri, self.user, self.passwd)
                opener = urllib2.build_opener()
                opener.add_handler(urllib2.HTTPDigestAuthHandler(pwd_mgr))
                # NOTE: this installs the opener globally for urllib2
                urllib2.install_opener(opener)
            else:
                valid_types = ", ".join(_allowedAuth)
                raise NotImplementedError("Expecting one of: {0}, but received: {1}".format(valid_types,
                                                                                            self.http_auth))

        return request

    def _query(self):
        """Internal method to execute the query. Returns the output of the
        C{urllib2.urlopen} method of the standard Python library

        @return: tuples with the raw request plus the expected format
        @raise QueryBadFormed: if the endpoint answers with HTTP 400
        @raise EndPointNotFound: if the endpoint answers with HTTP 404
        @raise EndPointInternalError: if the endpoint answers with HTTP 500
        """
        if self.timeout:
            # NOTE: this changes the process-wide default socket timeout
            socket.setdefaulttimeout(self.timeout)

        request = self._createRequest()

        try:
            response = urlopener(request)
            return response, self.returnFormat
        except urllib2.HTTPError as e:
            if e.code == 400:
                raise QueryBadFormed(e.read())
            elif e.code == 404:
                raise EndPointNotFound(e.read())
            elif e.code == 500:
                raise EndPointInternalError(e.read())
            else:
                # bare raise keeps the original traceback
                raise

    def query(self):
        """
        Execute the query.
        Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
        case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint).
        The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too.

        Note that some combinations of return formats and query types may not make sense. For example,
        a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph), or a CONSTRUCT
        query with JSON output may be a problem because, at the moment, there is no accepted JSON serialization
        of RDF (let alone one implemented by SPARQL endpoints). In such cases the returned media type of the result is
        unpredictable and may differ from one SPARQL endpoint implementation to the other. (Endpoints usually fall
        back to one of the "meaningful" formats, but it is up to the specific implementation to choose which
        one that is.)

        @return: query result
        @rtype: L{QueryResult} instance
        """
        return QueryResult(self._query())

    def queryAndConvert(self):
        """Macro like method: issue a query and return the converted results.
        @return: the converted query result. See the conversion methods for more details.
        """
        res = self.query()
        return res.convert()

#######################################################################################################


class QueryResult(object):
    """
    Wrapper around a query result. Users should not create instances of this class, it is
    generated by a L{SPARQLWrapper.query} call. The results can be
    converted to various formats, or used directly.

    If used directly: the class gives access to the direct http request results
    L{self.response}: it is a file-like object with two additional methods: C{geturl()} to
    return the URL of the resource retrieved and
    C{info()} that returns the meta-information of the HTTP result as a dictionary-like object
    (see the urllib2 standard library module of Python).

    For convenience, these methods are also available on the instance. The C{__iter__} and
    C{next} methods are also implemented (by mapping them to L{self.response}). This means that the
    common idiom::
     for l in obj : do_something_with_line(l)
    would work, too.

    @ivar response: the direct HTTP response; a file-like object, as return by the C{urllib2.urlopen} library call.
    @ivar requestedFormat: the return format that was requested, or C{None} when unknown
    """
    def __init__(self, result):
        """
        @param result: HTTP response stemming from a L{SPARQLWrapper.query} call, or a tuple with the expected format: (response,format)
        """
        if isinstance(result, tuple):
            # Direct response, see class comments for details
            self.response = result[0]
            self.requestedFormat = result[1]
        else:
            self.response = result
            # No format information was supplied; keep the attribute defined so
            # that convert() does not fail on a missing attribute.
            self.requestedFormat = None

    def geturl(self):
        """Return the URI of the original call.
        @return: URI
        @rtype: string
        """
        return self.response.geturl()

    def info(self):
        """Return the meta-information of the HTTP result.
        @return: meta information
        @rtype: dictionary
        """
        return KeyCaseInsensitiveDict(self.response.info())

    def __iter__(self):
        """Return an iterator object. This method is expected for the inclusion
        of the object in a standard C{for} loop.
        """
        return self.response.__iter__()

    def next(self):
        """Method for the standard iterator."""
        return self.response.next()

    def _convertJSON(self):
        """
        Convert a JSON result into a Python dict. This method can be overwritten in a subclass
        for a different conversion method.
        @return: converted result
        @rtype: Python dictionary
        """
        return json.loads(self.response.read().decode("utf-8"))

    def _convertXML(self):
        """
        Convert an XML result into a Python dom tree. This method can be overwritten in a
        subclass for a different conversion method.
        @return: converted result
        @rtype: PyXlib DOM node
        """
        from xml.dom.minidom import parse
        return parse(self.response)

    def _convertRDF(self):
        """
        Convert a RDF/XML result into an RDFLib triple store. This method can be overwritten
        in a subclass for a different conversion method.
        @return: converted result
        @rtype: RDFLib Graph
        """
        try:
            from rdflib.graph import ConjunctiveGraph
        except ImportError:
            from rdflib import ConjunctiveGraph
        retval = ConjunctiveGraph()
        # this is a strange hack. If the publicID is not set, rdflib (or the underlying xml parser) makes a funny
        #(and, as far as I could see, meaningless) error message...
        retval.load(self.response, publicID=' ')
        return retval

    def _convertN3(self):
        """
        Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass
        for a different conversion method.
        @return: converted result
        @rtype: string
        """
        return self.response.read()

    def _convertJSONLD(self):
        """
        Convert a RDF JSON-LD result into an RDFLib triple store. This method can be overwritten
        in a subclass for a different conversion method.
        @return: converted result
        @rtype: RDFLib Graph
        """
        from rdflib import ConjunctiveGraph
        retval = ConjunctiveGraph()
        retval.load(self.response, format='json-ld', publicID=' ')
        return retval

    def convert(self):
        """
        Encode the return value depending on the return format:
            - in the case of XML, a DOM top element is returned;
            - in the case of JSON, the parsed JSON is returned as a dictionary;
            - in the case of RDF/XML, the value is converted via RDFLib into a Graph instance.
        In all other cases the input simply returned.

        The conversion is chosen from the C{content-type} of the response. A
        C{RuntimeWarning} is issued when that type does not correspond to the
        requested format, or when the content type is unknown.

        @return: the converted query result. See the conversion methods for more details.
        """
        def _content_type_in_list(real, expected):
            return any(mime in real for mime in expected)

        def _validate_format(format_name, allowed, mime, requested):
            # Without a recorded requested format there is nothing to compare against
            if requested is not None and requested not in allowed:
                message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
                warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)

        requested = getattr(self, "requestedFormat", None)
        headers = self.info()  # build the header dictionary only once

        if "content-type" in headers:
            ct = headers["content-type"]

            if _content_type_in_list(ct, _SPARQL_XML):
                _validate_format("XML", [XML], ct, requested)
                return self._convertXML()
            elif _content_type_in_list(ct, _SPARQL_JSON):
                _validate_format("JSON", [JSON], ct, requested)
                return self._convertJSON()
            elif _content_type_in_list(ct, _RDF_XML):
                _validate_format("RDF/XML", [RDF, XML], ct, requested)
                return self._convertRDF()
            elif _content_type_in_list(ct, _RDF_N3):
                _validate_format("N3", [N3, TURTLE], ct, requested)
                return self._convertN3()
            elif _content_type_in_list(ct, _RDF_JSONLD):
                _validate_format("JSON(-LD)", [JSONLD, JSON], ct, requested)
                return self._convertJSONLD()

        warnings.warn("unknown response content type, returning raw response...", RuntimeWarning)
        return self.response.read()

    def print_results(self, minWidth=None):
        """Print the (JSON) result of a SELECT query as a text table on standard output.

        The first line lists the variables, followed by a separator line and one
        line per result row. Variables that are unbound in a row are printed as
        empty cells.

        @param minWidth: minimal width of a column; the built-in default is used when not set
        @type minWidth: int
        """
        results = self._convertJSON()
        if minWidth:
            width = self.__get_results_width(results, minWidth)
        else:
            width = self.__get_results_width(results)
        variables = results["head"]["vars"]

        # Each line is built as one string and printed with a single-argument
        # print(...), which produces the same output on Python 2 and Python 3.
        print(" ".join(("?" + var).ljust(width[index]) + " |"
                       for index, var in enumerate(variables)))
        print("=" * (sum(width) + 3 * len(width)))
        for row in results["results"]["bindings"]:
            cells = []
            for index, var in enumerate(variables):
                if var in row:
                    text = self.__get_prettyprint_string_sparql_var_result(row[var])
                else:
                    text = ""  # variable not bound in this row
                cells.append(text.ljust(width[index]) + " |")
            print(" ".join(cells))

    def __get_results_width(self, results, minWidth=2):
        """Compute the width of every column of the table printed by L{print_results}.

        @param results: parsed JSON result (C{head}/C{vars} and C{results}/C{bindings})
        @type results: dict
        @param minWidth: minimal width of a column
        @type minWidth: int
        @return: one width per variable, in the order of C{results["head"]["vars"]}
        @rtype: list
        """
        variables = results["head"]["vars"]
        # "+ 1" accounts for the "?" printed in front of the variable name
        width = [max(minWidth, len(var) + 1) for var in variables]
        for row in results["results"]["bindings"]:
            for index, var in enumerate(variables):
                if var in row:
                    text = self.__get_prettyprint_string_sparql_var_result(row[var])
                    width[index] = max(width[index], len(text))
        return width

    def __get_prettyprint_string_sparql_var_result(self, result):
        """Format one binding as text: the value, followed by C{@lang} and/or
        C{ [datatype]} when those keys are present.

        @param result: a single binding, with at least a C{value} key
        @type result: dict
        @rtype: string
        """
        value = result["value"]
        lang = result.get("xml:lang", None)
        datatype = result.get("datatype", None)
        if lang is not None:
            value += "@" + lang
        if datatype is not None:
            value += " [" + datatype + "]"
        return value
