Package SPARQLWrapper :: Module Wrapper
[hide private]
[frames] | no frames]

Source Code for Module SPARQLWrapper.Wrapper

  1  # -*- coding: utf-8 -*- 
  2  # epydoc 
  3  # 
  4  """ 
  5  @var JSON: to be used to set the return format to JSON 
  6  @var XML: to be used to set the return format to XML (SPARQL XML format or RDF/XML, depending on the query type). This is the default. 
  7  @var TURTLE: to be used to set the return format to Turtle 
  8  @var N3: to be used to set the return format to N3 (for most of the SPARQL services this is equivalent to Turtle) 
  9  @var RDF: to be used to set the return RDF Graph 
 10   
 11  @var POST: to be used to set HTTP POST 
 12  @var GET: to be used to set HTTP GET. This is the default. 
 13   
 14  @var SELECT: to be used to set the query type to SELECT. This is, usually, determined automatically. 
 15  @var CONSTRUCT: to be used to set the query type to CONSTRUCT. This is, usually, determined automatically. 
 16  @var ASK: to be used to set the query type to ASK. This is, usually, determined automatically. 
 17  @var DESCRIBE: to be used to set the query type to DESCRIBE. This is, usually, determined automatically. 
 18   
 19  @see: U{SPARQL Specification<http://www.w3.org/TR/rdf-sparql-query/>} 
 20  @authors: U{Ivan Herman<http://www.ivan-herman.net>}, U{Sergio Fernández<http://www.wikier.org>}, U{Carlos Tejo Alonso<http://www.dayures.net>} 
 21  @organization: U{World Wide Web Consortium<http://www.w3.org>}, U{Salzburg Research<http://www.salzburgresearch.at>} and U{Foundation CTIC<http://www.fundacionctic.org/>}. 
 22  @license: U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/copyright-software>} 
 23  @requires: U{RDFLib<http://rdflib.net>} package. 
 24  """ 
 25   
 26  import urllib 
 27  import urllib2 
 28  from urllib2 import urlopen as urlopener  # don't change the name: tests override it 
 29  import socket 
 30  import base64 
 31  import re 
 32  import sys 
 33  import warnings 
 34   
 35  import jsonlayer 
 36  from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict 
 37  from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError 
 38  from SPARQLUtils import deprecated 
 39  from SPARQLWrapper import __agent__ 
 40   
 41  #  Possible output format keys... 
 42  JSON   = "json" 
 43  JSONLD = "json-ld" 
 44  XML    = "xml" 
 45  TURTLE = "turtle" 
 46  N3     = "n3" 
 47  RDF    = "rdf" 
 48  _allowedFormats = [JSON, XML, TURTLE, N3, RDF] 
 49   
 50  # Possible HTTP methods 
 51  POST = "POST" 
 52  GET  = "GET" 
 53  _allowedRequests = [POST, GET] 
 54   
 55  # Possible HTTP Authentication methods 
 56  BASIC = "BASIC" 
 57  DIGEST = "DIGEST" 
 58  _allowedAuth = [BASIC, DIGEST] 
 59   
 60  # Possible SPARQL/SPARUL query type 
 61  SELECT     = "SELECT" 
 62  CONSTRUCT  = "CONSTRUCT" 
 63  ASK        = "ASK" 
 64  DESCRIBE   = "DESCRIBE" 
 65  INSERT     = "INSERT" 
 66  DELETE     = "DELETE" 
 67  CREATE     = "CREATE" 
 68  CLEAR      = "CLEAR" 
 69  DROP       = "DROP" 
 70  LOAD       = "LOAD" 
 71  COPY       = "COPY" 
 72  MOVE       = "MOVE" 
 73  ADD        = "ADD" 
 74  _allowedQueryTypes = [SELECT, CONSTRUCT, ASK, DESCRIBE, INSERT, DELETE, CREATE, CLEAR, DROP, 
 75                        LOAD, COPY, MOVE, ADD] 
 76   
 77  # Possible methods to perform requests 
 78  URLENCODED = "urlencoded" 
 79  POSTDIRECTLY = "postdirectly" 
 80  _REQUEST_METHODS  = [URLENCODED, POSTDIRECTLY] 
 81   
 82  # Possible output format (mime types) that can be converted by the local script. Unfortunately, 
 83  # it does not work by simply setting the return format, because there is still a certain level of confusion 
 84  # among implementations. 
 85  # For example, Joseki returns application/javascript and not the sparql-results+json thing that is required... 
 86  # Ie, alternatives should be given... 
 87  # Andy Seaborne told me (June 2007) that the right return format is now added to his CVS, ie, future releases of 
 88  # joseki will be o.k., too. The situation with turtle and n3 is even more confusing because the text/n3 and text/turtle 
 89  # mime types have just been proposed and not yet widely used... 
 90  _SPARQL_DEFAULT  = ["application/sparql-results+xml", "application/rdf+xml", "*/*"] 
 91  _SPARQL_XML      = ["application/sparql-results+xml"] 
 92  _SPARQL_JSON     = ["application/sparql-results+json", "text/javascript", "application/json"] 
 93  _RDF_XML         = ["application/rdf+xml"] 
 94  _RDF_N3          = ["text/rdf+n3", "application/n-triples", "application/turtle", "application/n3", "text/n3", "text/turtle"] 
 95  _RDF_JSONLD      = ["application/x-json+ld", "application/ld+json"] 
 96  _ALL             = ["*/*"] 
 97  _RDF_POSSIBLE    = _RDF_XML + _RDF_N3 
 98  _SPARQL_POSSIBLE = _SPARQL_XML + _SPARQL_JSON + _RDF_XML + _RDF_N3 
 99  _SPARQL_PARAMS   = ["query"] 
100   
101  try: 
102      import rdflib_jsonld 
103      _allowedFormats.append(JSONLD) 
104      _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD 
105  except ImportError: 
106      #warnings.warn("JSON-LD disabled because no suitable support has been found", RuntimeWarning) 
107      pass 
108   
109  # This is very ugly. The fact is that the key for the choice of the output format is not defined. 
110  # Virtuoso uses 'format', Joseki uses 'output', Rasqal seems to use "results", etc. Lee Feigenbaum 
111  # told me that Virtuoso also understands 'output' these days, so I removed 'format'. I do not have 
112  # info about the others yet, i.e., for the time being I keep the general mechanism. Hopefully, in a 
113  # future release, I can get rid of that. However, these processors are (hopefully) oblivious to the 
114  # parameters they do not understand. So: just repeat all possibilities in the final URI. UGLY!!!!!!! 
115  _returnFormatSetting = ["format", "output", "results"] 
116 117 ####################################################################################################### 118 119 120 -class SPARQLWrapper(object):
121 """ 122 Wrapper around an online access to a SPARQL Web entry point. 123 124 The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc, 125 are retained from one query to the next (in other words, only the query string changes). The instance can also be 126 reset to its initial values using the L{resetQuery} method. 127 128 @cvar pattern: regular expression used to determine whether a query is of type L{CONSTRUCT}, L{SELECT}, L{ASK}, or L{DESCRIBE}. 129 @type pattern: compiled regular expression (see the C{re} module of Python) 130 @ivar baseURI: the URI of the SPARQL service 131 """ 132 pattern = re.compile(r""" 133 ((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))* 134 (?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD)) 135 """, re.VERBOSE | re.IGNORECASE) 136
137 - def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
138 """ 139 Class encapsulating a full SPARQL call. 140 @param endpoint: string of the SPARQL endpoint's URI 141 @type endpoint: string 142 @param updateEndpoint: string of the SPARQL endpoint's URI for update operations (if it's a different one) 143 @type updateEndpoint: string 144 @keyword returnFormat: Default: L{XML}. 145 Can be set to JSON or Turtle/N3 146 147 No local check is done, the parameter is simply 148 sent to the endpoint. Eg, if the value is set to JSON and a construct query is issued, it 149 is up to the endpoint to react or not, this wrapper does not check. 150 151 Possible values: 152 L{JSON}, L{XML}, L{TURTLE}, L{N3} (constants in this module). The value can also be set via explicit 153 call, see below. 154 @type returnFormat: string 155 @keyword defaultGraph: URI for the default graph. Default is None, the value can be set either via an L{explicit call<addDefaultGraph>} or as part of the query string. 156 @type defaultGraph: string 157 """ 158 self.endpoint = endpoint 159 self.updateEndpoint = updateEndpoint if updateEndpoint else endpoint 160 self.agent = agent 161 self.user = None 162 self.passwd = None 163 self.http_auth = BASIC 164 self._defaultGraph = defaultGraph 165 166 if returnFormat in _allowedFormats: 167 self._defaultReturnFormat = returnFormat 168 else: 169 self._defaultReturnFormat = XML 170 171 self.resetQuery()
172
173 - def resetQuery(self):
174 """Reset the query, ie, return format, query, default or named graph settings, etc, 175 are reset to their default values.""" 176 self.parameters = {} 177 if self._defaultGraph: 178 self.addParameter("default-graph-uri", self._defaultGraph) 179 self.returnFormat = self._defaultReturnFormat 180 self.method = GET 181 self.setQuery("""SELECT * WHERE{ ?s ?p ?o }""") 182 self.timeout = None 183 self.requestMethod = URLENCODED
184
185 - def setReturnFormat(self, format):
186 """Set the return format. If not an allowed value, the setting is ignored. 187 188 @param format: Possible values: are L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF} (constants in this module). All other cases are ignored. 189 @type format: str 190 """ 191 if format in _allowedFormats : 192 self.returnFormat = format 193 elif format == JSONLD: 194 raise ValueError("Current instance does not support JSON-LD; you might want to install the rdflib-json package.") 195 else: 196 raise ValueError("Invalid format '%s'; current instance supports: %s.", (format, ", ".join(_allowedFormats)))
197
198 - def supportsReturnFormat(self, format):
199 """Check if a return format is supported. 200 201 @param format: Possible values: are L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF} (constants in this module). All other cases are ignored. 202 @type format: bool 203 """ 204 return (format in _allowedFormats)
205
206 - def setTimeout(self, timeout):
207 """Set the timeout (in seconds) to use for querying the endpoint. 208 209 @param timeout: Timeout in seconds. 210 @type timeout: int 211 """ 212 self.timeout = int(timeout)
213
214 - def setRequestMethod(self, method):
215 """Set the internal method to use to perform the request for query or 216 update operations, either URL-encoded (C{SPARQLWrapper.URLENCODED}) or 217 POST directly (C{SPARQLWrapper.POSTDIRECTLY}). 218 Further details at U{http://www.w3.org/TR/sparql11-protocol/#query-operation} 219 and U{http://www.w3.org/TR/sparql11-protocol/#update-operation}. 220 221 @param method: method 222 @type method: str 223 """ 224 if method in _REQUEST_METHODS: 225 self.requestMethod = method 226 else: 227 warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
228 229 @deprecated
230 - def addDefaultGraph(self, uri):
231 """ 232 Add a default graph URI. 233 @param uri: URI of the graph 234 @type uri: string 235 @deprecated: use addParameter("default-graph-uri", uri) instead of this method 236 """ 237 self.addParameter("default-graph-uri", uri)
238 239 @deprecated
240 - def addNamedGraph(self, uri):
241 """ 242 Add a named graph URI. 243 @param uri: URI of the graph 244 @type uri: string 245 @deprecated: use addParameter("named-graph-uri", uri) instead of this method 246 """ 247 self.addParameter("named-graph-uri", uri)
248 249 @deprecated
250 - def addExtraURITag(self, key, value):
251 """ 252 Some SPARQL endpoints require extra key value pairs. 253 E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing 254 virtuoso to retrieve graphs that are not stored in its local database. 255 @param key: key of the query part 256 @type key: string 257 @param value: value of the query part 258 @type value: string 259 @deprecated: use addParameter(key, value) instead of this method 260 """ 261 self.addParameter(key, value)
262 263 @deprecated
264 - def addCustomParameter(self, name, value):
265 """ 266 Method is kept for backwards compatibility. Historically, it "replaces" parameters instead of adding 267 @param name: name 268 @type name: string 269 @param value: value 270 @type value: string 271 @rtype: bool 272 @deprecated: use addParameter(name, value) instead of this method 273 """ 274 self.clearParameter(name) 275 return self.addParameter(name, value)
276
277 - def addParameter(self, name, value):
278 """ 279 Some SPARQL endpoints allow extra key value pairs. 280 E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing 281 virtuoso to retrieve graphs that are not stored in its local database. 282 @param name: name 283 @type name: string 284 @param value: value 285 @type value: string 286 @rtype: bool 287 """ 288 if name in _SPARQL_PARAMS: 289 return False 290 else: 291 if name not in self.parameters: 292 self.parameters[name] = [] 293 self.parameters[name].append(value) 294 return True
295
296 - def clearParameter(self, name):
297 """ 298 Clear the values ofd a concrete parameter. 299 @param name: name 300 @type name: string 301 @rtype: bool 302 """ 303 if name in _SPARQL_PARAMS: 304 return False 305 else: 306 try: 307 del self.parameters[name] 308 return True 309 except KeyError: 310 return False
311
312 - def setCredentials(self, user, passwd):
313 """ 314 Set the credentials for querying the current endpoint 315 @param user: username 316 @type user: string 317 @param passwd: password 318 @type passwd: string 319 """ 320 self.user = user 321 self.passwd = passwd
322
323 - def setHTTPAuth(self, auth):
324 """ 325 Set the HTTP Authentication type (Basic or Digest) 326 @param auth: auth type 327 @type auth: string 328 """ 329 if not isinstance(auth, str): 330 raise TypeError('setHTTPAuth takes a string') 331 elif auth.upper() in _allowedAuth: 332 self.http_auth = auth.upper() 333 else: 334 valid_types = ", ".join(_allowedAuth) 335 raise ValueError("Value should be one of {0}".format(valid_types))
336
    def setQuery(self, query):
        """
        Set the SPARQL query text. Note: no check is done on the validity of the query
        (syntax or otherwise) by this module, except for testing the query type (SELECT,
        ASK, etc). Syntax and validity checking is done by the SPARQL service itself.

        The text is stored as a unicode string in C{self.queryString}; byte strings are decoded
        as UTF-8. The detected query type is stored in C{self.queryType}.
        @param query: query text
        @type query: string
        @raise TypeError: if the query is neither a unicode string nor a byte string
        @bug: #2320024
        """
        # The two branches are the Python 2 and Python 3 spellings of the same check.
        if sys.version < '3':  # have to write it like this, for 2to3 compatibility
            if isinstance(query, unicode):
                pass
            elif isinstance(query, str):
                # Python 2 byte string: assumed to be UTF-8 encoded
                query = query.decode('utf-8')
            else:
                raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')
        else:
            if isinstance(query, str):
                pass
            elif isinstance(query, bytes):
                # Python 3 byte string: assumed to be UTF-8 encoded
                query = query.decode('utf-8')
            else:
                raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')

        self.queryString = query
        self.queryType = self._parseQueryType(query)
363
364 - def _parseQueryType(self,query):
365 """ 366 Parse the SPARQL query and return its type (ie, L{SELECT}, L{ASK}, etc). 367 368 Note that the method returns L{SELECT} if nothing is specified. This is just to get all other 369 methods running; in fact, this means that the query is erronous, because the query must be, 370 according to the SPARQL specification, one of Select, Ask, Describe, or Construct. The 371 SPARQL endpoint should raise an exception (via urllib) for such syntax error. 372 373 @param query: query text 374 @type query: string 375 @rtype: string 376 """ 377 try: 378 query = query if type(query)==str else query.encode('ascii', 'ignore') 379 query = re.sub(re.compile("#.*?\n" ), "" , query) # remove all occurance singleline comments (issue #32) 380 r_queryType = self.pattern.search(query).group("queryType").upper() 381 except AttributeError: 382 warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning) 383 r_queryType = None 384 385 if r_queryType in _allowedQueryTypes : 386 return r_queryType 387 else : 388 #raise Exception("Illegal SPARQL Query; must be one of SELECT, ASK, DESCRIBE, or CONSTRUCT") 389 warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning) 390 return SELECT
391
392 - def setMethod(self,method):
393 """Set the invocation method. By default, this is L{GET}, but can be set to L{POST}. 394 @param method: should be either L{GET} or L{POST}. Other cases are ignored. 395 """ 396 if method in _allowedRequests : self.method = method
397
398 - def setUseKeepAlive(self):
399 """Make urllib2 use keep-alive. 400 @raise ImportError: when could not be imported urlgrabber.keepalive.HTTPHandler 401 """ 402 try: 403 from urlgrabber.keepalive import HTTPHandler 404 keepalive_handler = HTTPHandler() 405 opener = urllib2.build_opener(keepalive_handler) 406 urllib2.install_opener(opener) 407 except ImportError: 408 warnings.warn("urlgrabber not installed in the system. The execution of this method has no effect.")
409
410 - def isSparqlUpdateRequest(self):
411 """ Returns TRUE if SPARQLWrapper is configured for executing SPARQL Update request 412 @return: bool 413 """ 414 return self.queryType in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]
415
416 - def isSparqlQueryRequest(self):
417 """ Returns TRUE if SPARQLWrapper is configured for executing SPARQL Query request 418 @return: bool 419 """ 420 return not self.isSparqlUpdateRequest()
421
422 - def _getRequestEncodedParameters(self, query=None):
423 query_parameters = self.parameters.copy() 424 425 if query and type(query) == tuple and len(query) == 2: 426 #tuple ("query"/"update", queryString) 427 query_parameters[query[0]] = [query[1]] 428 429 # This is very ugly. The fact is that the key for the choice of the output format is not defined. 430 # Virtuoso uses 'format',sparqler uses 'output' 431 # However, these processors are (hopefully) oblivious to the parameters they do not understand. 432 # So: just repeat all possibilities in the final URI. UGLY!!!!!!! 433 for f in _returnFormatSetting: 434 query_parameters[f] = [self.returnFormat] 435 436 pairs = ( 437 "%s=%s" % ( 438 urllib.quote_plus(param.encode('UTF-8'), safe='/'), 439 urllib.quote_plus(value.encode('UTF-8'), safe='/') 440 ) 441 for param, values in query_parameters.items() for value in values 442 ) 443 444 return '&'.join(pairs)
445
446 - def _getAcceptHeader(self):
447 if self.queryType in [SELECT, ASK]: 448 if self.returnFormat == XML: 449 acceptHeader = ",".join(_SPARQL_XML) 450 elif self.returnFormat == JSON: 451 acceptHeader = ",".join(_SPARQL_JSON) 452 else: 453 acceptHeader = ",".join(_ALL) 454 elif self.queryType in [INSERT, DELETE]: 455 acceptHeader = "*/*" 456 else: 457 if self.returnFormat == N3 or self.returnFormat == TURTLE: 458 acceptHeader = ",".join(_RDF_N3) 459 elif self.returnFormat == XML: 460 acceptHeader = ",".join(_RDF_XML) 461 elif self.returnFormat == JSONLD and JSONLD in _allowedFormats: 462 acceptHeader = ",".join(_RDF_JSONLD) 463 else: 464 acceptHeader = ",".join(_ALL) 465 return acceptHeader
466
467 - def _createRequest(self):
468 """Internal method to create request according a HTTP method. Returns a 469 C{urllib2.Request} object of the urllib2 Python library 470 @return: request 471 """ 472 request = None 473 474 if self.isSparqlUpdateRequest(): 475 #protocol details at http://www.w3.org/TR/sparql11-protocol/#update-operation 476 uri = self.updateEndpoint 477 478 if self.method != POST: 479 warnings.warn("update operations MUST be done by POST") 480 481 if self.requestMethod == POSTDIRECTLY: 482 request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters()) 483 request.add_header("Content-Type", "application/sparql-update") 484 request.data = self.queryString.encode('UTF-8') 485 else: # URL-encoded 486 request = urllib2.Request(uri) 487 request.add_header("Content-Type", "application/x-www-form-urlencoded") 488 request.data = self._getRequestEncodedParameters(("update", self.queryString)).encode('ascii') 489 else: 490 #protocol details at http://www.w3.org/TR/sparql11-protocol/#query-operation 491 uri = self.endpoint 492 493 if self.method == POST: 494 if self.requestMethod == POSTDIRECTLY: 495 request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters()) 496 request.add_header("Content-Type", "application/sparql-query") 497 request.data = self.queryString.encode('UTF-8') 498 else: # URL-encoded 499 request = urllib2.Request(uri) 500 request.add_header("Content-Type", "application/x-www-form-urlencoded") 501 request.data = self._getRequestEncodedParameters(("query", self.queryString)).encode('ascii') 502 else: # GET 503 request = urllib2.Request(uri + "?" 
+ self._getRequestEncodedParameters(("query", self.queryString))) 504 505 request.add_header("User-Agent", self.agent) 506 request.add_header("Accept", self._getAcceptHeader()) 507 if self.user and self.passwd: 508 if self.http_auth == BASIC: 509 credentials = "%s:%s" % (self.user, self.passwd) 510 request.add_header("Authorization", "Basic %s" % base64.b64encode(credentials.encode('utf-8'))) 511 elif self.http_auth == DIGEST: 512 realm = "SPARQL" 513 pwd_mgr = urllib2.HTTPPasswordMgr() 514 pwd_mgr.add_password(realm, uri, self.user, self.passwd) 515 opener = urllib2.build_opener() 516 opener.add_handler(urllib2.HTTPDigestAuthHandler(pwd_mgr)) 517 urllib2.install_opener(opener) 518 else: 519 valid_types = ", ".join(_allowedAuth) 520 raise NotImplementedError("Expecting one of: {0}, but received: {1}".format(valid_types, 521 self.http_auth)) 522 523 return request
524
525 - def _query(self):
526 """Internal method to execute the query. Returns the output of the 527 C{urllib2.urlopen} method of the standard Python library 528 529 @return: tuples with the raw request plus the expected format 530 """ 531 if self.timeout: 532 socket.setdefaulttimeout(self.timeout) 533 534 request = self._createRequest() 535 536 try: 537 response = urlopener(request) 538 return response, self.returnFormat 539 except urllib2.HTTPError, e: 540 if e.code == 400: 541 raise QueryBadFormed(e.read()) 542 elif e.code == 404: 543 raise EndPointNotFound(e.read()) 544 elif e.code == 500: 545 raise EndPointInternalError(e.read()) 546 else: 547 raise e
548
549 - def query(self):
550 """ 551 Execute the query. 552 Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the 553 case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint). 554 The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too. 555 556 Note that some combinations of return formats and query types may not make sense. For example, 557 a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph), or a CONSTRUCT 558 query with JSON output may be a problem because, at the moment, there is no accepted JSON serialization 559 of RDF (let alone one implemented by SPARQL endpoints). In such cases the returned media type of the result is 560 unpredictable and may differ from one SPARQL endpoint implementation to the other. (Endpoints usually fall 561 back to one of the "meaningful" formats, but it is up to the specific implementation to choose which 562 one that is.) 563 564 @return: query result 565 @rtype: L{QueryResult} instance 566 """ 567 return QueryResult(self._query())
568
569 - def queryAndConvert(self):
570 """Macro like method: issue a query and return the converted results. 571 @return: the converted query result. See the conversion methods for more details. 572 """ 573 res = self.query() 574 return res.convert()
575
576 ####################################################################################################### 577 578 579 -class QueryResult(object):
    """
    Wrapper around a query result. Users should not create instances of this class, it is
    generated by a L{SPARQLWrapper.query} call. The results can be
    converted to various formats, or used directly.

    If used directly: the class gives access to the direct http request results
    L{self.response}: it is a file-like object with two additional methods: C{geturl()} to
    return the URL of the resource retrieved and
    C{info()} that returns the meta-information of the HTTP result as a dictionary-like object
    (see the urllib2 standard library module of Python).

    For convenience, these methods are also available on the instance. The C{__iter__} and
    C{next} methods are also implemented (by mapping them to L{self.response}). This means that the
    common idiom::
     for l in obj : do_something_with_line(l)
    would work, too.

    @ivar response: the direct HTTP response; a file-like object, as returned by the C{urllib2.urlopen} library call.
    """
599 - def __init__(self,result):
600 """ 601 @param result: HTTP response stemming from a L{SPARQLWrapper.query} call, or a tuple with the expected format: (response,format) 602 """ 603 if (type(result) == tuple): 604 self.response = result[0] 605 self.requestedFormat = result[1] 606 else: 607 self.response = result 608 """Direct response, see class comments for details"""
609
610 - def geturl(self):
611 """Return the URI of the original call. 612 @return: URI 613 @rtype: string 614 """ 615 return self.response.geturl()
616
617 - def info(self):
618 """Return the meta-information of the HTTP result. 619 @return: meta information 620 @rtype: dictionary 621 """ 622 return KeyCaseInsensitiveDict(self.response.info())
623
624 - def __iter__(self):
625 """Return an iterator object. This method is expected for the inclusion 626 of the object in a standard C{for} loop. 627 """ 628 return self.response.__iter__()
629
630 - def next(self):
631 """Method for the standard iterator.""" 632 return self.response.next()
633 634 @staticmethod
635 - def setJSONModule(module):
636 """Set the Python module for encoding JSON data. If not an allowed value, the setting is ignored. 637 JSON modules supported: 638 - ``simplejson``: http://code.google.com/p/simplejson/ 639 - ``cjson``: http://pypi.python.org/pypi/python-cjson 640 - ``json``: This is the version of ``simplejson`` that is bundled with the 641 Python standard library since version 2.6 642 (see http://docs.python.org/library/json.html) 643 @param module: Possible values: are L{simplejson}, L{cjson}, L{json}. All other cases raise a ValueError exception. 644 @type module: string 645 """ 646 jsonlayer.use(module)
647
648 - def _convertJSON(self):
649 """ 650 Convert a JSON result into a Python dict. This method can be overwritten in a subclass 651 for a different conversion method. 652 @return: converted result 653 @rtype: Python dictionary 654 """ 655 return jsonlayer.decode(self.response.read().decode("utf-8"))
656
657 - def _convertXML(self):
658 """ 659 Convert an XML result into a Python dom tree. This method can be overwritten in a 660 subclass for a different conversion method. 661 @return: converted result 662 @rtype: PyXlib DOM node 663 """ 664 from xml.dom.minidom import parse 665 return parse(self.response)
666
667 - def _convertRDF(self):
668 """ 669 Convert a RDF/XML result into an RDFLib triple store. This method can be overwritten 670 in a subclass for a different conversion method. 671 @return: converted result 672 @rtype: RDFLib Graph 673 """ 674 try: 675 from rdflib.graph import ConjunctiveGraph 676 except ImportError: 677 from rdflib import ConjunctiveGraph 678 retval = ConjunctiveGraph() 679 # this is a strange hack. If the publicID is not set, rdflib (or the underlying xml parser) makes a funny 680 #(and, as far as I could see, meaningless) error message... 681 retval.load(self.response, publicID=' ') 682 return retval
683
684 - def _convertN3(self):
685 """ 686 Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass 687 for a different conversion method. 688 @return: converted result 689 @rtype: string 690 """ 691 return self.response.read()
692
693 - def _convertJSONLD(self):
694 """ 695 Convert a RDF JSON-LDresult into an RDFLib triple store. This method can be overwritten 696 in a subclass for a different conversion method. 697 @return: converted result 698 @rtype: RDFLib Graph 699 """ 700 from rdflib import ConjunctiveGraph 701 retval = ConjunctiveGraph() 702 retval.load(self.response, format='json-ld', publicID=' ') 703 return retval
704
705 - def convert(self):
706 """ 707 Encode the return value depending on the return format: 708 - in the case of XML, a DOM top element is returned; 709 - in the case of JSON, a simplejson conversion will return a dictionary; 710 - in the case of RDF/XML, the value is converted via RDFLib into a Graph instance. 711 In all other cases the input simply returned. 712 713 @return: the converted query result. See the conversion methods for more details. 714 """ 715 def _content_type_in_list(real, expected): 716 return True in [real.find(mime) != -1 for mime in expected]
717 718 def _validate_format(format_name, allowed, mime, requested): 719 if requested not in allowed: 720 message = "Format requested was %s, but %s (%s) has been returned by the endpoint" 721 warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)
722 723 if "content-type" in self.info(): 724 ct = self.info()["content-type"] 725 726 if _content_type_in_list(ct, _SPARQL_XML): 727 _validate_format("XML", [XML], ct, self.requestedFormat) 728 return self._convertXML() 729 elif _content_type_in_list(ct, _SPARQL_JSON): 730 _validate_format("JSON", [JSON], ct, self.requestedFormat) 731 return self._convertJSON() 732 elif _content_type_in_list(ct, _RDF_XML): 733 _validate_format("RDF/XML", [RDF, XML], ct, self.requestedFormat) 734 return self._convertRDF() 735 elif _content_type_in_list(ct, _RDF_N3): 736 _validate_format("N3", [N3, TURTLE], ct, self.requestedFormat) 737 return self._convertN3() 738 elif _content_type_in_list(ct, _RDF_JSONLD): 739 _validate_format("JSON(-LD)", [JSONLD, JSON], ct, self.requestedFormat) 740 return self._convertJSONLD() 741 742 warnings.warn("unknown response content type, returning raw response...", RuntimeWarning) 743 return self.response.read() 744
745 - def print_results(self, minWidth=None):
746 results = self._convertJSON() 747 if minWidth : 748 width = self.__get_results_width(results, minWidth) 749 else : 750 width = self.__get_results_width(results) 751 index = 0 752 for var in results["head"]["vars"] : 753 print ("?" + var).ljust(width[index]),"|", 754 index += 1 755 print 756 print "=" * (sum(width) + 3 * len(width)) 757 for result in results["results"]["bindings"] : 758 index = 0 759 for var in results["head"]["vars"] : 760 result = self.__get_prettyprint_string_sparql_var_result(result[var]) 761 print result.ljust(width[index]),"|", 762 index += 1 763 print
764
765 - def __get_results_width(self, results, minWidth=2):
766 width = [] 767 for var in results["head"]["vars"] : 768 width.append(max(minWidth, len(var)+1)) 769 for result in results["results"]["bindings"] : 770 index = 0 771 for var in results["head"]["vars"] : 772 result = self.__get_prettyprint_string_sparql_var_result(result[var]) 773 width[index] = max(width[index], len(result)) 774 index =+ 1 775 return width
776
777 - def __get_prettyprint_string_sparql_var_result(self, result):
778 value = result["value"] 779 lang = result.get("xml:lang", None) 780 datatype = result.get("datatype",None) 781 if lang is not None: 782 value+="@"+lang 783 if datatype is not None: 784 value+=" ["+datatype+"]" 785 return value
786