Package SPARQLWrapper :: Module Wrapper
[hide private]
[frames] | no frames]

Source Code for Module SPARQLWrapper.Wrapper

   1  # -*- coding: utf-8 -*- 
   2  # epydoc 
   3  # 
   4  """ 
   5  @var JSON: to be used to set the return format to JSON 
   6  @var XML: to be used to set the return format to XML (SPARQL XML format or RDF/XML, depending on the query type). This is the default. 
   7  @var RDFXML: to be used to set the return format to RDF/XML explicitly. 
   8  @var TURTLE: to be used to set the return format to Turtle 
   9  @var N3: to be used to set the return format to N3 (for most of the SPARQL services this is equivalent to Turtle) 
  10  @var RDF: to be used to set the return RDF Graph 
  11  @var CSV: to be used to set the return format to CSV 
  12  @var TSV: to be used to set the return format to TSV 
  13  @var JSONLD: to be used to set the return format to JSON-LD 
  14   
  15  @var POST: to be used to set HTTP POST 
  16  @var GET: to be used to set HTTP GET. This is the default. 
  17   
  18  @var SELECT: to be used to set the query type to SELECT. This is, usually, determined automatically. 
  19  @var CONSTRUCT: to be used to set the query type to CONSTRUCT. This is, usually, determined automatically. 
  20  @var ASK: to be used to set the query type to ASK. This is, usually, determined automatically. 
  21  @var DESCRIBE: to be used to set the query type to DESCRIBE. This is, usually, determined automatically. 
  22   
  23  @var INSERT: to be used to set the query type to INSERT. 
  24  @var DELETE: to be used to set the query type to DELETE. 
  25  @var CREATE: to be used to set the query type to CREATE. 
  26  @var CLEAR: to be used to set the query type to CLEAR. 
  27  @var DROP: to be used to set the query type to DROP. 
  28  @var LOAD: to be used to set the query type to LOAD. 
  29  @var COPY: to be used to set the query type to COPY. 
  30  @var MOVE: to be used to set the query type to MOVE. 
  31  @var ADD: to be used to set the query type to ADD. 
  32   
  33   
  34  @var BASIC: BASIC HTTP Authentication method 
  35  @var DIGEST: DIGEST HTTP Authentication method 
  36   
  37  @see: U{SPARQL Specification<http://www.w3.org/TR/rdf-sparql-query/>} 
  38  @authors: U{Ivan Herman<http://www.ivan-herman.net>}, U{Sergio Fernández<http://www.wikier.org>}, U{Carlos Tejo Alonso<http://www.dayures.net>} 
  39  @organization: U{World Wide Web Consortium<http://www.w3.org>}, U{Salzburg Research<http://www.salzburgresearch.at>} and U{Foundation CTIC<http://www.fundacionctic.org/>}. 
   40  @license: U{W3C® SOFTWARE NOTICE AND LICENSE<http://www.w3.org/Consortium/Legal/copyright-software>} 
  41  @requires: U{RDFLib<http://rdflib.net>} package. 
  42  """ 
  43   
  44  import urllib 
  45  import urllib2 
  46  from urllib2 import urlopen as urlopener  # don't change the name: tests override it 
  47  import base64 
  48  import re 
  49  import sys 
  50  import warnings 
  51   
  52  import json 
  53  from KeyCaseInsensitiveDict import KeyCaseInsensitiveDict 
  54  from SPARQLExceptions import QueryBadFormed, EndPointNotFound, EndPointInternalError, Unauthorized, URITooLong 
  55  from SPARQLWrapper import __agent__ 
  56   
  57  #  From <https://www.w3.org/TR/sparql11-protocol/#query-success> 
  58  #  The response body of a successful query operation with a 2XX response is either: 
  59  #  * SELECT and ASK: a SPARQL Results Document in XML, JSON, or CSV/TSV format. 
  60  #  * DESCRIBE and CONSTRUCT: an RDF graph serialized, for example, in the RDF/XML syntax, or an equivalent RDF graph serialization. 
  61  # 
  62  #  Possible parameter keys and values... 
  63  #  Examples: 
  64  #  - ClioPatria: the SWI-Prolog Semantic Web Server <http://cliopatria.swi-prolog.org/home> 
  65  #    * Parameter key: "format" <http://cliopatria.swi-prolog.org/help/http> 
  66  #    * Parameter value must have one of these values: "rdf+xml", "json", "csv", "application/sparql-results+xml" or "application/sparql-results+json". 
  67  # 
  68  #  - OpenLink Virtuoso  <http://virtuoso.openlinksw.com> 
  69  #    * Parameter key: "format" or "output" 
  70  #    * Parameter value, like directly: 
  71  #      "text/html" (HTML), "text/x-html+tr" (HTML (Faceted Browsing Links)), "application/vnd.ms-excel" 
  72  #      "application/sparql-results+xml" (XML), "application/sparql-results+json", (JSON) 
  73  #      "application/javascript" (Javascript), "text/turtle" (Turtle), "application/rdf+xml" (RDF/XML) 
  74  #      "text/plain" (N-Triples), "text/csv" (CSV), "text/tab-separated-values" (TSV) 
  75  #    * Parameter value, like indirectly: 
  76  #      "HTML" (alias text/html), "JSON" (alias application/sparql-results+json), "XML" (alias application/sparql-results+xml), "TURTLE" (alias text/rdf+n3), JavaScript (alias application/javascript) 
  77  #       See  <http://virtuoso.openlinksw.com/dataspace/doc/dav/wiki/Main/VOSSparqlProtocol#Additional HTTP Response Formats -- SELECT> 
  78  # 
  79  #      For a SELECT query type, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml 
   80  #      For an ASK query type, the default return mimetype (if Accept: */* is sent) is text/html 
  81  #      For a CONSTRUCT query type, the default return mimetype (if Accept: */* is sent) is text/turtle 
  82  #      For a DESCRIBE query type, the default return mimetype (if Accept: */* is sent) is text/turtle 
  83  # 
  84  # 
  85  #  - Fuseki (formerly there was Joseki) <https://jena.apache.org/documentation/serving_data/> 
  86  #    * Parameter key: "format" or "output" 
  87  #      See Fuseki 1: https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/HttpNames.java 
  88  #      See Fuseki 2: https://github.com/apache/jena/blob/master/jena-arq/src/main/java/org/apache/jena/riot/web/HttpNames.java 
  89  #    * Fuseki 1 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift" 
  90  #      See <https://github.com/apache/jena/blob/master/jena-fuseki1/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java> 
  91  #    * Fuseki 2 - Short names for "output=" : "json", "xml", "sparql", "text", "csv", "tsv", "thrift" 
  92  #      See <https://github.com/apache/jena/blob/master/jena-fuseki2/jena-fuseki-core/src/main/java/org/apache/jena/fuseki/servlets/ResponseResultSet.java> 
  93  #      If a non-expected short name is used, the server returns an "Error 400: Can't determine output serialization" 
  94  #      application/ld+json supported in CONSTRUCT, DESCRIBE 
   95  #      Valid alias for SELECT and ASK: "json", "xml", "csv", "tsv" 
  96  #      Valid alias for DESCRIBE and CONSTRUCT: "json" (alias for json-ld ONLY in Fuseki2), "xml" 
  97  #      Valid mimetype for DESCRIBE and CONSTRUCT: "application/ld+json" 
  98  #      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+json 
   99  #      Default return mimetypes: For DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is text/turtle 
 100  #      In case of a bad formed query, Fuseki1 returns 200 instead of 400. 
 101  # 
 102  #  - Eclipse RDF4J (formerly known as Sesame) <http://rdf4j.org/> 
 103  #    * Uses only content negotiation (no URL parameters). 
 104  #    * See <http://rdf4j.org/doc/the-rdf4j-server-rest-api/#The_QUERY_operation> 
 105  # 
 106  #  - RASQAL <http://librdf.org/rasqal/> 
 107  #    * Parameter key: "results" 
 108  #    * Uses roqet as RDF query utility 
 109  #      For variable bindings, the values of FORMAT vary upon what Rasqal supports but include simple 
 110  #      for a simple text format (default), xml for the SPARQL Query Results XML format, csv for SPARQL CSV, 
 111  #      tsv for SPARQL TSV, rdfxml and turtle for RDF syntax formats, and json for a JSON version of the results. 
 112  # 
 113  #      For RDF graph results, the values of FORMAT are ntriples (N-Triples, default), 
 114  #      rdfxml-abbrev (RDF/XML Abbreviated), rdfxml (RDF/XML), turtle (Turtle), 
 115  #      json (RDF/JSON resource centric), json-triples (RDF/JSON triples) or 
 116  #      rss-1.0 (RSS 1.0, also an RDF/XML syntax). 
 117  # 
 118  #      See <http://librdf.org/rasqal/roqet.html> 
 119  # 
 120  #  - Marklogic <http://marklogic.com> 
 121  #    * Uses content negotiation (no URL parameters). 
 122  #    * You can use following methods to query triples <https://docs.marklogic.com/guide/semantics/semantic-searches#chapter>: 
 123  #      - SPARQL mode in Query Console. For details, see Querying Triples with SPARQL 
 124  #      - XQuery using the semantics functions, and Search API, or a combination of XQuery and SPARQL. For details, see Querying Triples with XQuery or JavaScript. 
 125  #      - HTTP via a SPARQL endpoint. For details, see Using Semantics with the REST Client API. 
 126  #    * Formats are specified as part of the HTTP Accept headers of the REST request. <https://docs.marklogic.com/guide/semantics/REST#id_92428> 
 127  #      - When you query the SPARQL endpoint with REST Client APIs, you can specify the result output format.  <https://docs.marklogic.com/guide/semantics/REST#id_54258> 
 128  #        The response type format depends on the type of query and the MIME type in the HTTP Accept header. 
 129  #      - This table describes the MIME types and Accept Header/Output formats (MIME type) for different types of SPARQL queries. See <https://docs.marklogic.com/guide/semantics/REST#id_54258> and <https://docs.marklogic.com/guide/semantics/loading#id_70682> 
 130  #        SELECT "application/sparql-results+xml", "application/sparql-results+json", "text/html", "text/csv" 
 131  #        CONSTRUCT or DESCRIBE "application/n-triples", "application/rdf+json", "application/rdf+xml", "text/turtle", "text/n3", "application/n-quads", "application/trig" 
 132  #        ASK queries return a boolean (true or false). 
 133  # 
 134  #  - AllegroGraph <https://franz.com/agraph/allegrograph/> 
 135  #    * Uses only content negotiation (no URL parameters). 
 136  #    * The server always looks at the Accept header of a request, and tries to 
 137  #      generate a response in the format that the client asks for. If this fails, 
 138  #      a 406 response is returned. When no Accept, or an Accept of */* is specified, 
 139  #      the server prefers text/plain, in order to make it easy to explore the interface from a web browser. 
 140  #    * Accept header expected (values returned by server when a wrong header is sent): 
 141  #    ** SELECT 
 142  #    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent) 
 143  #    *** application/sparql-results+json (and application/json) 
 144  #    *** text/csv 
 145  #    *** text/tab-separated-values 
 146  #    *** OTHERS: application/sparql-results+ttl, text/integer, application/x-lisp-structured-expression, text/table, application/processed-csv, text/simple-csv, application/x-direct-upis 
 147  # 
 148  #    ** ASK 
 149  #    *** application/sparql-results+xml (DEFAULT if Accept: */* is sent) 
 150  #    *** application/sparql-results+json (and application/json) 
 151  #    *** Not supported: text/csv 
 152  #    *** Not supported: text/tab-separated-values 
 153  # 
 154  #    ** CONSTRUCT 
 155  #    *** application/rdf+xml (DEFAULT if Accept: */* is sent) 
 156  #    *** text/rdf+n3 
 157  #    *** OTHERS: text/integer, application/json, text/plain, text/x-nquads, application/trix, text/table, application/x-direct-upis 
 158  # 
 159  #    ** DESCRIBE 
 160  #    *** application/rdf+xml (DEFAULT if Accept: */* is sent) 
 161  #    *** text/rdf+n3 
 162  # 
 163  #      See <https://franz.com/agraph/support/documentation/current/http-protocol.html> 
 164  # 
 165  # 
 166  #  - 4store. Code repository <https://github.com/4store/4store> documentation <https://4store.danielknoell.de/trac/wiki/SparqlServer/> 
 167  #    * Parameter key: "output" 
 168  #    * Parameter value: alias. If an unexpected alias is used, the server is not working properly 
 169  #    * Also, it uses content negotiation 
 170  #    ** SELECT 
  171  #    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 172  #    *** application/sparql-results+json or application/json (alias json) 
 173  #    *** text/csv (alias csv) 
 174  #    *** text/tab-separated-values (alias tsv). Returns "text/plain" in GET. 
 175  #    *** Other values: text/plain, application/n-triples 
 176  # 
 177  #    ** ASK 
  178  #    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 179  #    *** application/sparql-results+json or application/json (alias json) 
 180  #    *** text/csv (alias csv) 
 181  #    *** text/tab-separated-values (alias tsv). Returns "text/plain" in GET. 
 182  #    *** Other values: text/plain, application/n-triples 
 183  # 
 184  #    ** CONSTRUCT 
 185  #    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 186  #    *** text/turtle (alias "text") 
 187  # 
 188  #    ** DESCRIBE 
 189  #    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 190  #    *** text/turtle (alias "text") 
 191  # 
  192  #      Valid alias for SELECT and ASK: "json", "xml", "csv", "tsv" (also "text" and "ascii") 
 193  #      Valid alias for DESCRIBE and CONSTRUCT: "xml", "text" (for turtle) 
 194  #      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml 
  195  #      Default return mimetypes: For DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/rdf+xml 
 196  # 
 197  # 
 198  #  - Blazegraph <https://www.blazegraph.com/> & NanoSparqlServer <https://wiki.blazegraph.com/wiki/index.php/NanoSparqlServer> <https://wiki.blazegraph.com/wiki/index.php/REST_API#SPARQL_End_Point> 
 199  #    * Parameter key: "format" (available since version 1.4.0). Setting this parameter will override any Accept Header that is present. <https://wiki.blazegraph.com/wiki/index.php/REST_API#GET_or_POST> 
 200  #    * Parameter value: alias. If an unexpected alias is used, the server is not working properly 
 201  #    * Also, it uses content negotiation 
 202  #    ** SELECT 
  203  #    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 204  #    *** application/sparql-results+json or application/json (alias json) 
 205  #    *** text/csv 
 206  #    *** text/tab-separated-values 
 207  #    *** Other values: application/x-binary-rdf-results-table 
 208  # 
 209  #    ** ASK 
  210  #    *** application/sparql-results+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 211  #    *** application/sparql-results+json or application/json (alias json) 
 212  # 
 213  #    ** CONSTRUCT 
 214  #    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 215  #    *** text/turtle (returns text/n3) 
 216  #    *** text/n3 
 217  # 
 218  #    ** DESCRIBE 
 219  #    *** application/rdf+xml (alias xml) (DEFAULT if Accept: */* is sent) 
 220  #    *** text/turtle (returns text/n3) 
 221  #    *** text/n3 
 222  # 
 223  #      Valid alias for SELECT and ASK: "xml", "json" 
 224  #      Valid alias for DESCRIBE and CONSTRUCT: "xml", "json" (but it returns unexpected "application/sparql-results+json") 
 225  #      Default return mimetypes: For a SELECT and ASK query types, the default return mimetype (if Accept: */* is sent) is application/sparql-results+xml 
  226  #      Default return mimetypes: For DESCRIBE and CONSTRUCT query types, the default return mimetype (if Accept: */* is sent) is application/rdf+xml 
 227   
  228  # Return-format aliases accepted by setReturnFormat(). JSONLD is not listed in _allowedFormats here: it is appended further down only when rdflib_jsonld can be imported. 
  229  JSON   = "json" 
  230  JSONLD = "json-ld" 
  231  XML    = "xml" 
  232  TURTLE = "turtle" 
  233  N3     = "n3" 
  234  RDF    = "rdf" 
  235  RDFXML = "rdf+xml" 
  236  CSV    = "csv" 
  237  TSV    = "tsv" 
  238  _allowedFormats = [JSON, XML, TURTLE, N3, RDF, RDFXML, CSV, TSV] 
 239   
  240  # Possible HTTP methods; resetQuery() selects GET 
  241  POST = "POST" 
  242  GET = "GET" 
  243  _allowedRequests = [POST, GET] 
 244   
  245  # Possible HTTP Authentication methods, as checked by setHTTPAuth(); the constructor starts with BASIC 
  246  BASIC = "BASIC" 
  247  DIGEST = "DIGEST" 
  248  _allowedAuth = [BASIC, DIGEST] 
 249   
  250  # Possible SPARQL/SPARUL query types (aka SPARQL query forms); the same keywords appear in the SPARQLWrapper.pattern regular expression 
  251  SELECT     = "SELECT" 
  252  CONSTRUCT  = "CONSTRUCT" 
  253  ASK        = "ASK" 
  254  DESCRIBE   = "DESCRIBE" 
  255  INSERT     = "INSERT" 
  256  DELETE     = "DELETE" 
  257  CREATE     = "CREATE" 
  258  CLEAR      = "CLEAR" 
  259  DROP       = "DROP" 
  260  LOAD       = "LOAD" 
  261  COPY       = "COPY" 
  262  MOVE       = "MOVE" 
  263  ADD        = "ADD" 
  264  _allowedQueryTypes = [SELECT, CONSTRUCT, ASK, DESCRIBE, INSERT, DELETE, CREATE, CLEAR, DROP, 
  265                        LOAD, COPY, MOVE, ADD] 
 266   
  267  # Possible methods to perform requests, as checked by setRequestMethod(); resetQuery() selects URLENCODED 
  268  URLENCODED = "urlencoded" 
  269  POSTDIRECTLY = "postdirectly" 
  270  _REQUEST_METHODS = [URLENCODED, POSTDIRECTLY] 
 271   
  272  # Possible output formats (MIME types) that can be converted by the local script. Unfortunately, 
  273  # it does not work by simply setting the return format, because there is still a certain level of confusion 
  274  # among implementations. 
  275  # For example, Joseki returns application/javascript and not the required application/sparql-results+json. 
  276  # I.e., several alternative MIME types have to be listed for each format. 
  277  # Andy Seaborne told me (June 2007) that the right return format is now added to his CVS, i.e., future releases of 
  278  # Joseki will be o.k., too. The situation with Turtle and N3 is even more confusing because the text/n3 and text/turtle 
  279  # MIME types have just been proposed and are not yet widely used. 
  280  _SPARQL_DEFAULT  = ["application/sparql-results+xml", "application/rdf+xml", "*/*"] 
  281  _SPARQL_XML      = ["application/sparql-results+xml"] 
  282  _SPARQL_JSON     = ["application/sparql-results+json", "application/json", "text/javascript", "application/javascript"] # VIVO server returns "application/javascript" 
  283  _RDF_XML         = ["application/rdf+xml"] 
  284  _RDF_TURTLE      = ["application/turtle", "text/turtle"] 
  285  _RDF_N3          = _RDF_TURTLE + ["text/rdf+n3", "application/n-triples", "application/n3", "text/n3"]  # every Turtle MIME type is listed for N3 as well 
  286  _RDF_JSONLD      = ["application/ld+json", "application/x-json+ld"] 
  287  _CSV             = ["text/csv"] 
  288  _TSV             = ["text/tab-separated-values"] 
  289  _XML             = ["application/xml"] 
  290  _ALL             = ["*/*"] 
  291  _RDF_POSSIBLE    = _RDF_XML + _RDF_N3 + _XML  # the JSON-LD types are added below when rdflib_jsonld is importable 
  292   
  293  _SPARQL_PARAMS = ["query"]  # parameter names that addParameter() and clearParameter() refuse to touch 
 294   
  295  try:  # JSON-LD support is optional and only enabled when rdflib_jsonld can be imported 
  296      import rdflib_jsonld 
  297      _allowedFormats.append(JSONLD) 
  298      _RDF_POSSIBLE = _RDF_POSSIBLE + _RDF_JSONLD 
  299  except ImportError: 
  300      # Stay silent when the package is missing. A RuntimeWarning ("JSON-LD disabled because no suitable support has been found") was considered here but is disabled. 
  301      pass 
 302   
  303  # This is very ugly. The fact is that the key for the choice of the output format is not defined. 
  304  # Virtuoso uses 'format', Joseki uses 'output', Rasqal seems to use "results", etc. Lee Feigenbaum 
  305  # told me that Virtuoso also understands 'output' these days, so I removed 'format'. I do not have 
  306  # info about the others yet, i.e., for the time being I keep the general mechanism. Hopefully, in a 
  307  # future release, I can get rid of that. However, these processors are (hopefully) oblivious to the 
  308  # parameters they do not understand. So: just repeat all possibilities in the final URI. UGLY!!!!!!! 
  309  _returnFormatSetting = ["format", "output", "results"] 
 310   
 311  ####################################################################################################### 
 312   
 313   
314 -class SPARQLWrapper(object):
315 """ 316 Wrapper around an online access to a SPARQL Web entry point. 317 318 The same class instance can be reused for subsequent queries. The values of the base Graph URI, return formats, etc, 319 are retained from one query to the next (in other words, only the query string changes). The instance can also be 320 reset to its initial values using the L{resetQuery} method. 321 322 @cvar prefix_pattern: regular expression used to remove base/prefixes in the process of determining the query type. 323 @type prefix_pattern: compiled regular expression (see the C{re} module of Python) 324 @cvar pattern: regular expression used to determine whether a query (without base/prefixes) is of type L{CONSTRUCT}, L{SELECT}, L{ASK}, L{DESCRIBE}, L{INSERT}, L{DELETE}, L{CREATE}, L{CLEAR}, L{DROP}, L{LOAD}, L{COPY}, L{MOVE} or L{ADD}. 325 @type pattern: compiled regular expression (see the C{re} module of Python) 326 @cvar comments_pattern: regular expression used to remove comments from a query. 327 @type comments_pattern: compiled regular expression (see the C{re} module of Python) 328 @ivar endpoint: SPARQL endpoint's URI. 329 @type endpoint: string 330 @ivar updateEndpoint: SPARQL endpoint's URI for update operations (if it's a different one). Default is C{None} 331 @type updateEndpoint: string 332 @ivar agent: The User-Agent for the HTTP request header. 333 @type agent: string 334 @ivar _defaultGraph: URI for the default graph. Default is C{None}, the value can be set either via an L{explicit call<addParameter>}("default-graph-uri", uri) or as part of the query string. 335 @type _defaultGraph: string 336 @ivar user: The username of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}. 337 @type user: string 338 @ivar passwd: The password of the credentials for querying the current endpoint. Default is C{None}, the value can be set an L{explicit call<setCredentials>}. 
339 @type passwd: string 340 @ivar http_auth: HTTP Authentication type. The default value is L{BASIC}. Possible values are L{BASIC} or L{DIGEST} 341 @type http_auth: string 342 @ivar onlyConneg: Option for allowing (or not) only HTTP Content Negotiation (so dismiss the use of HTTP parameters). The default value is C{False}. 343 @type onlyConneg: boolean 344 @ivar customHttpHeaders: Custom HTTP Headers to be included in the request. Important: These headers override previous values (including C{Content-Type}, C{User-Agent}, C{Accept} and C{Authorization} if they are present). It is a dictionary where keys are the header field names and values are the header values. 345 @type customHttpHeaders: dict 346 @ivar timeout: The timeout (in seconds) to use for querying the endpoint. 347 @type timeout: int 348 @ivar queryString: The SPARQL query text. 349 @type queryString: string 350 @ivar queryType: The type of SPARQL query (aka SPARQL query form), like L{CONSTRUCT}, L{SELECT}, L{ASK}, L{DESCRIBE}, L{INSERT}, L{DELETE}, L{CREATE}, L{CLEAR}, L{DROP}, L{LOAD}, L{COPY}, L{MOVE} or L{ADD} (constants in this module). 351 @type queryType: string 352 @ivar returnFormat: The return format. The possible values are L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF}, L{RDFXML}, L{CSV}, L{TSV}, L{JSONLD} (constants in this module). 353 @type returnFormat: string 354 @ivar requestMethod: The request method for query or update operations. The possible values are URL-encoded (L{URLENCODED}) or POST directly (L{POSTDIRECTLY}). 355 @type requestMethod: string 356 @ivar method: The invocation method. By default, this is L{GET}, but can be set to L{POST}. 357 @type method: string 358 @ivar parameters: The parameters of the request (key/value pairs in a dictionary). 359 @type parameters: dict 360 @ivar _defaultReturnFormat: The default return format. 
361 @type _defaultReturnFormat: string 362 363 364 """ 365 prefix_pattern = re.compile(r"((?P<base>(\s*BASE\s*<.*?>)\s*)|(?P<prefixes>(\s*PREFIX\s+.+:\s*<.*?>)\s*))*") 366 # Maybe the future name could be queryType_pattern 367 pattern = re.compile(r"(?P<queryType>(CONSTRUCT|SELECT|ASK|DESCRIBE|INSERT|DELETE|CREATE|CLEAR|DROP|LOAD|COPY|MOVE|ADD))", re.VERBOSE | re.IGNORECASE) 368 comments_pattern = re.compile(r"(^|\n)\s*#.*?\n") 369
def __init__(self, endpoint, updateEndpoint=None, returnFormat=XML, defaultGraph=None, agent=__agent__):
    """
    Build a wrapper bound to one SPARQL endpoint and put it in its default query state.

    @param endpoint: URI of the SPARQL endpoint used for queries.
    @type endpoint: string
    @param updateEndpoint: URI of the endpoint used for update operations. When it is
        C{None} (or otherwise empty) the query endpoint is used for updates as well.
    @type updateEndpoint: string
    @param returnFormat: Default return format, L{XML} unless given. A value that is not
        one of the allowed formats is silently replaced by L{XML}. No check is made
        that the format suits the query form; that is left to the endpoint.
    @type returnFormat: string
    @param defaultGraph: URI for the default graph. Default is C{None}; when set it is
        re-added as the C{default-graph-uri} parameter on every L{resetQuery}.
    @type defaultGraph: string
    @param agent: The User-Agent for the HTTP request header.
    @type agent: string
    """
    # Where to send requests and how to identify ourselves.
    self.endpoint = endpoint
    self.updateEndpoint = updateEndpoint or endpoint
    self.agent = agent

    # No credentials until setCredentials() is called; BASIC is the initial scheme.
    self.user = None
    self.passwd = None
    self.http_auth = BASIC

    self._defaultGraph = defaultGraph
    self.onlyConneg = False  # Only Content Negotiation
    self.customHttpHeaders = {}

    # An unknown format falls back to XML instead of raising.
    self._defaultReturnFormat = returnFormat if returnFormat in _allowedFormats else XML

    # Initialise everything that is reset between queries.
    self.resetQuery()
409
def resetQuery(self):
    """Put the per-query state back to its defaults.

    The request parameters are emptied (the default graph given to the constructor,
    if any, is added again), the return format goes back to the instance default,
    the HTTP method becomes L{GET}, the query text becomes a catch-all C{SELECT},
    the timeout is removed and the request method becomes L{URLENCODED}.
    """
    self.parameters = {}
    default_graph = self._defaultGraph
    if default_graph:
        self.addParameter("default-graph-uri", default_graph)

    self.returnFormat = self._defaultReturnFormat
    self.method = GET
    self.setQuery("""SELECT * WHERE{ ?s ?p ?o }""")
    self.timeout = None
    self.requestMethod = URLENCODED
422 423
def setReturnFormat(self, format):
    """Select the return format for subsequent queries.

    @param format: One of L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF}, L{RDFXML}, L{CSV},
        L{TSV}, L{JSONLD} (constants in this module). Any other value leaves the current
        setting untouched and only triggers a C{SyntaxWarning}.
    @type format: string
    @raise ValueError: If L{JSONLD} is requested but is not among the allowed formats
        (it is only allowed when the optional C{rdflib_jsonld} package could be imported).
    """
    if format in _allowedFormats:
        self.returnFormat = format
        return

    if format == JSONLD:
        raise ValueError("Current instance does not support JSON-LD; you might want to install the rdflib-jsonld package.")

    supported = ", ".join(_allowedFormats)
    warnings.warn("Ignore format '%s'; current instance supports: %s." % (format, supported), SyntaxWarning)
437
def supportsReturnFormat(self, format):
    """Tell whether a return format can be selected on this instance.

    @param format: Candidate format, e.g. L{JSON}, L{XML}, L{TURTLE}, L{N3}, L{RDF},
        L{RDFXML}, L{CSV}, L{TSV} (constants in this module).
    @type format: string
    @return: C{True} when the format is one of the allowed formats, C{False} otherwise.
    @rtype: bool
    """
    is_supported = format in _allowedFormats
    return is_supported
447
def setTimeout(self, timeout):
    """Store the timeout (in seconds) to use for querying the endpoint.

    @param timeout: Timeout in seconds; it is converted with C{int()}, so numeric
        strings are accepted and fractional values are truncated.
    @type timeout: int
    """
    seconds = int(timeout)
    self.timeout = seconds
455
def setOnlyConneg(self, onlyConneg):
    """Switch between pure HTTP Content Negotiation and the additional use of HTTP parameters.

    @since: 1.8.1

    @param onlyConneg: C{True} if only HTTP Content Negotiation is allowed; C{False} if
        HTTP parameters are allowed as well. The value is stored as given.
    @type onlyConneg: bool
    """
    setattr(self, "onlyConneg", onlyConneg)
464
def setRequestMethod(self, method):
    """Choose how query or update operations are transmitted.

    The two possibilities are URL-encoded (L{SPARQLWrapper.URLENCODED}) or POST
    directly (L{SPARQLWrapper.POSTDIRECTLY}). Further details at
    U{http://www.w3.org/TR/sparql11-protocol/#query-operation} and
    U{http://www.w3.org/TR/sparql11-protocol/#update-operation}.

    @param method: L{SPARQLWrapper.URLENCODED} or L{SPARQLWrapper.POSTDIRECTLY}. Any
        other value keeps the current setting and only triggers a C{RuntimeWarning}.
    @type method: string
    """
    if method not in _REQUEST_METHODS:
        warnings.warn("invalid update method '%s'" % method, RuntimeWarning)
        return

    self.requestMethod = method
479
def addDefaultGraph(self, uri):
    """
    Register a default graph URI through the C{default-graph-uri} request parameter.

    @param uri: URI of the graph
    @type uri: string
    @deprecated: use addParameter("default-graph-uri", uri) instead of this method
    """
    parameter_name = "default-graph-uri"
    self.addParameter(parameter_name, uri)
488
def addNamedGraph(self, uri):
    """
    Register a named graph URI through the C{named-graph-uri} request parameter.

    @param uri: URI of the graph
    @type uri: string
    @deprecated: use addParameter("named-graph-uri", uri) instead of this method
    """
    parameter_name = "named-graph-uri"
    self.addParameter(parameter_name, uri)
497
def addExtraURITag(self, key, value):
    """
    Some SPARQL endpoints require extra key value pairs.
    E.g., in virtuoso, one would add C{should-sponge=soft} to the query forcing
    virtuoso to retrieve graphs that are not stored in its local database.
    Alias of L{SPARQLWrapper.addParameter} method.

    @param key: key of the query part
    @type key: string
    @param value: value of the query part
    @type value: string
    @return: Returns the boolean produced by L{SPARQLWrapper.addParameter}, indicating
        if the adding has been accomplished.
    @rtype: bool
    @deprecated: use addParameter(key, value) instead of this method
    """
    # Hand back addParameter's result so that a refused key does not go unnoticed
    # when the alias is used instead of the real method.
    return self.addParameter(key, value)
511
def addCustomParameter(self, name, value):
    """
    Replace whatever is stored for a parameter by a single new value.

    Kept for backwards compatibility: historically this method "replaces"
    parameters instead of adding to them.

    @param name: name
    @type name: string
    @param value: value
    @type value: string
    @return: Returns a boolean indicating if the adding has been accomplished.
    @rtype: bool
    @deprecated: use addParameter(name, value) instead of this method
    """
    # Drop the previous values first; the outcome of the clearing is irrelevant here.
    self.clearParameter(name)
    was_added = self.addParameter(name, value)
    return was_added
525
def addParameter(self, name, value):
    """
    Append a value to an extra request parameter.

    Some SPARQL endpoints allow extra key value pairs. E.g., in virtuoso, one would
    add C{should-sponge=soft} to the query forcing virtuoso to retrieve graphs that
    are not stored in its local database. Values accumulate: adding the same name
    again keeps the earlier values. An attempt to set the C{query} parameter is
    dismissed.

    @param name: name
    @type name: string
    @param value: value
    @type value: string
    @return: Returns a boolean indicating if the adding has been accomplished.
    @rtype: bool
    """
    if name in _SPARQL_PARAMS:
        # Reserved for the query text itself.
        return False

    self.parameters.setdefault(name, []).append(value)
    return True
547
def addCustomHttpHeader(self, httpHeaderName, httpHeaderValue):
    """
    Store a custom HTTP header; a header stored earlier under the same name is replaced.

    IMPORTANT: Take into account that each previous value for the header field names
    C{Content-Type}, C{User-Agent}, C{Accept} and C{Authorization} would be overridden
    if the header field name is present as value of the parameter C{httpHeaderName}.
    @since: 1.8.2

    @param httpHeaderName: The header field name.
    @type httpHeaderName: string
    @param httpHeaderValue: The header field value.
    @type httpHeaderValue: string
    """
    headers = self.customHttpHeaders
    headers[httpHeaderName] = httpHeaderValue
562
def clearCustomHttpHeader(self, httpHeaderName):
    """
    Remove a custom HTTP header that was set previously.

    @since: 1.8.2

    @param httpHeaderName: name
    @type httpHeaderName: string
    @return: Returns a boolean indicating if the clearing has been accomplished
        (C{False} when no header was stored under that name).
    @rtype: bool
    """
    # A private sentinel tells "was not there" apart from any stored header value.
    absent = object()
    removed = self.customHttpHeaders.pop(httpHeaderName, absent)
    return removed is not absent
579
def clearParameter(self, name):
    """
    Remove every value stored for a concrete extra parameter.

    Names listed in C{_SPARQL_PARAMS} are reserved and are never cleared.

    @param name: name
    @type name: string
    @return: Returns a boolean indicating if the clearing has been accomplished
        (C{False} for reserved names and for parameters that were not set).
    @rtype: bool
    """
    if name in _SPARQL_PARAMS:
        return False
    if name not in self.parameters:
        return False
    del self.parameters[name]
    return True
597
def setCredentials(self, user, passwd, realm="SPARQL"):
    """
    Store the credentials used for querying the current endpoint.

    @param user: username
    @type user: string
    @param passwd: password
    @type passwd: string
    @param realm: realm. Only used for L{DIGEST} authentication. Default is C{SPARQL}
    @type realm: string
    @change: Added C{realm} parameter since version C{1.8.3}.
    """
    self.user, self.passwd, self.realm = user, passwd, realm
612
def setHTTPAuth(self, auth):
    """
    Select the HTTP Authentication type. Possible values are L{BASIC} or L{DIGEST}.

    The value is compared case-insensitively and stored in upper case.

    @param auth: auth type
    @type auth: string
    @raise TypeError: If the C{auth} parameter is not a string.
    @raise ValueError: If the C{auth} parameter has not one of the valid values: L{BASIC} or L{DIGEST}.
    """
    if not isinstance(auth, str):
        raise TypeError('setHTTPAuth takes a string')

    normalized = auth.upper()
    if normalized not in _allowedAuth:
        raise ValueError("Value should be one of {0}".format(", ".join(_allowedAuth)))
    self.http_auth = normalized
628
def setQuery(self, query):
    """
    Set the SPARQL query text. Note: no check is done on the validity of the query
    (syntax or otherwise) by this module, except for testing the query type (SELECT,
    ASK, etc). Syntax and validity checking is done by the SPARQL service itself.

    Byte-strings are decoded as UTF-8, so C{self.queryString} always holds text.
    C{self.queryType} is refreshed from the new query text.

    @param query: query text
    @type query: string
    @raise TypeError: If the C{query} parameter is not an unicode-string or utf-8 encoded byte-string.
    """
    # Compare the numeric major version, not the sys.version string: a string
    # comparison is lexicographic and would treat e.g. "10.0" as lower than "3".
    if sys.version_info[0] < 3:
        text_type, bytes_type = unicode, str
    else:
        text_type, bytes_type = str, bytes

    if isinstance(query, bytes_type):
        query = query.decode('utf-8')
    elif not isinstance(query, text_type):
        raise TypeError('setQuery takes either unicode-strings or utf-8 encoded byte-strings')

    self.queryString = query
    self.queryType = self._parseQueryType(query)
655
def _parseQueryType(self, query):
    """
    Internal method for parsing the SPARQL query and returning its type (ie, L{SELECT}, L{ASK}, etc).

    Comments are removed with L{_cleanComments}, whatever C{self.prefix_pattern} matches is
    stripped, and the C{queryType} group of C{self.pattern} is then read from the remaining text.

    Note that the method returns L{SELECT} if nothing is specified or if the detected type is not
    one of the allowed ones (a C{RuntimeWarning} is emitted in both cases). This is just to get all
    other methods running; in fact, this means that the query is erroneous, because the query must
    be, according to the SPARQL specification, one of Select, Ask, Describe, or Construct. The
    SPARQL endpoint should raise an exception (via urllib) for such syntax error.

    @param query: query text
    @type query: string
    @return: the type of SPARQL query (aka SPARQL query form)
    @rtype: string
    """
    try:
        if not isinstance(query, str):
            query = query.encode('ascii', 'ignore')
        query = self._cleanComments(query)
        without_prefixes = re.sub(self.prefix_pattern, "", query.strip())
        # search() yields None when nothing matches; the AttributeError raised by
        # the .group() call below is what signals "type not detected".
        found = self.pattern.search(without_prefixes)
        r_queryType = found.group("queryType").upper()
    except AttributeError:
        warnings.warn("not detected query type for query '%s'" % query.replace("\n", " "), RuntimeWarning)
        r_queryType = None

    if r_queryType not in _allowedQueryTypes:
        #raise Exception("Illegal SPARQL Query; must be one of SELECT, ASK, DESCRIBE, or CONSTRUCT")
        warnings.warn("unknown query type '%s'" % r_queryType, RuntimeWarning)
        return SELECT
    return r_queryType
685
def setMethod(self, method):
    """Choose the invocation method. By default, this is L{GET}, but can be set to L{POST}.

    @param method: should be either L{GET} or L{POST}. Other cases are ignored.
    @type method: string
    """
    if method not in _allowedRequests:
        # Unknown methods are silently ignored; the current method is kept.
        return
    self.method = method
693
def setUseKeepAlive(self):
    """Make urllib2 use keep-alive.

    A C{keepalive.HTTPHandler} is installed in the urllib2 default opener, unless the
    current default opener already contains one. When the C{keepalive} module cannot be
    imported, nothing is installed and a warning is emitted (no exception is raised).
    """
    try:
        from keepalive import HTTPHandler

        current_opener = urllib2._opener
        if current_opener:
            for handler in current_opener.handlers:
                if isinstance(handler, HTTPHandler):
                    # already installed
                    return

        urllib2.install_opener(urllib2.build_opener(HTTPHandler()))
    except ImportError:
        warnings.warn("keepalive support not available, so the execution of this method has no effect")
710
def isSparqlUpdateRequest(self):
    """ Tell whether SPARQLWrapper is configured for executing a SPARQL Update request.

    The decision is taken from the current query type only.

    @return: Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Update request
    @rtype: bool
    """
    update_forms = (INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD)
    return self.queryType in update_forms
717
def isSparqlQueryRequest(self):
    """ Tell whether SPARQLWrapper is configured for executing a SPARQL Query request.

    Everything that is not a SPARQL Update request is considered a SPARQL Query request.

    @return: Returns C{TRUE} if SPARQLWrapper is configured for executing SPARQL Query request.
    @rtype: bool
    """
    is_update = self.isSparqlUpdateRequest()
    return not is_update
724
725 - def _cleanComments(self, query):
726 """ Internal method for returning the query after all occurrence of singleline comments are removed (issues #32 and #77). 727 @param query: The query 728 @type query: string 729 @return: the query after all occurrence of singleline comments are removed. 730 @rtype: string 731 """ 732 return re.sub(self.comments_pattern, "\n\n", query)
733
def _getRequestEncodedParameters(self, query=None):
    """ Internal method for getting the request encoded parameters.

    The result is built from a copy of the extra parameters, plus the query itself (when
    given) and, for non-update requests that are not restricted to content negotiation
    (C{onlyConneg}), the requested return format repeated under every key of
    C{_returnFormatSetting}.

    @param query: a tuple of two items. The first item can be the string
    C{query} (for L{SELECT}, L{DESCRIBE}, L{ASK}, L{CONSTRUCT} query) or the string C{update}
    (for SPARQL Update queries, like L{DELETE} or L{INSERT}). The second item of the tuple
    is the query string itself.
    @type query: tuple
    @return: the request encoded parameters, as C{name=value} pairs joined by C{&}.
    @rtype: string
    """
    # Work on a copy so that the keys added below are not stored in self.parameters.
    query_parameters = self.parameters.copy()

    # in case of query = tuple("query"/"update", queryString)
    if query and (isinstance(query, tuple)) and len(query) == 2:
        query_parameters[query[0]] = [query[1]]

    if not self.isSparqlUpdateRequest():
        # This is very ugly. The fact is that the key for the choice of the output format is not defined.
        # Virtuoso uses 'format', sparqler uses 'output'.
        # However, these processors are (hopefully) oblivious to the parameters they do not understand.
        # So: just repeat all possibilities in the final URI. UGLY!!!!!!!
        if not self.onlyConneg:
            for f in _returnFormatSetting:
                query_parameters[f] = [self.returnFormat]
                # Virtuoso does not support a correct Accept header together with an unexpected "output"/"format" parameter value. It returns a 406.
                # "tsv", "rdf+xml" and "json-ld" are not supported as a correct "output"/"format" parameter value, but "text/tab-separated-values" or "application/rdf+xml" are valid values,
                # and there is no problem in sending both (4store does not support unexpected values).
                if self.returnFormat in [TSV, JSONLD, RDFXML]:
                    acceptHeader = self._getAcceptHeader() # to obtain the mime-type "text/tab-separated-values" or "application/rdf+xml"
                    if "*/*" in acceptHeader:
                        acceptHeader = "" # clear the value in case of "*/*" (an empty value is still appended below)
                    query_parameters[f] += [acceptHeader]

    # One "name=value" pair per stored value; names and values are UTF-8 encoded
    # before being quoted, and '/' is left unquoted.
    pairs = (
        "%s=%s" % (
            urllib.quote_plus(param.encode('UTF-8'), safe='/'),
            urllib.quote_plus(value.encode('UTF-8'), safe='/')
        )
        for param, values in query_parameters.items() for value in values
    )
    return '&'.join(pairs)
775
def _getAcceptHeader(self):
    """ Internal method for getting the HTTP Accept Header.

    The value depends on both the query type and the requested return format. When the
    requested format is unexpected for a query form (L{SELECT}/L{ASK} or
    L{CONSTRUCT}/L{DESCRIBE}), the C{_ALL} media types are sent and a C{RuntimeWarning}
    is emitted. For an unknown query type the header is C{*/*}.

    @see: U{Hypertext Transfer Protocol -- HTTP/1.1 - Header Field Definitions<https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1>}
    @return: the value for the HTTP Accept header.
    @rtype: string
    """
    requested = self.returnFormat
    query_form = self.queryType
    unexpected_format = False

    if query_form in [SELECT, ASK]:
        if requested == XML:
            mime_types = _SPARQL_XML
        elif requested == JSON:
            mime_types = _SPARQL_JSON
        # CSV and TSV are allowed for SELECT and ASK
        # (https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-success)
        # but only described for SELECT (https://www.w3.org/TR/sparql11-results-csv-tsv/)
        elif requested == CSV:
            mime_types = _CSV
        elif requested == TSV:
            mime_types = _TSV
        else:
            mime_types = _ALL
            unexpected_format = True
    elif query_form in [CONSTRUCT, DESCRIBE]:
        if requested in (N3, TURTLE):
            mime_types = _RDF_N3
        elif requested in (XML, RDFXML):
            mime_types = _RDF_XML
        elif requested == JSONLD and JSONLD in _allowedFormats:
            mime_types = _RDF_JSONLD
        else:
            mime_types = _ALL
            unexpected_format = True
    elif query_form in [INSERT, DELETE, CREATE, CLEAR, DROP, LOAD, COPY, MOVE, ADD]:
        # Update operations fall back to everything without any warning.
        if requested == XML:
            mime_types = _SPARQL_XML
        elif requested == JSON:
            mime_types = _SPARQL_JSON
        else:
            mime_types = _ALL
    else:
        return "*/*"

    acceptHeader = ",".join(mime_types)
    if unexpected_format:
        warnings.warn("Sending Accept header '*/*' because unexpected returned format '%s' in a '%s' SPARQL query form" % (self.returnFormat, self.queryType), RuntimeWarning)
    return acceptHeader
812
def _createRequest(self):
    """Internal method to create request according a HTTP method. Returns a
    C{urllib2.Request} object of the urllib2 Python library

    Update operations go to C{self.updateEndpoint}, query operations to C{self.endpoint}.
    With the L{POSTDIRECTLY} request method the query text is the (UTF-8 encoded) request
    body and the remaining parameters go in the URL; otherwise everything is sent
    URL-encoded, either as request body (POST) or in the URL (GET).

    @raise NotImplementedError: If the C{HTTP authentication} method is not one of the valid values: L{BASIC} or L{DIGEST}.
    @return: request a C{urllib2.Request} object of the urllib2 Python library
    """
    request = None

    if self.isSparqlUpdateRequest():
        #protocol details at http://www.w3.org/TR/sparql11-protocol/#update-operation
        uri = self.updateEndpoint

        # Only a warning: the request is built with a body below regardless of self.method.
        if self.method != POST:
            warnings.warn("update operations MUST be done by POST")

        if self.requestMethod == POSTDIRECTLY:
            request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
            request.add_header("Content-Type", "application/sparql-update")
            request.data = self.queryString.encode('UTF-8')
        else: # URL-encoded
            request = urllib2.Request(uri)
            request.add_header("Content-Type", "application/x-www-form-urlencoded")
            request.data = self._getRequestEncodedParameters(("update", self.queryString)).encode('ascii')
    else:
        #protocol details at http://www.w3.org/TR/sparql11-protocol/#query-operation
        uri = self.endpoint

        if self.method == POST:
            if self.requestMethod == POSTDIRECTLY:
                request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters())
                request.add_header("Content-Type", "application/sparql-query")
                request.data = self.queryString.encode('UTF-8')
            else: # URL-encoded
                request = urllib2.Request(uri)
                request.add_header("Content-Type", "application/x-www-form-urlencoded")
                request.data = self._getRequestEncodedParameters(("query", self.queryString)).encode('ascii')
        else: # GET
            request = urllib2.Request(uri + "?" + self._getRequestEncodedParameters(("query", self.queryString)))

    request.add_header("User-Agent", self.agent)
    request.add_header("Accept", self._getAcceptHeader())
    # Credentials are only used when both the user and the password are set.
    if self.user and self.passwd:
        if self.http_auth == BASIC:
            credentials = "%s:%s" % (self.user, self.passwd)
            request.add_header("Authorization", "Basic %s" % base64.b64encode(credentials.encode('utf-8')).decode('utf-8'))
        elif self.http_auth == DIGEST:
            realm = self.realm
            pwd_mgr = urllib2.HTTPPasswordMgr()
            pwd_mgr.add_password(realm, uri, self.user, self.passwd)
            opener = urllib2.build_opener()
            opener.add_handler(urllib2.HTTPDigestAuthHandler(pwd_mgr))
            # NOTE: install_opener() replaces the urllib2 default opener, so this
            # affects every later urllib2.urlopen call, not only this request.
            urllib2.install_opener(opener)
        else:
            valid_types = ", ".join(_allowedAuth)
            raise NotImplementedError("Expecting one of: {0}, but received: {1}".format(valid_types,
                                                                                        self.http_auth))

    # The header field name is capitalized in the request.add_header method.
    # Custom headers are added last, after the headers set above.
    for customHttpHeader in self.customHttpHeaders:
        request.add_header(customHttpHeader, self.customHttpHeaders[customHttpHeader])

    return request
875
def _query(self):
    """Internal method to execute the query. Returns the output of the
    C{urllib2.urlopen} method of the standard Python library

    The request is created with L{_createRequest}. C{self.timeout} is only passed on
    when it is set. Some HTTP error codes are translated into specific exceptions
    carrying the body of the error response; any other C{HTTPError} is re-raised as is.

    @return: tuples with the raw request plus the expected format.
    @raise QueryBadFormed: If the C{HTTP return code} is C{400}.
    @raise Unauthorized: If the C{HTTP return code} is C{401}.
    @raise EndPointNotFound: If the C{HTTP return code} is C{404}.
    @raise URITooLong: If the C{HTTP return code} is C{414}.
    @raise EndPointInternalError: If the C{HTTP return code} is C{500}.
    """
    request = self._createRequest()

    try:
        if self.timeout:
            response = urlopener(request, timeout=self.timeout)
        else:
            response = urlopener(request)
        return response, self.returnFormat
    # "except ... as e" is valid from Python 2.6 on and in Python 3, while the
    # former "except ..., e" spelling is a syntax error in Python 3.
    except urllib2.HTTPError as e:
        if e.code == 400:
            raise QueryBadFormed(e.read())
        elif e.code == 404:
            raise EndPointNotFound(e.read())
        elif e.code == 401:
            raise Unauthorized(e.read())
        elif e.code == 414:
            raise URITooLong(e.read())
        elif e.code == 500:
            raise EndPointInternalError(e.read())
        else:
            # Bare raise: same exception, original traceback preserved.
            raise
908
def query(self):
    """
    Run the query against the endpoint and wrap the outcome.

    Exceptions can be raised if either the URI is wrong or the HTTP sends back an error (this is also the
    case when the query is syntactically incorrect, leading to an HTTP error sent back by the SPARQL endpoint).
    The usual urllib2 exceptions are raised, which therefore cover possible SPARQL errors, too.

    Note that some combinations of return formats and query types may not make sense. For example,
    a SELECT query with Turtle response is meaningless (the output of a SELECT is not a Graph), or a CONSTRUCT
    query with JSON output may be a problem because, at the moment, there is no accepted JSON serialization
    of RDF (let alone one implemented by SPARQL endpoints). In such cases the returned media type of the result is
    unpredictable and may differ from one SPARQL endpoint implementation to the other. (Endpoints usually fall
    back to one of the "meaningful" formats, but it is up to the specific implementation to choose which
    one that is.)

    @return: query result
    @rtype: L{QueryResult} instance
    """
    # _query() hands back the raw response together with the requested format.
    raw_result = self._query()
    return QueryResult(raw_result)
928
def queryAndConvert(self):
    """Macro like method: issue a query and return the converted results.

    Shortcut for calling C{convert()} on the result of L{query}.

    @return: the converted query result. See the conversion methods for more details.
    """
    return self.query().convert()
935
936 - def __str__(self):
937 """This method returns the string representation of a L{SPARQLWrapper} object. 938 @return: A human-readable string of the object. 939 @rtype: string 940 @since: 1.8.3 941 """ 942 fullname = self.__module__ + "." + self.__class__.__name__ 943 items = ('"%s" : %r' % (k, v) for k, v in sorted(self.__dict__.items())) 944 str_dict_items = "{%s}" % (',\n'.join(items)) 945 return "<%s object at 0x%016X>\n%s" % (fullname, id(self), str_dict_items)
946 947 948 ####################################################################################################### 949 950
class QueryResult(object):
    """
    Wrapper around a query result. Users should not create instances of this class, it is
    generated by a L{SPARQLWrapper.query} call. The results can be
    converted to various formats, or used directly.

    If used directly: the class gives access to the direct HTTP request results
    L{self.response}: it is a file-like object with two additional methods: C{geturl()} to
    return the URL of the resource retrieved and
    C{info()} that returns the meta-information of the HTTP result as a dictionary-like object
    (see the urllib2 standard library module of Python).

    For convenience, these methods are also available on the instance. The C{__iter__} and
    C{next} methods are also implemented (by mapping them to L{self.response}). This means that the
    common idiom::
     for l in obj : do_something_with_line(l)
    would work, too.

    @ivar response: the direct HTTP response; a file-like object, as returned by the C{urllib2.urlopen} library call.
    @ivar requestedFormat: The requested format. The possible values are: L{JSON}, L{XML}, L{RDFXML}, L{TURTLE}, L{N3}, L{RDF}, L{CSV}, L{TSV}, L{JSONLD}.
    @type requestedFormat: string
    """
973 - def __init__(self, result):
974 """ 975 @param result: HTTP response stemming from a L{SPARQLWrapper.query} call, or a tuple with the expected format: (response,format) 976 """ 977 if isinstance(result, tuple): 978 self.response = result[0] 979 self.requestedFormat = result[1] 980 else: 981 self.response = result
982
def geturl(self):
    """Return the URL of the original call, as reported by the HTTP response.

    @return: URL of the original call
    @rtype: string
    """
    response = self.response
    return response.geturl()
989
def info(self):
    """Return the meta-information of the HTTP result.

    The headers of the response are wrapped in a C{KeyCaseInsensitiveDict}.

    @return: meta information of the HTTP result
    @rtype: dict
    """
    raw_headers = self.response.info()
    return KeyCaseInsensitiveDict(raw_headers)
996
997 - def __iter__(self):
998 """Return an iterator object. This method is expected for the inclusion 999 of the object in a standard C{for} loop. 1000 """ 1001 return self.response.__iter__()
1002
def next(self):
    """Method for the standard iterator: return the next item of the HTTP response."""
    advance = self.response.next
    return advance()
1006
1007 - def _convertJSON(self):
1008 """ 1009 Convert a JSON result into a Python dict. This method can be overwritten in a subclass 1010 for a different conversion method. 1011 @return: converted result 1012 @rtype: dict 1013 """ 1014 return json.loads(self.response.read().decode("utf-8"))
1015
1016 - def _convertXML(self):
1017 """ 1018 Convert an XML result into a Python dom tree. This method can be overwritten in a 1019 subclass for a different conversion method. 1020 @return: converted result 1021 @rtype: PyXlib DOM node 1022 """ 1023 from xml.dom.minidom import parse 1024 return parse(self.response)
1025
def _convertRDF(self):
    """
    Convert a RDF/XML result into an RDFLib triple store. This method can be overwritten
    in a subclass for a different conversion method.

    @return: converted result
    @rtype: RDFLib C{Graph}
    """
    try:
        from rdflib.graph import ConjunctiveGraph
    except ImportError:
        from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # Graph.load() only forwarded to Graph.parse() with "xml" as default format and
    # is no longer available in recent rdflib releases; parse() works with all of them.
    retval.parse(self.response, format="xml")
    return retval
1042
1043 - def _convertN3(self):
1044 """ 1045 Convert a RDF Turtle/N3 result into a string. This method can be overwritten in a subclass 1046 for a different conversion method. 1047 @return: converted result 1048 @rtype: string 1049 """ 1050 return self.response.read()
1051
1052 - def _convertCSV(self):
1053 """ 1054 Convert a CSV result into a string. This method can be overwritten in a subclass 1055 for a different conversion method. 1056 @return: converted result 1057 @rtype: string 1058 """ 1059 return self.response.read()
1060
1061 - def _convertTSV(self):
1062 """ 1063 Convert a TSV result into a string. This method can be overwritten in a subclass 1064 for a different conversion method. 1065 @return: converted result 1066 @rtype: string 1067 """ 1068 return self.response.read()
1069
def _convertJSONLD(self):
    """
    Convert a RDF JSON-LD result into an RDFLib triple store. This method can be overwritten
    in a subclass for a different conversion method.

    @return: converted result
    @rtype: RDFLib Graph
    """
    from rdflib import ConjunctiveGraph
    retval = ConjunctiveGraph()
    # Graph.load() only forwarded to Graph.parse() and is no longer available in
    # recent rdflib releases; parse() works with all of them.
    retval.parse(self.response, format='json-ld')
    return retval
1081
def convert(self):
    """
    Encode the return value depending on the return format:
        - in the case of XML, a DOM top element is returned;
        - in the case of JSON, a simplejson conversion will return a dictionary;
        - in the case of RDF/XML, the value is converted via RDFLib into a C{Graph} instance;
        - in the case of JSON-LD, the value is converted via RDFLib into a C{Graph} instance;
        - in the case of RDF Turtle/N3, a string is returned;
        - in the case of CSV/TSV, a string is returned.
    In all other cases the input is simply returned.

    The conversion is chosen from the C{Content-Type} header of the response. A
    C{RuntimeWarning} is emitted when the returned format does not match the requested
    one, or when the content type is unknown.

    @return: the converted query result. See the conversion methods for more details.
    """
    def _content_type_in_list(real, expected):
        """ Internal method for checking if the content-type header received matches any of the content types of the expected list.
        @param real: The content-type header received.
        @type real: string
        @param expected: A list of expected content types.
        @type expected: list
        @return: Returns a boolean after checking if the content-type header received matches any of the content types of the expected list.
        @rtype: boolean
        """
        return any(real.find(mime) != -1 for mime in expected)

    def _validate_format(format_name, allowed, mime, requested):
        """ Internal method for validating if the requested format is one of the allowed formats.
        @param format_name: The format name (to be used in the warning message).
        @type format_name: string
        @param allowed: A list of allowed content types.
        @type allowed: list
        @param mime: The content-type header received (to be used in the warning message).
        @type mime: string
        @param requested: the requested format, or C{None} when it is not known.
        @type requested: string
        """
        if requested is None:
            # Nothing was recorded as requested, so there is nothing to compare with.
            return
        if requested not in allowed:
            message = "Format requested was %s, but %s (%s) has been returned by the endpoint"
            warnings.warn(message % (requested.upper(), format_name, mime), RuntimeWarning)

    # A result built from a bare response may have no requested format at all.
    requested = getattr(self, "requestedFormat", None)

    # TODO. In order to compare properly, the requested QueryType (SPARQL Query Form) is needed. For instance, the unexpected N3 requested for a SELECT would return XML
    headers = self.info()
    if "content-type" in headers:
        ct = headers["content-type"] # returned Content-Type value

        # Checked in this order; the first entry whose media types match wins.
        conversions = (
            (_SPARQL_XML, "XML", [XML], self._convertXML),
            (_XML, "XML", [XML], self._convertXML),
            (_SPARQL_JSON, "JSON", [JSON], self._convertJSON),
            (_RDF_XML, "RDF/XML", [RDF, XML, RDFXML], self._convertRDF),
            (_RDF_N3, "N3", [N3, TURTLE], self._convertN3),
            (_CSV, "CSV", [CSV], self._convertCSV),
            (_TSV, "TSV", [TSV], self._convertTSV),
            (_RDF_JSONLD, "JSON(-LD)", [JSONLD, JSON], self._convertJSONLD),
        )
        for mime_types, format_name, allowed, converter in conversions:
            if _content_type_in_list(ct, mime_types):
                _validate_format(format_name, allowed, ct, requested)
                return converter()

        warnings.warn("unknown response content type '%s' returning raw response..." %(ct), RuntimeWarning)
    return self.response.read()
def _get_responseFormat(self):
    """
    Get the response (return) format. The possible values are: L{JSON}, L{XML}, L{RDFXML}, L{TURTLE}, L{N3}, L{CSV}, L{TSV}, L{JSONLD}.
    In case there is no Content-Type, C{None} is returned. In all other cases, the raw C{Content-Type} is returned
    (and a C{RuntimeWarning} is emitted).
    @since: 1.8.3

    @return: the response format. The possible values are: L{JSON}, L{XML}, L{RDFXML}, L{TURTLE}, L{N3}, L{CSV}, L{TSV}, L{JSONLD}.
    @rtype: string
    """

    def _content_type_in_list(real, expected):
        """ Internal method for checking if the content-type header received matches any of the content types of the expected list.
        @param real: The content-type header received.
        @type real: string
        @param expected: A list of expected content types.
        @type expected: list
        @return: Returns a boolean after checking if the content-type header received matches any of the content types of the expected list.
        @rtype: boolean
        """
        for mime in expected:
            if real.find(mime) != -1:
                return True
        return False

    headers = self.info()
    if "content-type" not in headers:
        return None

    ct = headers["content-type"] # returned Content-Type value

    # Checked in this order; the first entry whose media types match wins.
    known_formats = (
        (_SPARQL_XML, XML),
        (_XML, XML),
        (_SPARQL_JSON, JSON),
        (_RDF_XML, RDFXML),
        (_RDF_TURTLE, TURTLE),
        (_RDF_N3, N3),
        (_CSV, CSV),
        (_TSV, TSV),
        (_RDF_JSONLD, JSONLD),
    )
    for mime_types, response_format in known_formats:
        if _content_type_in_list(ct, mime_types):
            return response_format

    warnings.warn("Unknown response content type. Returning raw content-type ('%s')." %(ct), RuntimeWarning)
    return ct
def print_results(self, minWidth=None):
    """This method prints a representation of a L{QueryResult} object that MUST have L{JSON} as response format.

    One header row with the variable names is printed, then a separator line and one row
    per binding. Variables that are unbound in a binding are printed as empty cells.
    When the response format is not L{JSON}, a C{RuntimeWarning} is emitted and nothing
    is printed.

    @param minWidth: The minimum width of a column, counted in characters. The default value is C{None}
        (the default of the width computation is used).
    @type minWidth: int
    """

    # Check if the response format is JSON. If not, exit.
    responseFormat = self._get_responseFormat()
    if responseFormat != JSON:
        message = "Format return was %s, but JSON was expected. No printing."
        warnings.warn(message % (responseFormat), RuntimeWarning)
        return

    results = self._convertJSON()
    if minWidth:
        width = self.__get_results_width(results, minWidth)
    else:
        width = self.__get_results_width(results)
    variables = results["head"]["vars"]

    # Each row is assembled as one string and printed with a single-argument
    # print(...), which gives the same output in Python 2 and Python 3.
    print(" ".join(("?" + var).ljust(w) + " |" for var, w in zip(variables, width)))
    print("=" * (sum(width) + 3 * len(width)))
    for result in results["results"]["bindings"]:
        # Unbound variables are absent from a binding, hence .get() instead of [].
        cells = [self.__get_prettyprint_string_sparql_var_result(result.get(var)) for var in variables]
        print(" ".join(cell.ljust(w) + " |" for cell, w in zip(cells, width)))

def __get_results_width(self, results, minWidth=2):
    """Compute the width of every column of a JSON result.

    @param results: the converted JSON result.
    @type results: dict
    @param minWidth: The minimum width of a column. The default value is C{2}.
    @type minWidth: int
    @return: one width per variable, in the order of the variables of the result.
    @rtype: list
    """
    variables = results["head"]["vars"]
    # A header cell is the variable name plus the leading "?".
    width = [max(minWidth, len(var) + 1) for var in variables]
    for result in results["results"]["bindings"]:
        for index, var in enumerate(variables):
            result_value = self.__get_prettyprint_string_sparql_var_result(result.get(var))
            width[index] = max(width[index], len(result_value))
    return width

def __get_prettyprint_string_sparql_var_result(self, result):
    """Return the printable form of one bound value of a JSON result.

    @param result: the value bound to a variable, or C{None} for an unbound variable.
    @type result: dict
    @return: the value, followed by C{@lang} and/or C{ [datatype]} when present; an
        empty string for an unbound variable.
    @rtype: string
    """
    if result is None:
        return ""
    value = result["value"]
    lang = result.get("xml:lang", None)
    datatype = result.get("datatype", None)
    if lang is not None:
        value += "@"+lang
    if datatype is not None:
        value += " ["+datatype+"]"
    return value
1253
1254 - def __str__(self):
1255 """This method returns the string representation of a L{QueryResult} object. 1256 @return: A human-readable string of the object. 1257 @rtype: string 1258 @since: 1.8.3 1259 """ 1260 fullname = self.__module__ + "." + self.__class__.__name__ 1261 str_requestedFormat = '"requestedFormat" : '+repr(self.requestedFormat) 1262 str_url = self.response.url 1263 str_code = self.response.code 1264 str_headers = self.response.info() 1265 str_response = '"response (a file-like object, as return by the urllib2.urlopen library call)" : {\n\t"url" : "%s",\n\t"code" : "%s",\n\t"headers" : %s}' % (str_url, str_code, str_headers) 1266 return "<%s object at 0x%016X>\n{%s,\n%s}" % (fullname, id(self), str_requestedFormat, str_response)
1267