<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>pyspark.sql.SQLContext</title>
  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  <script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="pyspark-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark.apache.org">Spark 1.0.1 Python API Docs</a></th>
          </tr></table></th>
  </tr>
</table>
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <span class="breadcrumbs">
        <a href="pyspark-module.html">Package&nbsp;pyspark</a> ::
        <a href="pyspark.sql-module.html">Module&nbsp;sql</a> ::
        Class&nbsp;SQLContext
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private -->
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >]&nbsp;|&nbsp;[<a href="pyspark.sql.SQLContext-class.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<!-- ==================== CLASS DESCRIPTION ==================== -->
<h1 class="epydoc">Class SQLContext</h1><p class="nomargin-top"><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext">source&nbsp;code</a></span></p>
<p>Main entry point for SparkSQL functionality.</p>
  <p>A SQLContext can be used to create <a 
  href="pyspark.sql.SchemaRDD-class.html" class="link">SchemaRDD</a>s, 
  register <a href="pyspark.sql.SchemaRDD-class.html" 
  class="link">SchemaRDD</a>s as tables, execute SQL over tables, cache 
  tables, and read parquet files.</p>

<!-- ==================== INSTANCE METHODS ==================== -->
<a name="section-InstanceMethods"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Instance Methods</span></td>
</tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">sparkContext</span>,
        <span class="summary-sig-arg">sqlContext</span>=<span class="summary-sig-default">None</span>)</span><br />
      Create a new SQLContext.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.__init__">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#inferSchema" class="summary-sig-name">inferSchema</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">rdd</span>)</span><br />
      Infer and apply a schema to an RDD of <code 
      class="link">dict</code>s.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.inferSchema">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#registerRDDAsTable" class="summary-sig-name">registerRDDAsTable</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">rdd</span>,
        <span class="summary-sig-arg">tableName</span>)</span><br />
      Registers the given RDD as a temporary table in the catalog.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.registerRDDAsTable">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#parquetFile" class="summary-sig-name">parquetFile</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">path</span>)</span><br />
      Loads a Parquet file, returning the result as a <a 
      href="pyspark.sql.SchemaRDD-class.html" class="link">SchemaRDD</a>.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.parquetFile">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#jsonFile" class="summary-sig-name">jsonFile</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">path</span>)</span><br />
      Loads a text file storing one JSON object per line, returning the 
      result as a <a href="pyspark.sql.SchemaRDD-class.html" 
      class="link">SchemaRDD</a>.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.jsonFile">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#jsonRDD" class="summary-sig-name">jsonRDD</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">rdd</span>)</span><br />
      Loads an RDD storing one JSON object per string, returning the 
      result as a <a href="pyspark.sql.SchemaRDD-class.html" 
      class="link">SchemaRDD</a>.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.jsonRDD">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#sql" class="summary-sig-name">sql</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">sqlQuery</span>)</span><br />
      Return a <a href="pyspark.sql.SchemaRDD-class.html" 
      class="link">SchemaRDD</a> representing the result of the given 
      query.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.sql">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.sql.SQLContext-class.html#table" class="summary-sig-name">table</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">tableName</span>)</span><br />
      Returns the specified table as a <a 
      href="pyspark.sql.SchemaRDD-class.html" class="link">SchemaRDD</a>.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.table">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="cacheTable"></a><span class="summary-sig-name">cacheTable</span>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">tableName</span>)</span><br />
      Caches the specified table in-memory.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.cacheTable">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="uncacheTable"></a><span class="summary-sig-name">uncacheTable</span>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">tableName</span>)</span><br />
      Removes the specified table from the in-memory cache.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.uncacheTable">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
</table>
<!-- ==================== METHOD DETAILS ==================== -->
<a name="section-MethodDetails"></a>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Method Details</span></td>
</tr>
</table>
<a name="__init__"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">sparkContext</span>,
        <span class="sig-arg">sqlContext</span>=<span class="sig-default">None</span>)</span>
    <br /><em class="fname">(Constructor)</em>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.__init__">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Create a new SQLContext.</p>
  <dl class="fields">
    <dt>Parameters:</dt>
    <dd><ul class="nomargin-top">
        <li><strong class="pname"><code>sparkContext</code></strong> - The SparkContext to wrap.
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(rdd)
<span class="py-prompt">&gt;&gt;&gt; </span>sqlCtx.inferSchema(srdd) <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except">    ...</span>
<span class="py-except">ValueError:...</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>bad_rdd = sc.parallelize([1,2,3])
<span class="py-prompt">&gt;&gt;&gt; </span>sqlCtx.inferSchema(bad_rdd) <span class="py-comment"># doctest: +IGNORE_EXCEPTION_DETAIL</span>
<span class="py-except">Traceback (most recent call last):</span>
<span class="py-except">    ...</span>
<span class="py-except">ValueError:...</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>allTypes = sc.parallelize([{<span class="py-string">&quot;int&quot;</span> : 1, <span class="py-string">&quot;string&quot;</span> : <span class="py-string">&quot;string&quot;</span>, <span class="py-string">&quot;double&quot;</span> : 1.0, <span class="py-string">&quot;long&quot;</span>: 1L,
<span class="py-more">... </span><span class="py-string">&quot;boolean&quot;</span> : True}])
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(allTypes).map(<span class="py-keyword">lambda</span> x: (x.int, x.string, x.double, x.long,
<span class="py-more">... </span>x.boolean))
<span class="py-prompt">&gt;&gt;&gt; </span>srdd.collect()[0]
<span class="py-output">(1, u'string', 1.0, 1, True)</span></pre></li>
    </ul></dd>
  </dl>
</td></tr></table>
</div>
<a name="inferSchema"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">inferSchema</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">rdd</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.inferSchema">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Infer and apply a schema to an RDD of <code 
  class="link">dict</code>s.</p>
  <p>We peek at the first row of the RDD to determine the field names and 
  types, and then use that to extract all the dictionaries. Nested 
  collections are supported, which include array, dict, list, set, and 
  tuple.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(rdd)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd.collect() == [{<span class="py-string">&quot;field1&quot;</span> : 1, <span class="py-string">&quot;field2&quot;</span> : <span class="py-string">&quot;row1&quot;</span>}, {<span class="py-string">&quot;field1&quot;</span> : 2, <span class="py-string">&quot;field2&quot;</span>: <span class="py-string">&quot;row2&quot;</span>},
<span class="py-more">... </span>                   {<span class="py-string">&quot;field1&quot;</span> : 3, <span class="py-string">&quot;field2&quot;</span>: <span class="py-string">&quot;row3&quot;</span>}]
<span class="py-output">True</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">from</span> array <span class="py-keyword">import</span> array
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(nestedRdd1)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd.collect() == [{<span class="py-string">&quot;f1&quot;</span> : array(<span class="py-string">'i'</span>, [1, 2]), <span class="py-string">&quot;f2&quot;</span> : {<span class="py-string">&quot;row1&quot;</span> : 1.0}},
<span class="py-more">... </span>                   {<span class="py-string">&quot;f1&quot;</span> : array(<span class="py-string">'i'</span>, [2, 3]), <span class="py-string">&quot;f2&quot;</span> : {<span class="py-string">&quot;row2&quot;</span> : 2.0}}]
<span class="py-output">True</span></pre>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(nestedRdd2)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd.collect() == [{<span class="py-string">&quot;f1&quot;</span> : [[1, 2], [2, 3]], <span class="py-string">&quot;f2&quot;</span> : set([1, 2]), <span class="py-string">&quot;f3&quot;</span> : (1, 2)},
<span class="py-more">... </span>                   {<span class="py-string">&quot;f1&quot;</span> : [[2, 3], [3, 4]], <span class="py-string">&quot;f2&quot;</span> : set([2, 3]), <span class="py-string">&quot;f3&quot;</span> : (2, 3)}]
<span class="py-output">True</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="registerRDDAsTable"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">registerRDDAsTable</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">rdd</span>,
        <span class="sig-arg">tableName</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.registerRDDAsTable">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Registers the given RDD as a temporary table in the catalog.</p>
  <p>Temporary tables exist only during the lifetime of this instance of 
  SQLContext.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(rdd)
<span class="py-prompt">&gt;&gt;&gt; </span>sqlCtx.registerRDDAsTable(srdd, <span class="py-string">&quot;table1&quot;</span>)</pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="parquetFile"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">parquetFile</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">path</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.parquetFile">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Loads a Parquet file, returning the result as a <a 
  href="pyspark.sql.SchemaRDD-class.html" class="link">SchemaRDD</a>.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">import</span> tempfile, shutil
<span class="py-prompt">&gt;&gt;&gt; </span>parquetFile = tempfile.mkdtemp()
<span class="py-prompt">&gt;&gt;&gt; </span>shutil.rmtree(parquetFile)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(rdd)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd.saveAsParquetFile(parquetFile)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd2 = sqlCtx.parquetFile(parquetFile)
<span class="py-prompt">&gt;&gt;&gt; </span>sorted(srdd.collect()) == sorted(srdd2.collect())
<span class="py-output">True</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="jsonFile"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">jsonFile</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">path</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.jsonFile">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <pre class="literalblock">
Loads a text file storing one JSON object per line,
   returning the result as a <a href="pyspark.sql.SchemaRDD-class.html" class="link">SchemaRDD</a>.
   It goes through the entire dataset once to determine the schema.

&gt;&gt;&gt; import tempfile, shutil
&gt;&gt;&gt; jsonFile = tempfile.mkdtemp()
&gt;&gt;&gt; shutil.rmtree(jsonFile)
&gt;&gt;&gt; ofn = open(jsonFile, 'w')
&gt;&gt;&gt; for json in jsonStrings:
...   print&gt;&gt;ofn, json
&gt;&gt;&gt; ofn.close()
&gt;&gt;&gt; srdd = sqlCtx.jsonFile(jsonFile)
&gt;&gt;&gt; sqlCtx.registerRDDAsTable(srdd, &quot;table1&quot;)
&gt;&gt;&gt; srdd2 = sqlCtx.sql(&quot;SELECT field1 AS f1, field2 as f2, field3 as f3 from table1&quot;)
&gt;&gt;&gt; srdd2.collect() == [{&quot;f1&quot;: 1, &quot;f2&quot;: &quot;row1&quot;, &quot;f3&quot;:{&quot;field4&quot;:11}},
...                     {&quot;f1&quot;: 2, &quot;f2&quot;: &quot;row2&quot;, &quot;f3&quot;:{&quot;field4&quot;:22}},
...                     {&quot;f1&quot;: 3, &quot;f2&quot;: &quot;row3&quot;, &quot;f3&quot;:{&quot;field4&quot;:33}}]
True

</pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="jsonRDD"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">jsonRDD</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">rdd</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.jsonRDD">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <pre class="literalblock">
Loads an RDD storing one JSON object per string, returning the result as a <a href="pyspark.sql.SchemaRDD-class.html" class="link">SchemaRDD</a>.
   It goes through the entire dataset once to determine the schema.

&gt;&gt;&gt; srdd = sqlCtx.jsonRDD(json)
&gt;&gt;&gt; sqlCtx.registerRDDAsTable(srdd, &quot;table1&quot;)
&gt;&gt;&gt; srdd2 = sqlCtx.sql(&quot;SELECT field1 AS f1, field2 as f2, field3 as f3 from table1&quot;)
&gt;&gt;&gt; srdd2.collect() == [{&quot;f1&quot;: 1, &quot;f2&quot;: &quot;row1&quot;, &quot;f3&quot;:{&quot;field4&quot;:11}},
...                     {&quot;f1&quot;: 2, &quot;f2&quot;: &quot;row2&quot;, &quot;f3&quot;:{&quot;field4&quot;:22}},
...                     {&quot;f1&quot;: 3, &quot;f2&quot;: &quot;row3&quot;, &quot;f3&quot;:{&quot;field4&quot;:33}}]
True

</pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="sql"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">sql</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">sqlQuery</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.sql">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Return a <a href="pyspark.sql.SchemaRDD-class.html" 
  class="link">SchemaRDD</a> representing the result of the given 
  query.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(rdd)
<span class="py-prompt">&gt;&gt;&gt; </span>sqlCtx.registerRDDAsTable(srdd, <span class="py-string">&quot;table1&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd2 = sqlCtx.sql(<span class="py-string">&quot;SELECT field1 AS f1, field2 as f2 from table1&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd2.collect() == [{<span class="py-string">&quot;f1&quot;</span> : 1, <span class="py-string">&quot;f2&quot;</span> : <span class="py-string">&quot;row1&quot;</span>}, {<span class="py-string">&quot;f1&quot;</span> : 2, <span class="py-string">&quot;f2&quot;</span>: <span class="py-string">&quot;row2&quot;</span>},
<span class="py-more">... </span>                    {<span class="py-string">&quot;f1&quot;</span> : 3, <span class="py-string">&quot;f2&quot;</span>: <span class="py-string">&quot;row3&quot;</span>}]
<span class="py-output">True</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="table"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">table</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">tableName</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.sql-pysrc.html#SQLContext.table">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Returns the specified table as a <a 
  href="pyspark.sql.SchemaRDD-class.html" class="link">SchemaRDD</a>.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>srdd = sqlCtx.inferSchema(rdd)
<span class="py-prompt">&gt;&gt;&gt; </span>sqlCtx.registerRDDAsTable(srdd, <span class="py-string">&quot;table1&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>srdd2 = sqlCtx.table(<span class="py-string">&quot;table1&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>sorted(srdd.collect()) == sorted(srdd2.collect())
<span class="py-output">True</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<br />
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="pyspark-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark.apache.org">Spark 1.0.1 Python API Docs</a></th>
          </tr></table></th>
  </tr>
</table>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Fri Jul  4 18:52:26 2014
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  // NOTE(review): checkCookie() is not defined in this file; it is
  // presumably provided by epydoc.js, loaded in the document head. Confirm.
  checkCookie();
  // -->
</script>
</body>
</html>
