<?xml version="1.0" encoding="ascii"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
          "DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
<head>
  <title>pyspark.context.SparkContext</title>
  <link rel="stylesheet" href="epydoc.css" type="text/css" />
  <script type="text/javascript" src="epydoc.js"></script>
</head>

<body bgcolor="white" text="black" link="blue" vlink="#204080"
      alink="#204080">
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="pyspark-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark.apache.org">Spark 1.0.1 Python API Docs</a></th>
          </tr></table></th>
  </tr>
</table>
<table width="100%" cellpadding="0" cellspacing="0">
  <tr valign="top">
    <td width="100%">
      <span class="breadcrumbs">
        <a href="pyspark-module.html">Package&nbsp;pyspark</a> ::
        <a href="pyspark.context-module.html">Module&nbsp;context</a> ::
        Class&nbsp;SparkContext
      </span>
    </td>
    <td>
      <table cellpadding="0" cellspacing="0">
        <!-- hide/show private -->
        <tr><td align="right"><span class="options"
            >[<a href="frames.html" target="_top">frames</a
            >]&nbsp;|&nbsp;[<a href="pyspark.context.SparkContext-class.html"
            target="_top">no&nbsp;frames</a>]</span></td></tr>
      </table>
    </td>
  </tr>
</table>
<!-- ==================== CLASS DESCRIPTION ==================== -->
<h1 class="epydoc">Class SparkContext</h1><p class="nomargin-top"><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext">source&nbsp;code</a></span></p>
<pre class="base-tree">
object --+
         |
        <strong class="uidshort">SparkContext</strong>
</pre>

<hr />
<p>Main entry point for Spark functionality. A SparkContext represents 
  the connection to a Spark cluster, and can be used to create <a 
  href="pyspark.rdd.RDD-class.html" class="link">RDD</a>s and broadcast 
  variables on that cluster.</p>

<!-- ==================== INSTANCE METHODS ==================== -->
<a name="section-InstanceMethods"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Instance Methods</span></td>
</tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#__init__" class="summary-sig-name">__init__</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">master</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">appName</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">sparkHome</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">pyFiles</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">environment</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">batchSize</span>=<span class="summary-sig-default">1024</span>,
        <span class="summary-sig-arg">serializer</span>=<span class="summary-sig-default">PickleSerializer()</span>,
        <span class="summary-sig-arg">conf</span>=<span class="summary-sig-default">None</span>,
        <span class="summary-sig-arg">gateway</span>=<span class="summary-sig-default">None</span>)</span><br />
      Create a new SparkContext.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.__init__">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#defaultParallelism" class="summary-sig-name">defaultParallelism</a>(<span class="summary-sig-arg">self</span>)</span><br />
      Default level of parallelism to use when not given by user.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.defaultParallelism">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#defaultMinPartitions" class="summary-sig-name">defaultMinPartitions</a>(<span class="summary-sig-arg">self</span>)</span><br />
      Default min number of partitions for Hadoop RDDs when not given by 
      user</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.defaultMinPartitions">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="__del__"></a><span class="summary-sig-name">__del__</span>(<span class="summary-sig-arg">self</span>)</span></td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.__del__">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="stop"></a><span class="summary-sig-name">stop</span>(<span class="summary-sig-arg">self</span>)</span><br />
      Shut down the SparkContext.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.stop">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#parallelize" class="summary-sig-name">parallelize</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">c</span>,
        <span class="summary-sig-arg">numSlices</span>=<span class="summary-sig-default">None</span>)</span><br />
      Distribute a local Python collection to form an RDD.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.parallelize">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#textFile" class="summary-sig-name">textFile</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">name</span>,
        <span class="summary-sig-arg">minPartitions</span>=<span class="summary-sig-default">None</span>)</span><br />
      Read a text file from HDFS, a local file system (available on all 
      nodes), or any Hadoop-supported file system URI, and return it as an 
      RDD of Strings.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.textFile">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#wholeTextFiles" class="summary-sig-name">wholeTextFiles</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">path</span>,
        <span class="summary-sig-arg">minPartitions</span>=<span class="summary-sig-default">None</span>)</span><br />
      Read a directory of text files from HDFS, a local file system 
      (available on all nodes), or any  Hadoop-supported file system URI.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.wholeTextFiles">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#union" class="summary-sig-name">union</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">rdds</span>)</span><br />
      Build the union of a list of RDDs.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.union">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#broadcast" class="summary-sig-name">broadcast</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">value</span>)</span><br />
      Broadcast a read-only variable to the cluster, returning a <a 
      href="pyspark.broadcast.Broadcast-class.html" 
      class="link">Broadcast</a> object for reading it in distributed 
      functions.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.broadcast">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#accumulator" class="summary-sig-name">accumulator</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">value</span>,
        <span class="summary-sig-arg">accum_param</span>=<span class="summary-sig-default">None</span>)</span><br />
      Create an <a href="pyspark.accumulators.Accumulator-class.html" 
      class="link">Accumulator</a> with the given initial value, using a 
      given <a href="pyspark.accumulators.AccumulatorParam-class.html" 
      class="link">AccumulatorParam</a> helper object to define how to add 
      values of the data type if provided.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.accumulator">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#addFile" class="summary-sig-name">addFile</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">path</span>)</span><br />
      Add a file to be downloaded with this Spark job on every node.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.addFile">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="clearFiles"></a><span class="summary-sig-name">clearFiles</span>(<span class="summary-sig-arg">self</span>)</span><br />
      Clear the job's list of files added by <a 
      href="pyspark.context.SparkContext-class.html#addFile" 
      class="link">addFile</a> or <a 
      href="pyspark.context.SparkContext-class.html#addPyFile" 
      class="link">addPyFile</a> so that they do not get downloaded to any 
      new nodes.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.clearFiles">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#addPyFile" class="summary-sig-name">addPyFile</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">path</span>)</span><br />
      Add a .py or .zip dependency for all tasks to be executed on this 
      SparkContext in the future.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.addPyFile">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#setCheckpointDir" class="summary-sig-name">setCheckpointDir</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">dirName</span>)</span><br />
      Set the directory under which RDDs are going to be checkpointed.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.setCheckpointDir">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#setJobGroup" class="summary-sig-name">setJobGroup</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">groupId</span>,
        <span class="summary-sig-arg">description</span>,
        <span class="summary-sig-arg">interruptOnCancel</span>=<span class="summary-sig-default">False</span>)</span><br />
      Assigns a group ID to all the jobs started by this thread until the 
      group ID is set to a different value or cleared.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.setJobGroup">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="setLocalProperty"></a><span class="summary-sig-name">setLocalProperty</span>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">key</span>,
        <span class="summary-sig-arg">value</span>)</span><br />
      Set a local property that affects jobs submitted from this thread, 
      such as the Spark fair scheduler pool.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.setLocalProperty">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#getLocalProperty" class="summary-sig-name">getLocalProperty</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">key</span>)</span><br />
      Get a local property set in this thread, or null if it is missing.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.getLocalProperty">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="sparkUser"></a><span class="summary-sig-name">sparkUser</span>(<span class="summary-sig-arg">self</span>)</span><br />
      Get SPARK_USER for user who is running SparkContext.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.sparkUser">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#cancelJobGroup" class="summary-sig-name">cancelJobGroup</a>(<span class="summary-sig-arg">self</span>,
        <span class="summary-sig-arg">groupId</span>)</span><br />
      Cancel active jobs for the specified group.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.cancelJobGroup">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a name="cancelAllJobs"></a><span class="summary-sig-name">cancelAllJobs</span>(<span class="summary-sig-arg">self</span>)</span><br />
      Cancel all jobs that have been scheduled or are running.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.cancelAllJobs">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
  <tr>
    <td colspan="2" class="summary">
    <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
      <code>__delattr__</code>,
      <code>__format__</code>,
      <code>__getattribute__</code>,
      <code>__hash__</code>,
      <code>__new__</code>,
      <code>__reduce__</code>,
      <code>__reduce_ex__</code>,
      <code>__repr__</code>,
      <code>__setattr__</code>,
      <code>__sizeof__</code>,
      <code>__str__</code>,
      <code>__subclasshook__</code>
      </p>
    </td>
  </tr>
</table>
<!-- ==================== CLASS METHODS ==================== -->
<a name="section-ClassMethods"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Class Methods</span></td>
</tr>
<tr>
    <td width="15%" align="right" valign="top" class="summary">
      <span class="summary-type">&nbsp;</span>
    </td><td class="summary">
      <table width="100%" cellpadding="0" cellspacing="0" border="0">
        <tr>
          <td><span class="summary-sig"><a href="pyspark.context.SparkContext-class.html#setSystemProperty" class="summary-sig-name">setSystemProperty</a>(<span class="summary-sig-arg">cls</span>,
        <span class="summary-sig-arg">key</span>,
        <span class="summary-sig-arg">value</span>)</span><br />
      Set a Java system property, such as spark.executor.memory.</td>
          <td align="right" valign="top">
            <span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.setSystemProperty">source&nbsp;code</a></span>
            
          </td>
        </tr>
      </table>
      
    </td>
  </tr>
</table>
<!-- ==================== PROPERTIES ==================== -->
<a name="section-Properties"></a>
<table class="summary" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Properties</span></td>
</tr>
  <tr>
    <td colspan="2" class="summary">
    <p class="indent-wrapped-lines"><b>Inherited from <code>object</code></b>:
      <code>__class__</code>
      </p>
    </td>
  </tr>
</table>
<!-- ==================== METHOD DETAILS ==================== -->
<a name="section-MethodDetails"></a>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr bgcolor="#70b0f0" class="table-header">
  <td align="left" colspan="2" class="table-header">
    <span class="table-header">Method Details</span></td>
</tr>
</table>
<a name="__init__"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">__init__</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">master</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">appName</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">sparkHome</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">pyFiles</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">environment</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">batchSize</span>=<span class="sig-default">1024</span>,
        <span class="sig-arg">serializer</span>=<span class="sig-default">PickleSerializer()</span>,
        <span class="sig-arg">conf</span>=<span class="sig-default">None</span>,
        <span class="sig-arg">gateway</span>=<span class="sig-default">None</span>)</span>
    <br /><em class="fname">(Constructor)</em>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.__init__">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <pre class="literalblock">

Create a new SparkContext. At least the master and app name should be set,
either through the named parameters here or through C{conf}.

@param master: Cluster URL to connect to
       (e.g. mesos://host:port, spark://host:port, local[4]).
@param appName: A name for your job, to display on the cluster web UI.
@param sparkHome: Location where Spark is installed on cluster nodes.
@param pyFiles: Collection of .zip or .py files to send to the cluster
       and add to PYTHONPATH.  These can be paths on the local file
       system or HDFS, HTTP, HTTPS, or FTP URLs.
@param environment: A dictionary of environment variables to set on
       worker nodes.
@param batchSize: The number of Python objects represented as a single
       Java object.  Set 1 to disable batching or -1 to use an
       unlimited batch size.
@param serializer: The serializer for RDDs.
@param conf: A L{SparkConf} object setting Spark properties.
@param gateway: Use an existing gateway and JVM, otherwise a new JVM
       will be instantiated.


&gt;&gt;&gt; from pyspark.context import SparkContext
&gt;&gt;&gt; sc = SparkContext('local', 'test')

&gt;&gt;&gt; sc2 = SparkContext('local', 'test2') # doctest: +IGNORE_EXCEPTION_DETAIL
Traceback (most recent call last):
    ...
ValueError:...

</pre>
  <dl class="fields">
    <dt>Overrides:
        object.__init__
    </dt>
  </dl>
</td></tr></table>
</div>
<a name="setSystemProperty"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">setSystemProperty</span>(<span class="sig-arg">cls</span>,
        <span class="sig-arg">key</span>,
        <span class="sig-arg">value</span>)</span>
    <br /><em class="fname">Class Method</em>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.setSystemProperty">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Set a Java system property, such as spark.executor.memory. This 
  must be invoked before instantiating SparkContext.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="defaultParallelism"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">defaultParallelism</span>(<span class="sig-arg">self</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.defaultParallelism">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Default level of parallelism to use when not given by user (e.g. for 
  reduce tasks)</p>
  <dl class="fields">
    <dt>Decorators:</dt>
    <dd><ul class="nomargin-top">
        <li><code>@property</code></li>
    </ul></dd>
  </dl>
</td></tr></table>
</div>
<a name="defaultMinPartitions"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">defaultMinPartitions</span>(<span class="sig-arg">self</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.defaultMinPartitions">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Default min number of partitions for Hadoop RDDs when not given by 
  user</p>
  <dl class="fields">
    <dt>Decorators:</dt>
    <dd><ul class="nomargin-top">
        <li><code>@property</code></li>
    </ul></dd>
  </dl>
</td></tr></table>
</div>
<a name="parallelize"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">parallelize</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">c</span>,
        <span class="sig-arg">numSlices</span>=<span class="sig-default">None</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.parallelize">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Distribute a local Python collection to form an RDD.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>sc.parallelize(range(5), 5).glom().collect()
<span class="py-output">[[0], [1], [2], [3], [4]]</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="textFile"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">textFile</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">name</span>,
        <span class="sig-arg">minPartitions</span>=<span class="sig-default">None</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.textFile">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Read a text file from HDFS, a local file system (available on all 
  nodes), or any Hadoop-supported file system URI, and return it as an RDD 
  of Strings.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>path = os.path.join(tempdir, <span class="py-string">&quot;sample-text.txt&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>with open(path, <span class="py-string">&quot;w&quot;</span>) <span class="py-keyword">as</span> testFile:
<span class="py-more">... </span>   testFile.write(<span class="py-string">&quot;Hello world!&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>textFile = sc.textFile(path)
<span class="py-prompt">&gt;&gt;&gt; </span>textFile.collect()
<span class="py-output">[u'Hello world!']</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="wholeTextFiles"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">wholeTextFiles</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">path</span>,
        <span class="sig-arg">minPartitions</span>=<span class="sig-default">None</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.wholeTextFiles">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Read a directory of text files from HDFS, a local file system 
  (available on all nodes), or any Hadoop-supported file system URI. Each 
  file is read as a single record and returned in a key-value pair, where 
  the key is the path of each file, the value is the content of each 
  file.</p>
  <p>For example, if you have the following files:</p>
<pre class="literalblock">
 hdfs://a-hdfs-path/part-00000
 hdfs://a-hdfs-path/part-00001
 ...
 hdfs://a-hdfs-path/part-nnnnn
</pre>
  <p>Do <code>rdd = 
  sparkContext.wholeTextFiles(&quot;hdfs://a-hdfs-path&quot;)</code>, then 
  <code>rdd</code> contains:</p>
<pre class="literalblock">
 (a-hdfs-path/part-00000, its content)
 (a-hdfs-path/part-00001, its content)
 ...
 (a-hdfs-path/part-nnnnn, its content)
</pre>
  <p>NOTE: Small files are preferred, as each file will be loaded fully in 
  memory.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>dirPath = os.path.join(tempdir, <span class="py-string">&quot;files&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>os.mkdir(dirPath)
<span class="py-prompt">&gt;&gt;&gt; </span>with open(os.path.join(dirPath, <span class="py-string">&quot;1.txt&quot;</span>), <span class="py-string">&quot;w&quot;</span>) <span class="py-keyword">as</span> file1:
<span class="py-more">... </span>   file1.write(<span class="py-string">&quot;1&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>with open(os.path.join(dirPath, <span class="py-string">&quot;2.txt&quot;</span>), <span class="py-string">&quot;w&quot;</span>) <span class="py-keyword">as</span> file2:
<span class="py-more">... </span>   file2.write(<span class="py-string">&quot;2&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>textFiles = sc.wholeTextFiles(dirPath)
<span class="py-prompt">&gt;&gt;&gt; </span>sorted(textFiles.collect())
<span class="py-output">[(u'.../1.txt', u'1'), (u'.../2.txt', u'2')]</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="union"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">union</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">rdds</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.union">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Build the union of a list of RDDs.</p>
  <p>This supports unions() of RDDs with different serialized formats, 
  although this forces them to be reserialized using the default 
  serializer:</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span>path = os.path.join(tempdir, <span class="py-string">&quot;union-text.txt&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>with open(path, <span class="py-string">&quot;w&quot;</span>) <span class="py-keyword">as</span> testFile:
<span class="py-more">... </span>   testFile.write(<span class="py-string">&quot;Hello&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>textFile = sc.textFile(path)
<span class="py-prompt">&gt;&gt;&gt; </span>textFile.collect()
<span class="py-output">[u'Hello']</span>
<span class="py-output"></span><span class="py-prompt">&gt;&gt;&gt; </span>parallelized = sc.parallelize([<span class="py-string">&quot;World!&quot;</span>])
<span class="py-prompt">&gt;&gt;&gt; </span>sorted(sc.union([textFile, parallelized]).collect())
<span class="py-output">[u'Hello', 'World!']</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="broadcast"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">broadcast</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">value</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.broadcast">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Broadcast a read-only variable to the cluster, returning a <a 
  href="pyspark.broadcast.Broadcast-class.html" class="link">Broadcast</a> 
  object for reading it in distributed functions. The variable will be sent
  to each cluster only once.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="accumulator"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">accumulator</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">value</span>,
        <span class="sig-arg">accum_param</span>=<span class="sig-default">None</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.accumulator">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Create an <a href="pyspark.accumulators.Accumulator-class.html" 
  class="link">Accumulator</a> with the given initial value, using a given 
  <a href="pyspark.accumulators.AccumulatorParam-class.html" 
  class="link">AccumulatorParam</a> helper object to define how to add 
  values of the data type if provided. Default AccumulatorParams are used 
  for integers and floating-point numbers if you do not provide one. For 
  other types, a custom AccumulatorParam can be used.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="addFile"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">addFile</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">path</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.addFile">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Add a file to be downloaded with this Spark job on every node. The 
  <code>path</code> passed can be either a local file, a file in HDFS (or 
  other Hadoop-supported filesystems), or an HTTP, HTTPS or FTP URI.</p>
  <p>To access the file in Spark jobs, use <a 
  href="pyspark.files.SparkFiles-class.html#get" 
  class="link">SparkFiles.get(path)</a> to find its download location.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">from</span> pyspark <span class="py-keyword">import</span> SparkFiles
<span class="py-prompt">&gt;&gt;&gt; </span>path = os.path.join(tempdir, <span class="py-string">&quot;test.txt&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>with open(path, <span class="py-string">&quot;w&quot;</span>) <span class="py-keyword">as</span> testFile:
<span class="py-more">... </span>   testFile.write(<span class="py-string">&quot;100&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>sc.addFile(path)
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">func</span>(iterator):
<span class="py-more">... </span>   with open(SparkFiles.get(<span class="py-string">&quot;test.txt&quot;</span>)) <span class="py-keyword">as</span> testFile:
<span class="py-more">... </span>       fileVal = int(testFile.readline())
<span class="py-more">... </span>       return [x * 100 <span class="py-keyword">for</span> x <span class="py-keyword">in</span> iterator]
<span class="py-prompt">&gt;&gt;&gt; </span>sc.parallelize([1, 2, 3, 4]).mapPartitions(func).collect()
<span class="py-output">[100, 200, 300, 400]</span></pre>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="addPyFile"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">addPyFile</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">path</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.addPyFile">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Add a .py or .zip dependency for all tasks to be executed on this 
  SparkContext in the future.  The <code>path</code> passed can be either a
  local file, a file in HDFS (or other Hadoop-supported filesystems), or an
  HTTP, HTTPS or FTP URI.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="setCheckpointDir"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">setCheckpointDir</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">dirName</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.setCheckpointDir">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Set the directory under which RDDs are going to be checkpointed. The 
  directory must be an HDFS path if running on a cluster.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="setJobGroup"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">setJobGroup</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">groupId</span>,
        <span class="sig-arg">description</span>,
        <span class="sig-arg">interruptOnCancel</span>=<span class="sig-default">False</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.setJobGroup">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Assigns a group ID to all the jobs started by this thread until the 
  group ID is set to a different value or cleared.</p>
  <p>Often, a unit of execution in an application consists of multiple 
  Spark actions or jobs. Application programmers can use this method to 
  group all those jobs together and give a group description. Once set, the
  Spark web UI will associate such jobs with this group.</p>
  <p>The application can use <a 
  href="pyspark.context.SparkContext-class.html#cancelJobGroup" 
  class="link">SparkContext.cancelJobGroup</a> to cancel all running jobs 
  in this group.</p>
<pre class="py-doctest">
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">import</span> thread, threading
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">from</span> time <span class="py-keyword">import</span> sleep
<span class="py-prompt">&gt;&gt;&gt; </span>result = <span class="py-string">&quot;Not Set&quot;</span>
<span class="py-prompt">&gt;&gt;&gt; </span>lock = threading.Lock()
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">map_func</span>(x):
<span class="py-more">... </span>    sleep(100)
<span class="py-more">... </span>    raise Exception(<span class="py-string">&quot;Task should have been cancelled&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">start_job</span>(x):
<span class="py-more">... </span>    <span class="py-keyword">global</span> result
<span class="py-more">... </span>    try:
<span class="py-more">... </span>        sc.setJobGroup(<span class="py-string">&quot;job_to_cancel&quot;</span>, <span class="py-string">&quot;some description&quot;</span>)
<span class="py-more">... </span>        result = sc.parallelize(range(x)).map(map_func).collect()
<span class="py-more">... </span>    <span class="py-keyword">except</span> Exception <span class="py-keyword">as</span> e:
<span class="py-more">... </span>        result = <span class="py-string">&quot;Cancelled&quot;</span>
<span class="py-more">... </span>    lock.release()
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">def</span> <span class="py-defname">stop_job</span>():
<span class="py-more">... </span>    sleep(5)
<span class="py-more">... </span>    sc.cancelJobGroup(<span class="py-string">&quot;job_to_cancel&quot;</span>)
<span class="py-prompt">&gt;&gt;&gt; </span>supress = lock.acquire()
<span class="py-prompt">&gt;&gt;&gt; </span>supress = thread.start_new_thread(start_job, (10,))
<span class="py-prompt">&gt;&gt;&gt; </span>supress = thread.start_new_thread(stop_job, tuple())
<span class="py-prompt">&gt;&gt;&gt; </span>supress = lock.acquire()
<span class="py-prompt">&gt;&gt;&gt; </span><span class="py-keyword">print</span> result
<span class="py-output">Cancelled</span></pre>
  <p>If interruptOnCancel is set to true for the job group, then job 
  cancellation will result in Thread.interrupt() being called on the job's 
  executor threads. This is useful to help ensure that the tasks are 
  actually stopped in a timely manner, but is off by default due to 
  HDFS-1208, where HDFS may respond to Thread.interrupt() by marking nodes 
  as dead.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="getLocalProperty"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">getLocalProperty</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">key</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.getLocalProperty">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Get a local property set in this thread, or null if it is missing. See
  <a href="pyspark.context.SparkContext-class.html#setLocalProperty" 
  class="link">setLocalProperty</a>.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<a name="cancelJobGroup"></a>
<div>
<table class="details" border="1" cellpadding="3"
       cellspacing="0" width="100%" bgcolor="white">
<tr><td>
  <table width="100%" cellpadding="0" cellspacing="0" border="0">
  <tr valign="top"><td>
  <h3 class="epydoc"><span class="sig"><span class="sig-name">cancelJobGroup</span>(<span class="sig-arg">self</span>,
        <span class="sig-arg">groupId</span>)</span>
  </h3>
  </td><td align="right" valign="top"
    ><span class="codelink"><a href="pyspark.context-pysrc.html#SparkContext.cancelJobGroup">source&nbsp;code</a></span>&nbsp;
    </td>
  </tr></table>
  
  <p>Cancel active jobs for the specified group. See <a 
  href="pyspark.context.SparkContext-class.html#setJobGroup" 
  class="link">SparkContext.setJobGroup</a> for more information.</p>
  <dl class="fields">
  </dl>
</td></tr></table>
</div>
<br />
<!-- ==================== NAVIGATION BAR ==================== -->
<table class="navbar" border="0" width="100%" cellpadding="0"
       bgcolor="#a0c0ff" cellspacing="0">
  <tr valign="middle">
  <!-- Home link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="pyspark-module.html">Home</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Tree link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="module-tree.html">Trees</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Index link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="identifier-index.html">Indices</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Help link -->
      <th>&nbsp;&nbsp;&nbsp;<a
        href="help.html">Help</a>&nbsp;&nbsp;&nbsp;</th>

  <!-- Project homepage -->
      <th class="navbar" align="right" width="100%">
        <table border="0" cellpadding="0" cellspacing="0">
          <tr><th class="navbar" align="center"
            ><a class="navbar" target="_top" href="http://spark.apache.org">Spark 1.0.1 Python API Docs</a></th>
          </tr></table></th>
  </tr>
</table>
<table border="0" cellpadding="0" cellspacing="0" width="100%">
  <tr>
    <td align="left" class="footer">
    Generated by Epydoc 3.0.1 on Fri Jul  4 18:52:26 2014
    </td>
    <td align="right" class="footer">
      <a target="mainFrame" href="http://epydoc.sourceforge.net"
        >http://epydoc.sourceforge.net</a>
    </td>
  </tr>
</table>

<script type="text/javascript">
  <!--
  // Private objects are initially displayed (because if
  // javascript is turned off then we want them to be
  // visible); but by default, we want to hide them.  So hide
  // them unless we have a cookie that says to show them.
  checkCookie();
  // -->
</script>
</body>
</html>
