Package pyspark

Source Code for Package pyspark

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

"""
PySpark is the Python API for Spark.

Public classes:

    - L{SparkContext<pyspark.context.SparkContext>}
        Main entry point for Spark functionality.
    - L{RDD<pyspark.rdd.RDD>}
        A Resilient Distributed Dataset (RDD), the basic abstraction in Spark.
    - L{Broadcast<pyspark.broadcast.Broadcast>}
        A broadcast variable that gets reused across tasks.
    - L{Accumulator<pyspark.accumulators.Accumulator>}
        An "add-only" shared variable that tasks can only add values to.
    - L{SparkFiles<pyspark.files.SparkFiles>}
        Access files shipped with jobs.
    - L{StorageLevel<pyspark.storagelevel.StorageLevel>}
        Finer-grained cache persistence levels.
"""
import sys
import os

# Make the bundled Py4J egg importable; PySpark uses Py4J to communicate
# with the Spark JVM. Requires SPARK_HOME to be set in the environment.
sys.path.insert(0, os.path.join(os.environ["SPARK_HOME"], "python/lib/py4j0.7.egg"))


from pyspark.context import SparkContext
from pyspark.rdd import RDD
from pyspark.files import SparkFiles
from pyspark.storagelevel import StorageLevel


__all__ = ["SparkContext", "RDD", "SparkFiles", "StorageLevel"]
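
A minimal usage sketch of the public classes documented above, assuming a
local Spark installation with SPARK_HOME set. The master URL, application
name, and sample data are illustrative, not part of the package:

    from pyspark import SparkContext, StorageLevel

    sc = SparkContext("local", "PySparkExample")

    # RDD: distribute a local collection and transform it in parallel.
    rdd = sc.parallelize([1, 2, 3, 4])
    squares = rdd.map(lambda x: x * x)
    squares.collect()                          # [1, 4, 9, 16]

    # StorageLevel: pick a finer-grained persistence level than cache().
    squares.persist(StorageLevel.MEMORY_ONLY)

    # Broadcast: ship a read-only value to each worker once and reuse it
    # across tasks via its .value attribute.
    lookup = sc.broadcast({1: "a", 2: "b"})
    rdd.map(lambda x: lookup.value.get(x)).collect()   # ['a', 'b', None, None]

    # Accumulator: tasks may only add to it; the driver reads the result.
    total = sc.accumulator(0)
    rdd.foreach(lambda x: total.add(x))
    total.value                                # 10

    sc.stop()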