Skip to content

Commit 1c5b198

Browse files
mnazbro and Davies Liu
authored and committed
[SPARK-7899] [PYSPARK] Fix Python 3 pyspark/sql/types module conflict
This PR makes the types module in `pyspark/sql/types` work with pylint static analysis by removing the dynamic naming of the `pyspark/sql/_types` module to `pyspark/sql/types`. Tests are now loaded using `$PYSPARK_DRIVER_PYTHON -m module` rather than `$PYSPARK_DRIVER_PYTHON module.py`. The old method adds the location of `module.py` to `sys.path`, so this change prevents accidental use of relative paths in Python. Author: Michael Nazario <[email protected]> Closes apache#6439 from mnazario/feature/SPARK-7899 and squashes the following commits: 366ef30 [Michael Nazario] Remove hack on random.py bb8b04d [Michael Nazario] Make doctests consistent with other tests 6ee4f75 [Michael Nazario] Change test scripts to use "-m" 673528f [Michael Nazario] Move _types back to types
1 parent 5f48e5c commit 1c5b198

7 files changed

Lines changed: 43 additions & 63 deletions

File tree

bin/pyspark

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -90,11 +90,7 @@ if [[ -n "$SPARK_TESTING" ]]; then
9090
unset YARN_CONF_DIR
9191
unset HADOOP_CONF_DIR
9292
export PYTHONHASHSEED=0
93-
if [[ -n "$PYSPARK_DOC_TEST" ]]; then
94-
exec "$PYSPARK_DRIVER_PYTHON" -m doctest $1
95-
else
96-
exec "$PYSPARK_DRIVER_PYTHON" $1
97-
fi
93+
exec "$PYSPARK_DRIVER_PYTHON" -m $1
9894
exit
9995
fi
10096

python/pyspark/accumulators.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -261,3 +261,7 @@ def _start_update_server():
261261
thread.daemon = True
262262
thread.start()
263263
return server
264+
265+
if __name__ == "__main__":
266+
import doctest
267+
doctest.testmod()

python/pyspark/mllib/__init__.py

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,3 @@
2828

2929
__all__ = ['classification', 'clustering', 'feature', 'fpm', 'linalg', 'random',
3030
'recommendation', 'regression', 'stat', 'tree', 'util']
31-
32-
import sys
33-
from . import rand as random
34-
modname = __name__ + '.random'
35-
random.__name__ = modname
36-
random.RandomRDDs.__module__ = modname
37-
sys.modules[modname] = random
38-
del modname, sys

python/pyspark/sql/__init__.py

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -50,18 +50,6 @@ def deco(f):
5050
return f
5151
return deco
5252

53-
# fix the module name conflict for Python 3+
54-
import sys
55-
from . import _types as types
56-
modname = __name__ + '.types'
57-
types.__name__ = modname
58-
# update the __module__ for all objects, make them picklable
59-
for v in types.__dict__.values():
60-
if hasattr(v, "__module__") and v.__module__.endswith('._types'):
61-
v.__module__ = modname
62-
sys.modules[modname] = types
63-
del modname, sys
64-
6553
from pyspark.sql.types import Row
6654
from pyspark.sql.context import SQLContext, HiveContext
6755
from pyspark.sql.column import Column

python/run-tests

Lines changed: 38 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -57,54 +57,54 @@ function run_test() {
5757

5858
function run_core_tests() {
5959
echo "Run core tests ..."
60-
run_test "pyspark/rdd.py"
61-
run_test "pyspark/context.py"
62-
run_test "pyspark/conf.py"
63-
PYSPARK_DOC_TEST=1 run_test "pyspark/broadcast.py"
64-
PYSPARK_DOC_TEST=1 run_test "pyspark/accumulators.py"
65-
run_test "pyspark/serializers.py"
66-
run_test "pyspark/profiler.py"
67-
run_test "pyspark/shuffle.py"
68-
run_test "pyspark/tests.py"
60+
run_test "pyspark.rdd"
61+
run_test "pyspark.context"
62+
run_test "pyspark.conf"
63+
run_test "pyspark.broadcast"
64+
run_test "pyspark.accumulators"
65+
run_test "pyspark.serializers"
66+
run_test "pyspark.profiler"
67+
run_test "pyspark.shuffle"
68+
run_test "pyspark.tests"
6969
}
7070

7171
function run_sql_tests() {
7272
echo "Run sql tests ..."
73-
run_test "pyspark/sql/_types.py"
74-
run_test "pyspark/sql/context.py"
75-
run_test "pyspark/sql/column.py"
76-
run_test "pyspark/sql/dataframe.py"
77-
run_test "pyspark/sql/group.py"
78-
run_test "pyspark/sql/functions.py"
79-
run_test "pyspark/sql/tests.py"
73+
run_test "pyspark.sql.types"
74+
run_test "pyspark.sql.context"
75+
run_test "pyspark.sql.column"
76+
run_test "pyspark.sql.dataframe"
77+
run_test "pyspark.sql.group"
78+
run_test "pyspark.sql.functions"
79+
run_test "pyspark.sql.tests"
8080
}
8181

8282
function run_mllib_tests() {
8383
echo "Run mllib tests ..."
84-
run_test "pyspark/mllib/classification.py"
85-
run_test "pyspark/mllib/clustering.py"
86-
run_test "pyspark/mllib/evaluation.py"
87-
run_test "pyspark/mllib/feature.py"
88-
run_test "pyspark/mllib/fpm.py"
89-
run_test "pyspark/mllib/linalg.py"
90-
run_test "pyspark/mllib/rand.py"
91-
run_test "pyspark/mllib/recommendation.py"
92-
run_test "pyspark/mllib/regression.py"
93-
run_test "pyspark/mllib/stat/_statistics.py"
94-
run_test "pyspark/mllib/tree.py"
95-
run_test "pyspark/mllib/util.py"
96-
run_test "pyspark/mllib/tests.py"
84+
run_test "pyspark.mllib.classification"
85+
run_test "pyspark.mllib.clustering"
86+
run_test "pyspark.mllib.evaluation"
87+
run_test "pyspark.mllib.feature"
88+
run_test "pyspark.mllib.fpm"
89+
run_test "pyspark.mllib.linalg"
90+
run_test "pyspark.mllib.random"
91+
run_test "pyspark.mllib.recommendation"
92+
run_test "pyspark.mllib.regression"
93+
run_test "pyspark.mllib.stat._statistics"
94+
run_test "pyspark.mllib.tree"
95+
run_test "pyspark.mllib.util"
96+
run_test "pyspark.mllib.tests"
9797
}
9898

9999
function run_ml_tests() {
100100
echo "Run ml tests ..."
101-
run_test "pyspark/ml/feature.py"
102-
run_test "pyspark/ml/classification.py"
103-
run_test "pyspark/ml/recommendation.py"
104-
run_test "pyspark/ml/regression.py"
105-
run_test "pyspark/ml/tuning.py"
106-
run_test "pyspark/ml/tests.py"
107-
run_test "pyspark/ml/evaluation.py"
101+
run_test "pyspark.ml.feature"
102+
run_test "pyspark.ml.classification"
103+
run_test "pyspark.ml.recommendation"
104+
run_test "pyspark.ml.regression"
105+
run_test "pyspark.ml.tuning"
106+
run_test "pyspark.ml.tests"
107+
run_test "pyspark.ml.evaluation"
108108
}
109109

110110
function run_streaming_tests() {
@@ -124,8 +124,8 @@ function run_streaming_tests() {
124124
done
125125

126126
export PYSPARK_SUBMIT_ARGS="--jars ${KAFKA_ASSEMBLY_JAR} pyspark-shell"
127-
run_test "pyspark/streaming/util.py"
128-
run_test "pyspark/streaming/tests.py"
127+
run_test "pyspark.streaming.util"
128+
run_test "pyspark.streaming.tests"
129129
}
130130

131131
echo "Running PySpark tests. Output is in python/$LOG_FILE."

0 commit comments

Comments (0)