Apache Spark 01 - Local Spark Installation
On a CentOS VM
Create working directory
mkdir spark_local
cd spark_local
mkdir datasets
Install Spark
curl -o spark-3.4.1-bin-hadoop3.tgz https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz
tar xzf spark-3.4.1-bin-hadoop3.tgz
# Moving into /opt may require root privileges; prefix with sudo if needed
mv spark-3.4.1-bin-hadoop3 /opt/spark
rm -rf spark-3.4.1-bin-hadoop3.tgz
vim ~/.bashrc
# Add the following lines to the end of ~/.bashrc, then save and exit
# Spark Home
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin
export PATH=$PATH:$SPARK_HOME/sbin
# Reload the configuration in the current shell
source ~/.bashrc
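As an optional sanity check, the following should now resolve from any directory and print the Spark 3.4.1 version banner if SPARK_HOME and PATH were picked up correctly:
spark-submit --version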
Install Java
sudo yum -y install java-11-openjdk-devel.x86_64
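Spark 3.4 runs on Java 8, 11, or 17. To confirm the JDK is visible to the shell:
java -version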
Create virtual environment
conda create --name sparkenv python=3.8
conda activate sparkenv
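Optionally, confirm the environment is active and provides the expected interpreter:
which python
python --version   # should report Python 3.8.x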
Create requirements.txt
jupyterlab
findspark
pandas>=1.0.5
Install Python packages
python -m pip install -r requirements.txt
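Note that PySpark itself is not installed with pip here; findspark locates the PySpark libraries shipped under SPARK_HOME at runtime. A rough end-to-end check, assuming SPARK_HOME is exported in the current shell:
python -c "import findspark; findspark.init(); import pyspark; print(pyspark.__version__)"
This should print 3.4.1.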
Start JupyterLab
jupyter lab --ip 0.0.0.0 --port 8888
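Since the server binds to 0.0.0.0, it can be reached from the host machine at http://<vm-ip>:8888 (replace <vm-ip> with the VM's address), using the token printed in the console. Inside a notebook, call findspark.init() before importing pyspark, as in the check above.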
This post is licensed under CC BY 4.0 by the author.