Post

Apache Spark 01 - Local Spark Installation

01 - Local Spark Installation

On a CentOS VM

Create working directory

1
2
3
4
mkdir spark_local
cd spark_local

mkdir datasets

Install Spark

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
curl -o spark-3.4.1-bin-hadoop3.tgz https://archive.apache.org/dist/spark/spark-3.4.1/spark-3.4.1-bin-hadoop3.tgz

tar xzf spark-3.4.1-bin-hadoop3.tgz
mv spark-3.4.1-bin-hadoop3 /opt/spark
rm -rf spark-3.4.1-bin-hadoop3.tgz


vim ~/.bashrc


# Spark Home
export SPARK_HOME=/opt/spark
export PATH=$PATH:$SPARK_HOME/bin
export PATH=$PATH:$SPARK_HOME/sbin


source ~/.bashrc

Install Java

1
sudo yum -y install java-11-openjdk-devel.x86_64

Create virtual environment

1
2
conda create --name sparkenv python=3.8
conda activate sparkenv

Create requirements.txt

1
2
3
jupyterlab
findspark
pandas>=1.0.5

Install python packages

1
python -m pip install -r requirements.txt

Start JupyterLab

1
jupyter lab --ip 0.0.0.0 --port 8888
This post is licensed under CC BY 4.0 by the author.