Hadoop Install Ubuntu
Ubuntu 20.04 üzerinde Hadoop 3.3.6 sürümü
Örnek: docker-compose.yml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
version: "3"
services:
  hadoop:
    hostname: hdserver
    container_name: hadoop
    image: emrekarakas/hadoop:latest
    ports:
      - "9000:9000" # HDFS
      - "9870:9870" # NameNode
      - "8088:8088" # ResourceManager
      - "9864:9864"
      - "9866:9866"
    volumes:
      - ./data:/home/data
    stdin_open: true
    tty: true
Install OpenJDK on Ubuntu
ubuntu 20.04
sudo apt update
sudo apt install openjdk-8-jdk -y
java -version; javac -version
Install OpenSSH on Ubuntu
sudo apt install openssh-server openssh-client -y
Create Hadoop User
sudo useradd -m -s /bin/bash hduser
sudo usermod -aG sudo hduser
sudo passwd hduser
su - hduser
Enable Passwordless SSH for Hadoop User
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
sudo vim /etc/ssh/sshd_config
"#Port 22" satırının başındaki # işareti kaldırılarak "Port 22" etkinleştirilecek
sudo service ssh restart
SSH bağlantısını test etmek için:
ssh localhost
Download and Install Hadoop on Ubuntu
wget https://dlcdn.apache.org/hadoop/common/hadoop-3.3.6/hadoop-3.3.6.tar.gz
tar xzf hadoop-3.3.6.tar.gz
sudo mv hadoop-3.3.6 /opt/hadoop
Configure Hadoop Environment Variables (bashrc)
vim ~/.bashrc
1
2
3
4
5
6
7
8
9
10
#Hadoop Related Options
export HADOOP_HOME=/opt/hadoop
export HADOOP_INSTALL=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export PATH=$PATH:$HADOOP_HOME/sbin:$HADOOP_HOME/bin
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib/native"
1
2
3
4
5
6
7
8
9
10
alias start-dfs='${HADOOP_HOME}/sbin/start-dfs.sh'
alias start-yarn='${HADOOP_HOME}/sbin/start-yarn.sh'
alias start-all='${HADOOP_HOME}/sbin/start-all.sh'
alias stop-dfs='${HADOOP_HOME}/sbin/stop-dfs.sh'
alias stop-yarn='${HADOOP_HOME}/sbin/stop-yarn.sh'
alias stop-all='${HADOOP_HOME}/sbin/stop-all.sh'
alias hdfs='${HADOOP_HOME}/bin/hdfs'
alias yarn='${HADOOP_HOME}/bin/yarn'
source ~/.bashrc
Edit hadoop-env.sh File
sudo vim $HADOOP_HOME/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/lib/jvm/java-8-openjdk-amd64
which javac
readlink -f /usr/bin/javac
1
2
3
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
export HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$HADOOP_HOME/lib/native"
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
Edit core-site.xml File
sudo vim $HADOOP_HOME/etc/hadoop/core-site.xml
1
2
3
4
5
6
7
8
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmpdata</value>
</property>
<property>
<name>fs.defaultFS</name>
<value>hdfs://127.0.0.1:9000</value>
</property>
Edit hdfs-site.xml File
sudo vim $HADOOP_HOME/etc/hadoop/hdfs-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/hadoop/data/hdfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/hadoop/data/hdfs/datanode</value>
</property>
Edit mapred-site.xml File
sudo vim $HADOOP_HOME/etc/hadoop/mapred-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/hadoop/</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/hadoop/</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/hadoop/</value>
</property>
Edit yarn-site.xml File
sudo vim $HADOOP_HOME/etc/hadoop/yarn-site.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>127.0.0.1</value>
</property>
<property>
<name>yarn.acl.enable</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
Format HDFS NameNode
hdfs namenode -format
Start Hadoop Cluster
start-dfs
start-yarn
jps
Access Hadoop UI from Browser
hdfs://localhost:9000 # HDFS (NameNode RPC) adresi; web arayüzü değildir, tarayıcıdan açılmaz
http://localhost:9870 # namenode
http://localhost:9864 # datanode
http://localhost:8088 # YARN resourcemanager
hdfs dfs -ls /