Install Apache Hadoop V1 in Pseudo-Distributed Mode on CentOS 6.5

Purpose:

We have a CentOS 6.5 machine on which we will install Hadoop 1.2.1 with Java JDK 8, running all Hadoop daemons on a single node.




High-Level Steps:
1. Prerequisites:
  • Java installation, JDK 7 or above (JDK 8 is the latest version available at present)
  • Hadoop installation TAR file
  • Create Hadoop component-specific directories
  • Disable the firewall (see the command sketch right after this list)
  • Enable a static IP
  • Disable SELinux
  • Set an FQDN (Fully Qualified Domain Name)
  • Set the Hadoop and Java environment variables
2. Begin Hadoop installation:
  • Set up the basic Hadoop configuration files.
  • Configure all daemons to run on a single node.
  • Start the cluster.
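
The firewall, SELinux, static-IP, and FQDN prerequisites are not repeated in the detailed steps below, so here is a minimal sketch for CentOS 6. Run it as root; the hostname and IP match the transcripts later in this post, so adjust them for your own machines:

# Disable the firewall (acceptable for a lab setup only)
service iptables stop
chkconfig iptables off

# Disable SELinux; the config-file change takes full effect after a reboot
setenforce 0
sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config

# Set the hostname and map it in /etc/hosts (add every cluster node here)
hostname hadoopnn
echo "192.168.1.15  hadoopnn.localdomain  hadoopnn" >> /etc/hosts

# For a static IP, set BOOTPROTO=static and IPADDR/NETMASK/GATEWAY
# in /etc/sysconfig/network-scripts/ifcfg-eth0, then: service network restart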

Detailed Steps:


Downloads:

Java (JDK 8) installation –

Hadoop 1.2.1 installation –

Prerequisites:


Add hadoop group 
[root@hadoopdn Shared_Training]# groupadd hadoop

Add hadoop user 
[root@hadoopdn Shared_Training]# useradd hadoop -g hadoop -G hadoop
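
We will later switch to this account with su - hadoop and a password prompt, so give the hadoop user a password now:

[root@hadoopdn Shared_Training]# passwd hadoop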


Make root directories for the JDK and Hadoop installations
[root@hadoopdn Shared_Training]# mkdir -p /opt/jdk8
[root@hadoopdn Shared_Training]# chown -R hadoop:hadoop /opt/jdk8

[root@hadoopdn Shared_Training]# mkdir -p /opt/hadoop
[root@hadoopdn Shared_Training]# chown -R hadoop:hadoop /opt/hadoop

[root@hadoopdn Shared_Training]# chown -R hadoop:hadoop /opt/

Note: this hands the hadoop user ownership of everything under /opt, which is fine on a dedicated lab VM.

Navigate to the software repository directory (/mnt/hgfs is the VMware shared-folder mount)
[root@hadoopdn Shared_Training]# cd /mnt/hgfs/Shared_Training/

List the software files - 

[root@hadoopnn ~]# cd /mnt/hgfs/Shared_Training/
[root@hadoopnn Shared_Training]# ls -lrt
total 241636
-rwxrwxrwx. 1 root root  63851630 Feb  3 18:41 hadoop-1.2.1.tar.gz
-rwxrwxrwx. 1 root root 183246769 Feb  3 19:54 jdk-8u121-linux-x64.tar.gz
-rwxrwxrwx. 1 root root    335532 Feb  5 13:11 openssh-server-5.3p1-117.el6.x86_64.rpm
[root@hadoopnn Shared_Training]#

Copy the JDK tarball to the /opt/jdk8 directory and extract it
[root@hadoopsnn ~]# cd /mnt/hgfs/Shared_Training/
[root@hadoopsnn Shared_Training]# ls -lrt
total 241636
-rwxrwxrwx. 1 root root  63851630 Feb  3 18:41 hadoop-1.2.1.tar.gz
-rwxrwxrwx. 1 root root 183246769 Feb  3 19:54 jdk-8u121-linux-x64.tar.gz
-rwxrwxrwx. 1 root root    335532 Feb  5 13:11 openssh-server-5.3p1-117.el6.x86_64.rpm
[root@hadoopsnn Shared_Training]#  cp jdk-8u121-linux-x64.tar.gz /opt/jdk8/

[root@hadoopsnn Shared_Training]#  cd /opt/jdk8/
[root@hadoopsnn jdk8]# ls -lrt
total 178956
-rwxr-xr-x. 1 root root 183246769 Feb  9 21:39 jdk-8u121-linux-x64.tar.gz
[root@hadoopsnn jdk8]# tar -zxf jdk-8u121-linux-x64.tar.gz
  
Create a version-neutral symlink so the paths in environment files survive future JDK upgrades:
[root@hadoopnn jdk8]# ln -s jdk1.8.0_121 jdk1811
[root@hadoopnn jdk8]# ls -lrt
total 178960
drwxr-xr-x. 8 uucp  143      4096 Dec 13 06:20 jdk1.8.0_121
-rwxr-xr-x. 1 root root 183246769 Feb  9 21:39 jdk-8u121-linux-x64.tar.gz
lrwxrwxrwx. 1 root root        12 Feb  9 21:41 jdk1811 -> jdk1.8.0_121


[root@hadoopnn jdk8]# cd jdk1811/
[root@hadoopnn jdk1811]# pwd
/opt/jdk8/jdk1811
[root@hadoopnn jdk1811]# ls -lrt
total 25924
-rwxr-xr-x. 1 uucp 143   110114 Dec 13 01:40 THIRDPARTYLICENSEREADME-JAVAFX.txt
-rwxr-xr-x. 1 uucp 143  5094116 Dec 13 01:40 javafx-src.zip
-r--r--r--. 1 uucp 143   177094 Dec 13 06:16 THIRDPARTYLICENSEREADME.txt
-r--r--r--. 1 uucp 143      159 Dec 13 06:16 README.html
-r--r--r--. 1 uucp 143       40 Dec 13 06:16 LICENSE
drwxr-xr-x. 3 uucp 143     4096 Dec 13 06:16 include
drwxr-xr-x. 4 uucp 143     4096 Dec 13 06:16 db
-r--r--r--. 1 uucp 143     3244 Dec 13 06:16 COPYRIGHT
-rw-r--r--. 1 uucp 143 21116095 Dec 13 06:16 src.zip
-rw-r--r--. 1 uucp 143      526 Dec 13 06:16 release
drwxr-xr-x. 4 uucp 143     4096 Dec 13 06:16 man
drwxr-xr-x. 5 uucp 143     4096 Dec 13 06:16 jre
drwxr-xr-x. 5 uucp 143     4096 Dec 13 06:17 lib
drwxr-xr-x. 2 uucp 143     4096 Dec 13 06:17 bin


[root@hadoopnn jdk1811]# vi ~/.bashrc
[root@hadoopnn jdk1811]# cat ~/.bashrc
# .bashrc

# User specific aliases and functions

alias rm='rm -i'
alias cp='cp -i'
alias mv='mv -i'
export JAVA_HOME=/opt/jdk8/jdk1811
export PATH=$JAVA_HOME/bin:$PATH
# Source global definitions
if [ -f /etc/bashrc ]; then
        . /etc/bashrc
fi


<Save and Close the File> 


[root@hadoopnn jdk1811]# . ~/.bashrc


[root@hadoopnn jdk1811]# which java
/opt/jdk8/jdk1811/bin/java

[root@hadoopnn jdk1811]# java -version
java version "1.8.0_121"
Java(TM) SE Runtime Environment (build 1.8.0_121-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.121-b13, mixed mode)
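
The per-user ~/.bashrc approach above is what this post follows. If you would rather put the JDK on every account's PATH, one common alternative (an option, not part of these steps) is a profile.d snippet:

[root@hadoopnn ~]# cat > /etc/profile.d/java.sh <<'EOF'
export JAVA_HOME=/opt/jdk8/jdk1811
export PATH=$JAVA_HOME/bin:$PATH
EOF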

[root@hadoopdn Shared_Training]#  cp hadoop-1.2.1.tar.gz /opt/hadoop/
[root@hadoopdn Shared_Training]# cd /opt/hadoop/
[root@hadoopdn hadoop]# ls -lrt
total 62356
-rwxr-xr-x. 1 root root 63851630 Feb  9 21:46 hadoop-1.2.1.tar.gz
[root@hadoopdn hadoop]#
[root@hadoopnn hadoop]# chown hadoop:hadoop hadoop-1.2.1.tar.gz
[root@hadoopnn hadoop]# ls -lrt
total 62356
-rwxr-xr-x. 1 hadoop hadoop 63851630 Feb  9 21:45 hadoop-1.2.1.tar.gz
[root@hadoopnn hadoop]#


Now log in as the hadoop user -
su - hadoop
<Provide the password>

[hadoop@hadoopsnn ~]$ cat  ~/.bash_profile
# .bash_profile

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
        . ~/.bashrc
fi
export JAVA_HOME=/opt/jdk8/jdk1811
export PATH=$JAVA_HOME/bin:$PATH

# User specific environment and startup programs

PATH=$PATH:$HOME/bin

export PATH

<Save and Close the File> 

[hadoop@hadoopnn hadoop]$ cd /opt/hadoop/
[hadoop@hadoopnn hadoop]$ ls -lrt
total 62356
-rwxr-xr-x. 1 hadoop hadoop 63851630 Feb  9 21:45 hadoop-1.2.1.tar.gz
[hadoop@hadoopnn hadoop]$

Extract the tarball and create a version-neutral symlink, just as we did for the JDK:
[hadoop@hadoopdn hadoop]$ tar -zxf hadoop-1.2.1.tar.gz
[hadoop@hadoopdn hadoop]$ ln -s hadoop-1.2.1  hadoop1
[hadoop@hadoopdn hadoop]$
[hadoop@hadoopdn hadoop]$ ls -lrt
total 62360
drwxr-xr-x. 15 hadoop hadoop     4096 Jul 23  2013 hadoop-1.2.1
-rwxr-xr-x.  1 hadoop hadoop 63851630 Feb  9 21:46 hadoop-1.2.1.tar.gz
lrwxrwxrwx.  1 hadoop hadoop       12 Feb  9 21:55 hadoop1 -> hadoop-1.2.1
[hadoop@hadoopdn hadoop]$ cd hadoop1
[hadoop@hadoopdn hadoop1]$ pwd
/opt/hadoop/hadoop1


Set up HADOOP_CONF_DIR in .bash_profile

[hadoop@hadoopdn hadoop1]$ cat ~/.bash_profile
# .bash_profile

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
        . ~/.bashrc
fi
export JAVA_HOME=/opt/jdk8/jdk1811
export HADOOP_CONF_DIR=/opt/hadoop/hadoop1/conf
export PATH=$JAVA_HOME/bin:${HADOOP_CONF_DIR}/../bin:$PATH
# User specific environment and startup programs

PATH=$PATH:$HOME/bin

export PATH
<Save and Close the File> 

Create the Hadoop log and data directories

[hadoop@hadoopdn hadoop1]$ mkdir -p /opt/logs_hadoop
[hadoop@hadoopdn hadoop1]$
[hadoop@hadoopdn hadoop1]$ mkdir -p /opt/data_hadoop
[hadoop@hadoopdn hadoop1]$



[hadoop@hadoopdn hadoop1]$ . ~/.bash_profile
[hadoop@hadoopdn hadoop1]$


[hadoop@hadoopdn hadoop1]$ vi /opt/hadoop/hadoop1/conf/hadoop-env.sh

# The java implementation to use.  Required.
export JAVA_HOME=/opt/jdk8/jdk1811

# Where log files are stored.  $HADOOP_HOME/logs by default.
export HADOOP_LOG_DIR=/opt/logs_hadoop

<Save and Close the File> 




[hadoop@hadoopdn hadoop1]$ java -version
java version "1.8.0_121"
Java(TM) SE Runtime Environment (build 1.8.0_121-b13)
Java HotSpot(TM) 64-Bit Server VM (build 25.121-b13, mixed mode)
[hadoop@hadoopdn hadoop1]$
[hadoop@hadoopdn hadoop1]$
[hadoop@hadoopdn hadoop1]$ hadoop version
Hadoop 1.2.1
Subversion https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152
Compiled by mattf on Mon Jul 22 15:23:09 PDT 2013
From source with checksum 6923c86528809c4e7e6f493b6b413a9a
This command was run using /opt/hadoop/hadoop-1.2.1/hadoop-core-1.2.1.jar
[hadoop@hadoopdn hadoop1]$
[hadoop@hadoopdn hadoop1]$

A quick sanity check - since core-site.xml has not been configured yet, hadoop fs defaults to the local file system, which is why the listing below matches plain ls /tmp:

[hadoop@hadoopdn hadoop1]$ hadoop fs -ls /tmp
Found 15 items
-r--r--r--   1 root   root           11 2017-02-09 21:05 /tmp/.X0-lock
drwx------   - root   root         4096 2017-02-09 21:07 /tmp/keyring-pCmKEX
drwx------   - root   root         4096 2017-02-09 21:07 /tmp/.esd-0
drwx------   - root   root         4096 2017-02-09 21:07 /tmp/pulse-QM6JPWEpMsIo
drwxr-xr-x   - root   root         4096 2017-02-09 01:42 /tmp/vmware-config0
drwxr-xr-x   - hadoop hadoop       4096 2017-02-09 22:06 /tmp/hsperfdata_hadoop
drwx------   - gdm    gdm          4096 2017-02-09 21:07 /tmp/pulse-QYFSuKxTLjL1
drwxrwxrwx   - root   root         4096 2017-02-09 01:43 /tmp/VMwareDnD
drwx------   - gdm    gdm          4096 2017-02-09 21:07 /tmp/orbit-gdm
drwxrwxrwx   - root   root         4096 2017-02-09 21:05 /tmp/.X11-unix
drwx------   - root   root         4096 2017-02-09 21:07 /tmp/vmware-root-2999069680
drwxr-xr-x   - root   root         4096 2017-02-09 21:05 /tmp/vmware-root
-rw-------   1 root   root            0 2017-02-09 01:15 /tmp/yum.log
drwxrwxrwx   - root   root         4096 2017-02-09 21:07 /tmp/.ICE-unix
drwx------   - root   root         4096 2017-02-09 21:08 /tmp/orbit-root
[hadoop@hadoopdn hadoop1]$ ls -lrt /tmp
total 40
-rw-------. 1 root   root      0 Feb  9 01:15 yum.log
drwxr-xr-x. 2 root   root   4096 Feb  9 01:42 vmware-config0
drwxrwxrwt. 2 root   root   4096 Feb  9 01:43 VMwareDnD
drwxr-xr-x. 2 root   root   4096 Feb  9 21:05 vmware-root
drwx------. 2 gdm    gdm    4096 Feb  9 21:07 orbit-gdm
drwx------. 2 root   root   4096 Feb  9 21:07 keyring-pCmKEX
drwx------. 2 root   root   4096 Feb  9 21:07 vmware-root-2999069680
drwx------. 2 root   root   4096 Feb  9 21:07 pulse-QM6JPWEpMsIo
drwx------. 2 gdm    gdm    4096 Feb  9 21:07 pulse-QYFSuKxTLjL1
drwx------. 2 root   root   4096 Feb  9 21:08 orbit-root
drwxr-xr-x. 2 hadoop hadoop 4096 Feb  9 22:06 hsperfdata_hadoop
[hadoop@hadoopdn hadoop1]$



>>>> At this point, the Hadoop binaries are installed correctly.

Up to this point, the steps are identical on every node of a multi-node cluster (DataNode, NameNode, SecondaryNameNode).

Following the steps above, we have created three VMs with CentOS 6.5 and Hadoop V1 (1.2.1).


The next step is to modify the configuration files (hdfs-site.xml, core-site.xml, mapred-site.xml, masters, slaves).

From here the steps differ slightly between a single-node and a multi-node cluster. Let's begin with the single-node cluster -

[hadoop@hadoopdn hadoop1]$ cd $HADOOP_CONF_DIR
[hadoop@hadoopdn conf]$ ls -lrt *.xml
-rw-rw-r--. 1 hadoop hadoop  178 Jul 23  2013 mapred-site.xml
-rw-rw-r--. 1 hadoop hadoop 2033 Jul 23  2013 mapred-queue-acls.xml
-rw-rw-r--. 1 hadoop hadoop  178 Jul 23  2013 hdfs-site.xml
-rw-rw-r--. 1 hadoop hadoop 4644 Jul 23  2013 hadoop-policy.xml
-rw-rw-r--. 1 hadoop hadoop  327 Jul 23  2013 fair-scheduler.xml
-rw-rw-r--. 1 hadoop hadoop  178 Jul 23  2013 core-site.xml
-rw-rw-r--. 1 hadoop hadoop 7457 Jul 23  2013 capacity-scheduler.xml

[hadoop@hadoopnn conf]$ ls -lrt masters slaves
-rw-rw-r--. 1 hadoop hadoop 10 Jul 23  2013 slaves
-rw-rw-r--. 1 hadoop hadoop 10 Jul 23  2013 masters
[hadoop@hadoopnn conf]$
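
The stock masters and slaves files that ship with Hadoop 1.2.1 each contain the single line localhost, which is exactly what a pseudo-distributed (single-node) setup needs, so they can be left untouched. A quick check (expected output shown; verify on your machine):

[hadoop@hadoopnn conf]$ cat masters slaves
localhost
localhost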


Create the HDFS and MapReduce working directories, then open up their permissions:

$ mkdir -p /opt/data_hadoop/mapred/system
$ mkdir -p /opt/data_hadoop/mapred/local
$ mkdir -p /opt/data_hadoop/nn
$ mkdir -p /opt/data_hadoop/dn
$ mkdir -p /opt/data_hadoop/snn

$ chmod -R 755 /opt/data_hadoop
[hadoop@hadoopnn hadoop1]$ cd $HADOOP_CONF_DIR
[hadoop@hadoopnn conf]$ cat core-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://hadoopnn</value>
</property>
</configuration>
[hadoop@hadoopnn conf]$

Since fs.default.name carries no explicit port, the NameNode RPC address defaults to hadoopnn:8020.
[hadoop@hadoopnn conf]$ cat hdfs-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>dfs.name.dir</name>
<value>/opt/data_hadoop/nn</value>
</property>

<property>
<name>dfs.permissions</name>
<value>false</value>
</property>

<property>
<name>dfs.data.dir</name>
<value>/opt/data_hadoop/dn</value>
</property>

<property>
<name>dfs.checkpoint.dir</name>
<value>/opt/data_hadoop/snn</value>
</property>

<property>
<name>dfs.replication</name>
<value>1</value>
</property>

</configuration>
[hadoop@hadoopnn conf]$

dfs.replication is set to 1 because a single-node cluster has only one DataNode to hold each block.
[hadoop@hadoopnn conf]$ cat mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>

<property>
<name>mapred.job.tracker</name>
<value>hadoopnn:8021</value>
</property>

<property>
<name>mapred.local.dir</name>
<value>/opt/data_hadoop/mapred/local</value>
</property>

<property>
<name>mapred.system.dir</name>
<value>/opt/data_hadoop/mapred/system</value>
</property>

<property>
<name>mapred.tasktracker.map.tasks.maximum</name>
<value>2</value>
</property>

<property>
<name>mapred.tasktracker.reduce.tasks.maximum</name>
<value>2</value>
</property>

</configuration>
[hadoop@hadoopnn conf]$

mapred.job.tracker points clients and TaskTrackers at the JobTracker RPC address (hadoopnn:8021), and the two tasktracker maximums cap the node at 2 concurrent map and 2 concurrent reduce tasks - sensible for a small VM.


Archive the conf directory so the same configuration can be reused on the other nodes (or restored later):

[hadoop@hadoopnn hadoop1]$ tar -zcvf tar_conf.tgz  conf/*
conf/capacity-scheduler.xml
conf/configuration.xsl
conf/core-site.xml
conf/fair-scheduler.xml
conf/hadoop-env.sh
conf/hadoop-metrics2.properties
conf/hadoop-policy.xml
conf/hdfs-site.xml
conf/log4j.properties
conf/mapred-queue-acls.xml
conf/mapred-site.xml
conf/masters
conf/slaves
conf/ssl-client.xml.example
conf/ssl-server.xml.example
conf/taskcontroller.cfg
conf/task-log4j.properties
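
To push this archive to another node later, something along these lines should work (hadoopdn is one of the three VMs from this post; it relies on the SSH setup shown further below):

[hadoop@hadoopnn hadoop1]$ scp tar_conf.tgz hadoop@hadoopdn:/opt/hadoop/hadoop1/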



Format the NameNode once, before the first start - reformatting later would wipe the HDFS metadata:

[hadoop@hadoopnn data_hadoop]$ hadoop namenode -format
17/02/10 12:09:22 INFO namenode.NameNode: STARTUP_MSG:
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = hadoopnn/192.168.1.15
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 1.2.1
STARTUP_MSG:   build = https://svn.apache.org/repos/asf/hadoop/common/branches/branch-1.2 -r 1503152; compiled by 'mattf' on Mon Jul 22 15:23:09 PDT 2013
STARTUP_MSG:   java = 1.8.0_121
************************************************************/
17/02/10 12:09:23 INFO util.GSet: Computing capacity for map BlocksMap
17/02/10 12:09:23 INFO util.GSet: VM type       = 64-bit
17/02/10 12:09:23 INFO util.GSet: 2.0% max memory = 1013645312
17/02/10 12:09:23 INFO util.GSet: capacity      = 2^21 = 2097152 entries
17/02/10 12:09:23 INFO util.GSet: recommended=2097152, actual=2097152
17/02/10 12:09:23 INFO namenode.FSNamesystem: fsOwner=hadoop
17/02/10 12:09:24 INFO namenode.FSNamesystem: supergroup=supergroup
17/02/10 12:09:24 INFO namenode.FSNamesystem: isPermissionEnabled=true
17/02/10 12:09:24 INFO namenode.FSNamesystem: dfs.block.invalidate.limit=100
17/02/10 12:09:24 INFO namenode.FSNamesystem: isAccessTokenEnabled=false accessKeyUpdateInterval=0 min(s), accessTokenLifetime=0 min(s)
17/02/10 12:09:24 INFO namenode.FSEditLog: dfs.namenode.edits.toleration.length = 0
17/02/10 12:09:24 INFO namenode.NameNode: Caching file names occuring more than 10 times
17/02/10 12:09:24 INFO common.Storage: Image file /opt/data_hadoop/nn/current/fsimage of size 112 bytes saved in 0 seconds.
17/02/10 12:09:24 INFO namenode.FSEditLog: closing edit log: position=4, editlog=/opt/data_hadoop/nn/current/edits
17/02/10 12:09:24 INFO namenode.FSEditLog: close success: truncate to 4, editlog=/opt/data_hadoop/nn/current/edits
17/02/10 12:09:24 INFO common.Storage: Storage directory /opt/data_hadoop/nn has been successfully formatted.
17/02/10 12:09:24 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at hadoopnn/192.168.1.15
************************************************************/
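
start-all.sh launches every daemon over ssh, even on a single node, so the hadoop user needs passwordless SSH to its own host. If that is not yet in place, a minimal setup with the standard OpenSSH tools looks like this:

[hadoop@hadoopnn ~]$ ssh-keygen -t rsa -P "" -f ~/.ssh/id_rsa
[hadoop@hadoopnn ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[hadoop@hadoopnn ~]$ chmod 700 ~/.ssh; chmod 600 ~/.ssh/authorized_keys
[hadoop@hadoopnn ~]$ ssh hadoopnn true    # should return without a password prompt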


[hadoop@hadoopnn data_hadoop]$ start-all.sh
starting namenode, logging to /opt/logs_hadoop/hadoop-hadoop-namenode-hadoopnn.out
hadoopnn: starting datanode, logging to /opt/logs_hadoop/hadoop-hadoop-datanode-hadoopnn.out
hadoopnn: starting secondarynamenode, logging to /opt/logs_hadoop/hadoop-hadoop-secondarynamenode-hadoopnn.out
starting jobtracker, logging to /opt/logs_hadoop/hadoop-hadoop-jobtracker-hadoopnn.out
hadoopnn: starting tasktracker, logging to /opt/logs_hadoop/hadoop-hadoop-tasktracker-hadoopnn.out
[hadoop@hadoopnn data_hadoop]$



[hadoop@hadoopnn snn]$ jps
9538 NameNode
9987 TaskTracker
9781 SecondaryNameNode
10216 Jps
9657 DataNode
9867 JobTracker
[hadoop@hadoopnn snn]$
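
All five daemons are running. A few optional smoke tests to finish (the examples jar ships inside the Hadoop 1.2.1 directory; 50070 and 50030 are the stock Hadoop 1.x web UI ports):

[hadoop@hadoopnn ~]$ hadoop dfsadmin -report              # confirm the DataNode has registered
[hadoop@hadoopnn ~]$ hadoop fs -mkdir /user/hadoop        # create the hadoop user's HDFS home
[hadoop@hadoopnn ~]$ hadoop jar /opt/hadoop/hadoop1/hadoop-examples-1.2.1.jar pi 2 10

NameNode web UI: http://hadoopnn:50070/   JobTracker web UI: http://hadoopnn:50030/
To stop everything: stop-all.sh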





