升级本机Hadoop/Pig版本
安装java
参考安装java8
安装hadoop
从hadoop官网下载hadoop-2.8.0并解压,然后把本机的hadoop配置目录/etc/hadoop/conf
拷贝到解压后的hadoop-2.8.0目录下(即下文的$HADOOP_HOME/conf)。
设置环境变量
# Hadoop/YARN environment. HADOOP_HOME is the unpacked hadoop-2.8.0 directory;
# the configuration directory is expected at $HADOOP_HOME/conf.
export HADOOP_HOME=/home/zhangkai/data/tools/runtime/hadoop-2.8.0
export HADOOP_CONF_DIR="$HADOOP_HOME/conf"
# YARN reuses the Hadoop install and configuration directories.
export YARN_HOME="$HADOOP_HOME"
export YARN_CONF_DIR="$HADOOP_CONF_DIR"
# Append the native library directory. The ${VAR:+...} form keeps the old value
# only when it is non-empty, so no leading ':' is produced; an empty entry
# would make the dynamic loader search the current working directory.
export LD_LIBRARY_PATH="${LD_LIBRARY_PATH:+$LD_LIBRARY_PATH:}$HADOOP_HOME/lib/native"
export PATH="$HADOOP_HOME/bin:$PATH"
安装pig
从pig官网下载pig-0.16.0并解压,设置环境变量
# Pig 0.16.0 environment: install location, classpath and PATH entry.
PIG_HOME=/home/zhangkai/data/tools/runtime/pig-0.16.0
# Put the Hadoop configuration directory on Pig's classpath.
PIG_CLASSPATH=${HADOOP_CONF_DIR}
PATH=${PIG_HOME}/bin:${PATH}
export PIG_HOME PIG_CLASSPATH PATH
安装spark
从spark官网下载spark-2.1.1-bin-hadoop2.7并解压,设置环境变量
# Spark 2.1.1 (pre-built for Hadoop 2.7) environment.
export SPARK_HOME=/home/zhangkai/data/tools/runtime/spark-2.1.1-bin-hadoop2.7
# Quoted so the '*' entries are stored literally and never expanded by the shell.
export SPARK_LIBRARY_PATH="$SPARK_HOME/classpath/emr/*:$SPARK_HOME/classpath/emrfs/*:$SPARK_HOME/lib/*"
# Must reference SPARK_HOME: the previous "$PARK_HOME" expanded to an empty
# string and put "/bin" on PATH instead of Spark's bin directory.
export PATH="$SPARK_HOME/bin:$PATH"
安装maven
从maven官网下载maven-3.5.0并解压,设置环境变量
# Prepend the Maven 3.5.0 bin directory to the command search path.
PATH=/home/zhangkai/data/tools/runtime/apache-maven-3.5.0/bin:${PATH}
export PATH
安装hadoop-lzo
安装lzo native library
# Download and unpack the LZO 2.10 sources, then enter the source tree.
wget http://www.oberhumer.com/opensource/lzo/download/lzo-2.10.tar.gz
tar -xzf lzo-2.10.tar.gz && cd lzo-2.10
# Pass -m64 to both the C and C++ compilers.
CFLAGS=-m64
CXXFLAGS=-m64
export CFLAGS CXXFLAGS
# Install under the private prefix /home/zhangkai/data/tools/opt.
./configure --prefix=/home/zhangkai/data/tools/opt
make -j10 && make install
安装hadoop-lzo
下载源码并进入源码目录(后续命令均在该目录下执行):git clone https://github.com/twitter/hadoop-lzo.git && cd hadoop-lzo
修改pom.xml中的hadoop.current.version为当前hadoop版本。
# Build hadoop-lzo against the LZO headers and libraries installed under
# /home/zhangkai/data/tools/opt, then install the results into $HADOOP_HOME.
# Run from the root of the hadoop-lzo checkout.
export CFLAGS=-m64
export CXXFLAGS=-m64
# ${VAR:+:$VAR} appends the previous value only when it is non-empty; a
# trailing ':' would add an empty entry, which GCC treats as the current
# working directory.
export C_INCLUDE_PATH="/home/zhangkai/data/tools/opt/include${C_INCLUDE_PATH:+:$C_INCLUDE_PATH}"
export LIBRARY_PATH="/home/zhangkai/data/tools/opt/lib${LIBRARY_PATH:+:$LIBRARY_PATH}"
mvn clean package -Dmaven.test.skip=true
# Copy the native libraries from inside a subshell so the working directory
# of the calling shell stays at the project root; the jar path in the last
# command is relative to that root and would not resolve after a plain `cd`.
(
  cd target/native/Linux-amd64-64 &&
    tar -cBf - -C lib . | tar -xBvf - -C ../ &&
    cp ../libgplcompression* "$HADOOP_HOME/lib/native/"
)
cp target/hadoop-lzo-0.4.18-SNAPSHOT.jar "$HADOOP_HOME/share/hadoop/common/"