#
# Cookbook Name:: hadoop
# Recipe:: cdh
#
# Copyright 2013, whitestar
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

# Mix the Helper module into Chef::Recipe so that its methods can be called
# directly from recipe code (presumably the helper calls used below, such as
# parse_cdh_version_number and get_users -- confirm against the Helper source).
::Chef::Recipe.send(:include, Helper)

# Filesystem locations used throughout this recipe.
install_root = '/usr/lib/hadoop'
conf_dir     = '/etc/hadoop/conf'
conf_tpl_dir = '/etc/hadoop/conf.dist/'

# Placeholders; both are assigned further down, once the Hadoop major version
# (and the resource negotiator framework) have been determined.
daemons = conf_files = nil

# Daemon name suffixes. The recipe prepends "hadoop-" to each entry to form
# the package/service name.

# Daemons for Hadoop 1.x-equivalent releases.
daemons_v1 = %w[
  0.20-namenode
  0.20-datanode
  0.20-secondarynamenode
  0.20-jobtracker
  0.20-tasktracker
]

# HDFS daemons for Hadoop 2.x-equivalent releases.
daemons_v2_hdfs = %w[
  hdfs-namenode
  hdfs-secondarynamenode
  hdfs-datanode
]

# Additional daemons when the YARN framework is selected.
daemons_v2_yarn = %w[
  yarn-resourcemanager
  yarn-nodemanager
  mapreduce-historyserver
]

# Additional daemons when the MRv1 framework is selected.
daemons_v2_mrv1 = %w[
  0.20-mapreduce-jobtracker
  0.20-mapreduce-tasktracker
]

# Configuration files managed for Hadoop 1.x-equivalent releases.
# Intentionally left out of this list:
#   log4j.properties                -- CDH3: old (Hadoop Metrics v1)
#   ssl-client.xml, ssl-server.xml  -- not supported by this recipe.
conf_files_v1 = %w[
  capacity-scheduler.xml
  configuration.xsl
  core-site.xml
  fair-scheduler.xml
  hadoop-env.sh
  hadoop-metrics.properties
  hadoop-policy.xml
  hdfs-site.xml
  hosts.include
  hosts.exclude
  mapred-queue-acls.xml
  mapred-site.xml
  masters
  org-xerial-snappy.properties
  slaves
]

# Hadoop 2.x with YARN: start from the v1 list, drop the files below, then
# append the YARN-era files.
# (hadoop-policy.xml is dropped too -- flagged "missing!?" by the author.)
yarn_removed_files = %w[
  configuration.xsl
  hadoop-metrics.properties
  hadoop-policy.xml
  mapred-queue-acls.xml
  masters
  org-xerial-snappy.properties
]
# Not (yet) added:
#   TODO: -> cdh_httpfs recipe. (hadoop-httpfs rpm package)
#     httpfs-env.sh, httpfs-log4j.properties,
#     httpfs-signature.secret, httpfs-site.xml
#   mapred-env.sh  -- version >= 4.?
yarn_added_files = %w[
  hadoop-metrics2.properties
  log4j.properties
  yarn-env.sh
  yarn-site.xml
]
conf_files_v2_yarn = (conf_files_v1 - yarn_removed_files) + yarn_added_files

# Hadoop 2.x with MRv1: start from the v1 list, drop the files below, then
# append the metrics2 and log4j property files.
# (configuration.xsl is kept here; mapred-env.sh -- version >= 4.? -- is not
# added.)
mrv1_removed_files = %w[
  masters
  org-xerial-snappy.properties
]
mrv1_added_files = %w[
  hadoop-metrics2.properties
  log4j.properties
]
conf_files_v2_mrv1 = (conf_files_v1 - mrv1_removed_files) + mrv1_added_files

# Split the configured CDH version into its parts and derive the equivalent
# Apache Hadoop version from the middle (major.minor) component.
version, major_version, middle_version =
  parse_cdh_version_number(node['cdh']['version'])
equivalent_apache_hadoop_middle_version =
  get_equivalent_apache_hadoop_version_of('cdh', middle_version)
equivalent_apache_hadoop_major_version =
  equivalent_apache_hadoop_middle_version.split('.').first

# Only CDH 3.5-3.6 and 4.2-4.3 are accepted. The bounds are compared as
# strings (lexicographically), not numerically.
cdh3_supported = '3.5' <= middle_version && middle_version <= '3.6'
cdh4_supported = '4.2' <= middle_version && middle_version <= '4.3'
unless cdh3_supported || cdh4_supported
  Chef::Application.fatal!("Non supported version: #{version}")
end

# Framework that schedules MapReduce work on Hadoop 2.x-equivalent releases;
# the values recognised below are 'YARN' and 'MRv1' (case-sensitive).
resource_negotiator_framework = node['hadoop']['cdh']['resource_negotiator_framework']

# Account information for this Hadoop major version.
# Note: this recipe does not add any hadoop accounts by Chef.
users = get_users(equivalent_apache_hadoop_major_version)

# Choose the configuration template directory, the daemons to install and the
# configuration files to render. Every unrecognised combination aborts the run
# with an explicit message; otherwise `daemons`/`conf_files` would stay nil and
# the recipe would fail later with an unhelpful NoMethodError.
case equivalent_apache_hadoop_major_version
when '1'
  conf_tpl_dir = '/etc/hadoop/conf.empty'
  daemons = daemons_v1
  conf_files = conf_files_v1
when '2'
  conf_tpl_dir = '/etc/hadoop/conf.dist/'
  case resource_negotiator_framework
  when 'YARN'
    daemons = daemons_v2_hdfs + daemons_v2_yarn
    conf_files = conf_files_v2_yarn
  when 'MRv1'
    daemons = daemons_v2_hdfs + daemons_v2_mrv1
    conf_files = conf_files_v2_mrv1
  else
    Chef::Application.fatal!(
      'Non supported resource negotiator framework: ' \
      "#{resource_negotiator_framework.inspect} (expected 'YARN' or 'MRv1')"
    )
  end
else
  Chef::Application.fatal!(
    'Non supported equivalent Apache Hadoop major version: ' \
    "#{equivalent_apache_hadoop_major_version.inspect}"
  )
end

# Install the package of every selected daemon, and leave the matching
# service disabled and stopped.
daemons.each do |daemon|
  daemon_name = "hadoop-#{daemon}"

  package daemon_name do
    action :install
  end

  service daemon_name do
    action [:disable, :stop]
  end
end

# libhadoop, libsnappy, libpipes
# Separate native/pipes packages are installed only for the ver. 1 line on
# x86_64. Nothing is done in the ver. 2:
#   native -> hadoop package
#   pipes  -> hadoop package
if equivalent_apache_hadoop_major_version == '1' && node[:kernel][:machine] == 'x86_64'
  %w[0.20-native 0.20-pipes].each do |native_lib|
    package "hadoop-#{native_lib}" do
      action :install
    end
  end
end

# (Re)create the HADOOP_PREFIX symlink so that it points at the install root.
link node['hadoop']['HADOOP_PREFIX'] do
  action [:delete, :create]
  to install_root
end

# CDH4 running MRv1 is passed to setup_directories as version '1'; every other
# combination passes the equivalent Apache Hadoop major version.
directory_layout_version =
  if major_version == '4' && resource_negotiator_framework == 'MRv1'
    '1'
  else
    equivalent_apache_hadoop_major_version
  end
active_vol_nums = setup_directories(directory_layout_version)
log "This node active volumes: #{active_vol_nums}"

# Copy the distribution's configuration template directory to the
# site-specific path and register that path as a `hadoop-conf` alternative.
# The `creates` guard skips the script once the target path exists.
hadoop_conf_alternative = node['hadoop']['cdh']['alternatives']['hadoop-conf']
alt_conf_path = hadoop_conf_alternative['path']
alt_conf_priority = hadoop_conf_alternative['priority']

bash 'alternatives_hadoop-conf' do
  creates alt_conf_path
  code <<-EOC
    cp -R #{conf_tpl_dir} #{alt_conf_path}
    alternatives --install #{conf_dir} hadoop-conf #{alt_conf_path} #{alt_conf_priority}
  EOC
end

# Render the selected configuration files into conf_dir, passing the active
# volume numbers to the templates.
tpl_vars = { :active_vol_nums => active_vol_nums }
conf_template(
  conf_dir,
  equivalent_apache_hadoop_middle_version,
  conf_files,
  tpl_vars
)

# with security
# Extra resources that are declared only when node['hadoop']['with_security']
# is set.
if node['hadoop']['with_security']
  # Directory named by node['hadoop']['this.keytab.dir'].
  directory node['hadoop']['this.keytab.dir'] do
    owner 'root'
    group 'root'
    mode '0755'
    recursive true
    action :create
  end

  # Version component used in the template source paths below.
  secure_tpl_version = equivalent_apache_hadoop_middle_version

  case equivalent_apache_hadoop_major_version
  when '1'
    # task-controller, jsvc (Note: supports x86_64 only.)
    if node[:kernel][:machine] == 'x86_64'
      package 'hadoop-0.20-sbin' do
        action :install
      end
      # Disabled resource, kept for reference:
      #   file "#{install_root}/sbin/Linux-amd64-64/task-controller" do
      #     owner 'root'
      #     group users[:mapred][:name]
      #     mode '4754'
      #   end
    end

    template "#{conf_dir}/taskcontroller.cfg" do
      source "conf-#{secure_tpl_version}/taskcontroller.cfg"
      owner 'root'
      group 'root'
      mode '0400'  # '0644' by the distribution?!
      variables(:active_vol_nums => active_vol_nums)
    end
  when '2'
    # jsvc installation is not necessary.
    # hadoop-hdfs requires bigtop-jsvc package.
    template '/etc/default/hadoop-hdfs-datanode' do
      source "etc-#{secure_tpl_version}/default/hadoop-hdfs-datanode"
      owner 'root'
      group 'root'
      mode '0644'
    end

    case resource_negotiator_framework
    when 'YARN'
      # root-owned, yarn group, mode 6050.
      file '/usr/lib/hadoop-yarn/bin/container-executor' do
        owner 'root'
        group users[:yarn][:name]
        mode '6050'
      end

      template "#{conf_dir}/container-executor.cfg" do
        source "etc-#{secure_tpl_version}/hadoop/container-executor.cfg"
        owner 'root'
        group 'root'
        mode '0400'
        variables(:active_vol_nums => active_vol_nums)
      end
    when 'MRv1'
      # hadoop-0.20-mapreduce package includes task-controller
      # (Note: supports both i386 and x86_64.)
      # Disabled resource, kept for reference:
      #   file '/usr/lib/hadoop-0.20-mapreduce/sbin/Linux-{i386-32,x86-64}/task-controller' do
      #     owner 'root'
      #     group users[:mapred][:name]
      #     mode '4754'
      #   end
      template "#{conf_dir}/taskcontroller.cfg" do
        source "etc-#{secure_tpl_version}/hadoop/taskcontroller.cfg"
        owner 'root'
        group 'root'
        mode '0400'  # '0644' by the distribution?!
        variables(:active_vol_nums => active_vol_nums)
      end
    end
  end
end

# Log the manual HDFS initialisation steps for the operator and pick the
# examples jar that matches the installed MapReduce flavour.
# The datanode service names in the notes match the daemons this recipe
# installs ("hadoop-0.20-datanode" / "hadoop-hdfs-datanode").
case equivalent_apache_hadoop_major_version
when '1'
  log <<-EOM
Note:
You must initialize HDFS in the first installation:
  $ sudo -u hdfs hadoop namenode -format
  $ sudo service hadoop-0.20-namenode start
  $ sudo service hadoop-0.20-datanode start
  $ sudo -u hdfs hadoop fs -chown hdfs:hdfs /
  $ sudo -u hdfs hadoop fs -chmod 755 /
  $ sudo -u hdfs hadoop fs -mkdir /user
  $ sudo -u hdfs hadoop fs -mkdir #{node['grid']['vol_root']}/0/var/lib/mapred
  $ sudo -u hdfs hadoop fs -chown mapred:mapred #{node['grid']['vol_root']}/0/var/lib/mapred
  EOM

  examples_jar = '/usr/lib/hadoop/hadoop-examples.jar'
when '2'
  # Extra steps for the NodeManager log directory; only shown for YARN.
  yarn_hdfs_dirs_setup =
    if resource_negotiator_framework == 'YARN'
      <<-EOM
  $ sudo -u hdfs hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/log/yarn/nm
  $ sudo -u hdfs hadoop fs -chown yarn:hadoop #{node['grid']['vol_root']}/0/var/log/yarn/nm
  $ sudo -u hdfs hadoop fs -chmod 1777        #{node['grid']['vol_root']}/0/var/log/yarn/nm
      EOM
    else
      ''
    end

  log <<-EOM
Note:
You must initialize HDFS in the first installation:
  $ sudo -u hdfs hdfs namenode -format
  $ sudo service hadoop-hdfs-namenode start
  $ sudo service hadoop-hdfs-datanode start
  $ sudo -u hdfs hadoop fs -chown hdfs:hdfs /
  $ sudo -u hdfs hadoop fs -chmod 755 /
  $ sudo -u hdfs hadoop fs -mkdir /user
#{yarn_hdfs_dirs_setup}
  $ sudo -u hdfs hadoop fs -mkdir -p #{node['grid']['vol_root']}/0/var/lib/mapred/history
  $ sudo -u hdfs hadoop fs -chown -R mapred:hadoop #{node['grid']['vol_root']}/0/var/lib/mapred
  $ sudo -u hdfs hadoop fs -chmod -R 755           #{node['grid']['vol_root']}/0/var/lib/mapred
  EOM

  # MRv1 uses the 0.20-mapreduce examples jar; otherwise the MRv2 jar is used.
  examples_jar = (resource_negotiator_framework == 'MRv1') \
    ? '/usr/lib/hadoop-0.20-mapreduce/hadoop-examples.jar' \
    : '/usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar'
end

# Log an example MapReduce job invocation. The secured variant obtains a
# Kerberos ticket first; the unsecured variant creates the user and its HDFS
# home directory first.
example_job_note =
  if node['hadoop']['with_security']
    <<-EOM
Note:
Example MapReduce job execution:
  $ sudo -u alice kinit
  Password for alice@LOCALDOMAIN: 
  $ sudo -u alice hadoop jar #{examples_jar} pi \\
  > -D mapreduce.job.acl-view-job=* -D mapreduce.job.acl-modify-job=alice 5 10
    EOM
  else
    <<-EOM
Note:
Example MapReduce job execution:
  $ sudo adduser alice
  $ sudo -u hdfs hadoop fs -mkdir /user/alice
  $ sudo -u hdfs hadoop fs -chown alice:alice /user/alice
  $ sudo -u alice hadoop jar #{examples_jar} pi 5 10
    EOM
  end
log example_job_note
