BIGTOP-1041. Add Puppet support for Spark cluster deployment
author Roman Shaposhnik <rvs@cloudera.com>
Sat, 5 Oct 2013 02:48:09 +0000 (19:48 -0700)
committer Roman Shaposhnik <rvs@cloudera.com>
Sat, 5 Oct 2013 02:48:09 +0000 (19:48 -0700)
bigtop-deploy/puppet/manifests/cluster.pp
bigtop-deploy/puppet/modules/spark/manifests/init.pp [new file with mode: 0644]
bigtop-deploy/puppet/modules/spark/templates/spark-env.sh [new file with mode: 0644]
bigtop-deploy/puppet/modules/spark/tests/init.pp [new file with mode: 0644]

index cec9cec..b4bb36d 100644 (file)
@@ -69,6 +69,8 @@ class hadoop_cluster_node {
   $hadoop_rm_proxy_port              = extlookup("hadoop_rm_proxy_port", "8088")
   $hadoop_history_server_port        = extlookup("hadoop_history_server_port", "19888")
   $hbase_thrift_port                 = extlookup("hbase_thrift_port", "9090")
+  $spark_master_port                 = extlookup("spark_master_port", "7077")
+  $spark_master_ui_port              = extlookup("spark_master_ui_port", "18080")
 
   $hadoop_ha_zookeeper_quorum        = "${hadoop_head_node}:${hadoop_zookeeper_port}"
   $solrcloud_zk                      = "${hadoop_head_node}:${hadoop_zookeeper_port}"
@@ -95,6 +97,8 @@ class hadoop_cluster_node {
 
   $giraph_zookeeper_quorum       = $hadoop_head_node
 
+  $spark_master_host             = $hadoop_head_node
+
   $hadoop_zookeeper_ensemble = ["$hadoop_head_node:2888:3888"]
 
   # Set from facter if available
@@ -171,6 +175,12 @@ class hadoop_worker_node inherits hadoop_cluster_node {
        root_url    => $hadoop_namenode_uri,
        kerberos_realm => $kerberos_realm,
   }
+
+  spark::worker { "spark worker":
+       master_host    => $spark_master_host,
+       master_port    => $spark_master_port,
+       master_ui_port => $spark_master_ui_port,
+  }
 }
 
 class hadoop_head_node inherits hadoop_worker_node {
@@ -239,6 +249,12 @@ class hadoop_head_node inherits hadoop_worker_node {
         kerberos_realm => $kerberos_realm,
   }
 
+  spark::master { "spark master":
+       master_host    => $spark_master_host,
+       master_port    => $spark_master_port,
+       master_ui_port => $spark_master_ui_port,
+  }
+
   hadoop-zookeeper::server { "zookeeper":
         myid => "0",
         ensemble => $hadoop_zookeeper_ensemble,
diff --git a/bigtop-deploy/puppet/modules/spark/manifests/init.pp b/bigtop-deploy/puppet/modules/spark/manifests/init.pp
new file mode 100644 (file)
index 0000000..f23387e
--- /dev/null
@@ -0,0 +1,51 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+class spark {
+  class common {
+    package { "spark":
+      ensure => latest,
+    }
+
+    file { "/etc/spark/conf/spark-env.sh":
+        content => template("spark/spark-env.sh"),
+        require => [Package["spark"]],
+    }
+  }
+
+  define master($master_host = $fqdn, $master_port = "7077", $master_ui_port = "18080") {
+    include common
+
+    service { "spark-master":
+      ensure => running,
+      require => [ Package["spark"], File["/etc/spark/conf/spark-env.sh"], ],
+      subscribe => [Package["spark"], File["/etc/spark/conf/spark-env.sh"] ],
+      hasrestart => true,
+      hasstatus => true,
+    }
+  }
+
+  define worker($master_host = $fqdn, $master_port = "7077", $master_ui_port = "18080") {
+    include common
+
+    service { "spark-worker":
+      ensure => running,
+      require => [ Package["spark"], File["/etc/spark/conf/spark-env.sh"], ],
+      subscribe => [Package["spark"], File["/etc/spark/conf/spark-env.sh"] ],
+      hasrestart => true,
+      hasstatus => true,
+    }
+  }
+}
diff --git a/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh b/bigtop-deploy/puppet/modules/spark/templates/spark-env.sh
new file mode 100644 (file)
index 0000000..5f14cc0
--- /dev/null
@@ -0,0 +1,38 @@
+#!/usr/bin/env bash
+
+# This file contains environment variables required to run Spark. Copy it as
+# spark-env.sh and edit that to configure Spark for your site.
+#
+# The following variables can be set in this file:
+# - SPARK_LOCAL_IP, to set the IP address Spark binds to on this node
+# - MESOS_NATIVE_LIBRARY, to point to your libmesos.so if you use Mesos
+# - SPARK_JAVA_OPTS, to set node-specific JVM options for Spark. Note that
+#   we recommend setting app-wide options in the application's driver program.
+#     Examples of node-specific options : -Dspark.local.dir, GC options
+#     Examples of app-wide options : -Dspark.serializer
+#
+# If using the standalone deploy mode, you can also set variables for it here:
+# - SPARK_MASTER_IP, to bind the master to a different IP address or hostname
+# - SPARK_MASTER_PORT / SPARK_MASTER_WEBUI_PORT, to use non-default ports
+# - SPARK_WORKER_CORES, to set the number of cores to use on this machine
+# - SPARK_WORKER_MEMORY, to set how much memory to use (e.g. 1000m, 2g)
+# - SPARK_WORKER_PORT / SPARK_WORKER_WEBUI_PORT
+# - SPARK_WORKER_INSTANCES, to set the number of worker processes per node
+
+
+### Let's run everything with JVM runtime, instead of Scala
+export SPARK_LAUNCH_WITH_SCALA=0
+export SPARK_LIBRARY_PATH=${SPARK_HOME}/lib
+export SCALA_LIBRARY_PATH=${SPARK_HOME}/lib
+export SPARK_MASTER_WEBUI_PORT=<%= master_ui_port %>
+export SPARK_MASTER_PORT=<%= master_port %>
+
+### Comment out the above 2 lines and uncomment the following if
+### you want to run with scala version, that is included with the package
+#export SCALA_HOME=${SCALA_HOME:-/usr/lib/spark/scala}
+#export PATH=$PATH:$SCALA_HOME/bin
+
+### change the following to specify a real cluster's Master host
+export STANDALONE_SPARK_MASTER_HOST=<%= master_host %>
+
+
diff --git a/bigtop-deploy/puppet/modules/spark/tests/init.pp b/bigtop-deploy/puppet/modules/spark/tests/init.pp
new file mode 100644 (file)
index 0000000..fab86f1
--- /dev/null
@@ -0,0 +1,25 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+spark::master { "spark master": 
+  master_port        => "1978",
+  master_ui_port     => "1979",
+}
+
+spark::worker { "spark worker": 
+  master_host        => "somewhere.com",
+  master_port        => "1978",
+  master_ui_port     => "1979",
+}