Here’s a comprehensive explanation and structure for setting up a Zookeeper and Kafka ensemble using Ansible:
Zookeeper Configuration (zoo.cfg)
This is the configuration file for Zookeeper:
# ZooKeeper ensemble configuration (zoo.cfg) for a 3-node cluster.
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial synchronization phase can take
initLimit=10
# The number of ticks that can pass between sending a request and getting an acknowledgement
syncLimit=5
# The directory where the snapshot is stored
# NOTE(review): /tmp is usually wiped on reboot; for anything beyond a lab
# setup a persistent path (e.g. /var/lib/zookeeper) is safer — confirm intent.
dataDir=/tmp/data1/zookeeper
# The port at which the clients will connect
clientPort=2181
# The maximum number of client connections
maxClientCnxns=1000
# Server configurations
# server.N=host:peerPort:leaderElectionPort — N must match the integer written
# to dataDir/myid on that host (see the myid step later in this document).
server.1=192.168.33.20:2888:3888
server.2=192.168.33.30:2888:3888
server.3=192.168.33.40:2888:3888
Create the Data Directory for Zookeeper
# Create the snapshot directory referenced by dataDir in zoo.cfg.
# (The Ansible playbook below also creates it with zookeeper ownership.)
mkdir -p /tmp/data1/zookeeper
Log4j Properties for Zookeeper Logging (log4j.properties)
# ZooKeeper log4j configuration. The zookeeper.* properties below are
# defaults that can be overridden from the JVM command line (-Dname=value).
# Default values
zookeeper.root.logger=INFO, CONSOLE
zookeeper.console.threshold=INFO
zookeeper.log.dir=/var/log/zookeeper
zookeeper.log.file=zookeeper.log
zookeeper.log.threshold=DEBUG
# Logging Configuration
# Only the appenders named in zookeeper.root.logger are active; with the
# default above that is CONSOLE. Set e.g. -Dzookeeper.root.logger=INFO,ROLLINGFILE
# to route logs into the rolling file appender defined below.
log4j.rootLogger=${zookeeper.root.logger}
log4j.appender.CONSOLE=org.apache.log4j.ConsoleAppender
log4j.appender.CONSOLE.Threshold=${zookeeper.console.threshold}
log4j.appender.CONSOLE.layout=org.apache.log4j.PatternLayout
log4j.appender.CONSOLE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
# Rolling file appender: writes to ${zookeeper.log.dir}/${zookeeper.log.file},
# rotating at 10MB per file.
log4j.appender.ROLLINGFILE=org.apache.log4j.RollingFileAppender
log4j.appender.ROLLINGFILE.Threshold=${zookeeper.log.threshold}
log4j.appender.ROLLINGFILE.File=${zookeeper.log.dir}/${zookeeper.log.file}
log4j.appender.ROLLINGFILE.MaxFileSize=10MB
log4j.appender.ROLLINGFILE.layout=org.apache.log4j.PatternLayout
log4j.appender.ROLLINGFILE.layout.ConversionPattern=%d{ISO8601} [myid:%X{myid}] - %-5p [%t:%C{1}@%L] - %m%n
Create the Log Directory for Zookeeper
# Create the log directory used by zookeeper.log.dir (log4j.properties)
# and ZOO_LOG_DIR (zookeeper-service.sh).
mkdir -p /var/log/zookeeper
Ansible Playbook for Zookeeper Setup
# Install and configure a ZooKeeper ensemble member on every host in the
# "ensemble" inventory group: dedicated user, unpacked release, stable
# symlink, config sync, and data/log directories.
- hosts: ensemble
  vars:
    ZOOKEEPER_VERSION: 3.4.12
  tasks:
    - name: Add user zookeeper
      user:
        name: zookeeper

    - name: Download and Unarchive Zookeeper
      unarchive:
        # NOTE(review): www-us.apache.org/dist drops old releases;
        # archive.apache.org keeps every version, and the tarball lives
        # under a per-version directory.
        src: "https://archive.apache.org/dist/zookeeper/zookeeper-{{ ZOOKEEPER_VERSION }}/zookeeper-{{ ZOOKEEPER_VERSION }}.tar.gz"
        dest: /opt
        remote_src: yes
        owner: zookeeper
        group: zookeeper
        mode: "0755"   # quoted so YAML keeps the leading-zero octal intact

    - name: Create symlink for Zookeeper base directory
      file:
        src: /opt/zookeeper-{{ ZOOKEEPER_VERSION }}
        path: /opt/zookeeper
        state: link
        owner: zookeeper
        group: zookeeper

    - name: Copy configuration files
      # Pushes files/opt/zookeeper/conf from the control node;
      # base_dir must be defined in inventory or extra vars.
      synchronize:
        src: "{{ base_dir }}/files/opt/zookeeper/conf"
        dest: /opt/zookeeper

    - name: Create data and log directories
      file:
        path: "{{ item }}"
        state: directory
        owner: zookeeper
        group: zookeeper
        mode: "0755"
      loop:
        - /tmp/data1/zookeeper   # matches dataDir in zoo.cfg
        - /var/log/zookeeper     # matches zookeeper.log.dir / ZOO_LOG_DIR
Start Zookeeper Using Ansible
# Start ZooKeeper on every ensemble host. The script module copies
# ./scripts/zookeeper-service.sh from the control node to each target and
# runs it there; --become/--become-user execute it as the zookeeper user.
ansible ensemble \
-i hosts \
-m script \
-a "./scripts/zookeeper-service.sh start" \
--become-user zookeeper \
--become
Create Zookeeper Service Script (zookeeper-service.sh)
#!/bin/bash
# zookeeper-service.sh — thin wrapper around zkServer.sh that pins the
# ZooKeeper log directory. Usage: zookeeper-service.sh {start|stop|restart|status}
export ZOO_LOG_DIR=/var/log/zookeeper
# "$@" forwards every argument verbatim and quoted (the original's unquoted
# $1 was subject to word-splitting); exec replaces this shell instead of
# leaving an extra process behind.
exec /opt/zookeeper/bin/zkServer.sh "$@"
Set the myid File for Each Zookeeper Node
# Each node identifies itself by the integer stored in dataDir/myid; it must
# match that host's server.N entry in zoo.cfg (.20 -> 1, .30 -> 2, .40 -> 3).
ssh vagrant@192.168.33.20 "sudo -u zookeeper sh -c 'echo 1 > /tmp/data1/zookeeper/myid'"
ssh vagrant@192.168.33.30 "sudo -u zookeeper sh -c 'echo 2 > /tmp/data1/zookeeper/myid'"
ssh vagrant@192.168.33.40 "sudo -u zookeeper sh -c 'echo 3 > /tmp/data1/zookeeper/myid'"
Kafka Configuration (server.properties)
# Kafka broker configuration (server.properties).
# Basic Server Settings
# broker.id must be unique per broker; the start script later in this
# document rewrites it per host (0 is only the template value).
broker.id=0
# Two data directories — Kafka spreads partitions across them.
# NOTE(review): /tmp is usually wiped on reboot; fine for a lab, not for
# durable storage — confirm intent.
log.dirs=/tmp/data1/kafka-logs,/tmp/data2/kafka-logs
num.network.threads=3
num.io.threads=8
socket.send.buffer.bytes=102400
socket.receive.buffer.bytes=102400
socket.request.max.bytes=104857600
# Log Basics
log.segment.bytes=1073741824
# Retain segments for 7 days; retention is checked every 5 minutes.
log.retention.hours=168
log.retention.check.interval.ms=300000
# Zookeeper Settings
# All three ensemble members, so the broker survives a single ZK outage.
zookeeper.connect=192.168.33.20:2181,192.168.33.30:2181,192.168.33.40:2181
zookeeper.connection.timeout.ms=6000
Create Kafka Log Directories
# Create both data directories referenced by log.dirs in server.properties.
# (The Kafka playbook below also creates them with kafka ownership.)
mkdir -p /tmp/data1/kafka-logs /tmp/data2/kafka-logs
Ansible Playbook for Kafka Setup
# Install and configure a Kafka broker on every host in the "brokers"
# inventory group: dedicated user, unpacked release, stable symlink,
# config sync, and data directories.
- hosts: brokers
  vars:
    SCALA_VERSION: 2.11
    KAFKA_VERSION: 2.0.0
  tasks:
    - name: Add user kafka
      user:
        name: kafka

    - name: Download Kafka Tar
      unarchive:
        # NOTE(review): www-us.apache.org/dist drops old releases;
        # archive.apache.org retains every version permanently.
        src: "https://archive.apache.org/dist/kafka/{{ KAFKA_VERSION }}/kafka_{{ SCALA_VERSION }}-{{ KAFKA_VERSION }}.tgz"
        dest: /opt
        remote_src: yes
        owner: kafka
        group: kafka
        mode: "0755"   # quoted so YAML keeps the leading-zero octal intact

    - name: Create symlink for Kafka base directory
      file:
        src: /opt/kafka_{{ SCALA_VERSION }}-{{ KAFKA_VERSION }}
        path: /opt/kafka
        state: link
        owner: kafka
        group: kafka

    - name: Copy server.properties
      # Pushes files/opt/kafka/config from the control node;
      # base_dir must be defined in inventory or extra vars.
      synchronize:
        src: "{{ base_dir }}/files/opt/kafka/config"
        dest: /opt/kafka

    - name: Change permissions on /opt/kafka/config
      file:
        path: /opt/kafka/config
        recurse: yes
        owner: kafka
        group: kafka
        mode: "0755"

    - name: Create directories for Kafka data with owner as kafka
      file:
        path: "{{ item }}"
        owner: kafka
        group: kafka
        state: directory
        mode: "0755"
      loop:
        - /tmp/data1/kafka-logs   # matches log.dirs in server.properties
        - /tmp/data2/kafka-logs
Start Kafka Using Ansible
# Start Kafka on every broker host. The script module copies
# ./scripts/kafka-start.sh from the control node to each target and runs it
# there; --become/--become-user execute it as the kafka user.
ansible brokers \
-i hosts \
-m script \
-a "./scripts/kafka-start.sh" \
--become-user kafka \
--become
Create Kafka Start Script (kafka-start.sh)
#!/bin/bash
# kafka-start.sh — runs ON EACH BROKER (via `ansible -m script`): set this
# host's broker.id, then start Kafka.
#
# The previous version ssh'ed from every broker to all three brokers to
# rewrite broker.id, which required broker-to-broker SSH access and repeated
# the same edits once per host; it also matched the literal "broker.id=0",
# so it was not idempotent after the first run. Instead, derive this host's
# id locally from its ensemble IP.

# Print the broker.id assigned to the given host IP; fail for unknown IPs.
broker_id_for_ip() {
  case "$1" in
    192.168.33.50) echo 1 ;;
    192.168.33.60) echo 2 ;;
    192.168.33.70) echo 3 ;;
    *) return 1 ;;
  esac
}

main() {
  local ip id
  # First address on the Vagrant private network (192.168.33.0/24).
  ip=$(hostname -I 2>/dev/null | tr ' ' '\n' | grep -m1 '^192\.168\.33\.')
  if id=$(broker_id_for_ip "$ip"); then
    # Anchored match rewrites broker.id whatever its current value is.
    sed -i "s/^broker\.id=.*/broker.id=${id}/" /opt/kafka/config/server.properties
  else
    echo "warning: unknown broker IP '${ip}', leaving broker.id untouched" >&2
  fi
  /opt/kafka/bin/kafka-server-start.sh -daemon /opt/kafka/config/server.properties
}

main "$@"
Create a Kafka Topic
# Create the "test" topic through the ZooKeeper ensemble (pre-Kafka-2.2
# syntax; newer clients use --bootstrap-server).
# NOTE(review): replication-factor 1 means no redundancy even though three
# brokers are available — consider 3 for durability.
/opt/kafka/bin/kafka-topics.sh \
--create \
--zookeeper \
192.168.33.20:2181,192.168.33.30:2181,192.168.33.40:2181 \
--replication-factor 1 \
--partitions 1 \
--topic test
Produce Messages to Kafka Topic
# Interactive producer: each line typed on stdin becomes one message on the
# "test" topic; any listed broker is enough to bootstrap the client.
/opt/kafka/bin/kafka-console-producer.sh \
--broker-list \
192.168.33.50:9092,192.168.33.60:9092,192.168.33.70:9092 \
--topic test
Consume Messages from Kafka Topic
# Read the "test" topic from the earliest offset. The broker list was split
# mid-address in the original; it must be a single comma-separated argument.
# Any one reachable broker suffices to discover the rest of the cluster.
/opt/kafka/bin/kafka-console-consumer.sh \
  --bootstrap-server \
  192.168.33.50:9092,192.168.33.60:9092,192.168.33.70:9092 \
  --topic test \
  --from-beginning
Final Steps
- Ensure all nodes have the necessary directory structures and permissions.
- Execute the Ansible playbooks to set up Zookeeper and Kafka on all nodes.
- Verify the Zookeeper ensemble and Kafka brokers are running correctly.
- Create and manage Kafka topics as needed.
This setup ensures a robust and scalable Zookeeper and Kafka environment, managed efficiently using Ansible.