diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg index 22b1d8ab473..8f5474bb857 100644 --- a/ansible/ansible.cfg +++ b/ansible/ansible.cfg @@ -119,7 +119,7 @@ deprecation_warnings = False action_plugins = plugins/action callback_plugins = plugins/callback connection_plugins = plugins/connection -# lookup_plugins = /usr/share/ansible_plugins/lookup_plugins +lookup_plugins = plugins/lookup # vars_plugins = /usr/share/ansible_plugins/vars_plugins filter_plugins = plugins/filter callback_whitelist = profile_tasks diff --git a/ansible/config_connection_db.yml b/ansible/config_connection_db.yml new file mode 100644 index 00000000000..b00f3ae7fae --- /dev/null +++ b/ansible/config_connection_db.yml @@ -0,0 +1,46 @@ +# This playbook configures the connection_db +# +# To start connection db +# ansible-playbook -i veos config_connection_db.yml --vault-password-file=~/password.txt -e duts_name=str-msn2700-01,str-msn2700-02 -e configure_action=start_db +# ansible-playbook -i veos config_connection_db.yml --vault-password-file=~/password.txt -e connection_graph_filename=lab_connection_graph.xml -e configure_action=start_db +# Parameters: +# -e duts_name=str-msn2700-01,str-msn2700-02 - used to locate connection graph file +# -e connection_graph_filename=lab_connection_graph.xml - pass connection graph file directly +# -e configure_action=start_db - could be one of: start_db, provision_db or stop_db + +- name: Configure connection db + hosts: localhost + gather_facts: False + + tasks: + - name: Check that variable configure_action is defined + fail: msg="Please pass extra variable 'configure_action'" + when: configure_action is not defined + + - name: Check that variable connection_graph_filename or duts_name is defined + fail: msg="Please pass extra variable 'connection_graph_filename' or 'duts_name'" + when: + - connection_graph_filename is not defined + - duts_name is not defined + + - name: Find connection graph file + set_fact: + connection_graph_filename: "{{ 
lookup('graphfile', duts_name.split(',')) }}" + when: connection_graph_filename is not defined + + - name: Set connection db server hostname + set_fact: + connection_db_host: "{{ item.value }}" + loop: "{{ connection_db_host_mapping | dict2items }}" + when: item.key == connection_graph_filename + + - name: Get connection db server ip + set_fact: + connection_db_host_ip: "{{ ansible_ssh_host }}" + delegate_to: "{{ connection_db_host }}" + + - name: Start connection_db + include_role: + name: connection_db + vars: + action: "{{ configure_action }}" diff --git a/ansible/files/creategraph.py b/ansible/files/creategraph.py index 2068daeedf8..2a1a598e96f 100755 --- a/ansible/files/creategraph.py +++ b/ansible/files/creategraph.py @@ -14,6 +14,7 @@ LAB_CONNECTION_GRAPH_ROOT_NAME = 'LabConnectionGraph' LAB_CONNECTION_GRAPH_DPGL2_NAME = 'DevicesL2Info' + class LabGraph(object): """ @@ -40,7 +41,6 @@ def __init__(self, dev_csvfile=None, link_csvfile=None, cons_csvfile=None, pdu_c self.csgroot = etree.Element('ConsoleGraphDeclaration') self.pcgroot = etree.Element('PowerControlGraphDeclaration') - def read_devices(self): with open(self.devcsv) as csv_dev: csv_devices = csv.DictReader(filter(lambda row: row[0]!='#' and len(row.strip())!=0, csv_dev)) @@ -108,11 +108,17 @@ def generate_dpg(self): hostname = dev.get('Hostname', '') managementip = dev.get('ManagementIp', '') devtype = dev['Type'].lower() - if hostname and ('fanout' in devtype or 'ixiachassis' in devtype): - ###### Build Management interface IP here, if we create each device indivial minigraph file, we may comment this out + if not hostname: + continue + if devtype in ('server', 'devsonic'): + # Build Management interface IP for server and DUT + l3inforoot = etree.SubElement(self.dpgroot, 'DevicesL3Info', {'Hostname': hostname}) + etree.SubElement(l3inforoot, 'ManagementIPInterface', {'Name': 'ManagementIp', 'Prefix': managementip}) + elif 'fanout' in devtype or 'ixiachassis' in devtype: + # Build Management 
interface IP here, if we create each device indivial minigraph file, we may comment this out l3inforoot = etree.SubElement(self.dpgroot, 'DevicesL3Info', {'Hostname': hostname}) etree.SubElement(l3inforoot, 'ManagementIPInterface', {'Name': 'ManagementIp', 'Prefix': managementip}) - ####### Build L2 information Here + # Build L2 information Here l2inforoot = etree.SubElement(self.dpgroot, LAB_CONNECTION_GRAPH_DPGL2_NAME, {'Hostname': hostname}) vlanattr = {} for link in self.links: @@ -140,7 +146,7 @@ def create_xml(self): ''' onexml = open(self.one_xmlfile, 'w') - root=etree.Element(LAB_CONNECTION_GRAPH_ROOT_NAME) + root = etree.Element(LAB_CONNECTION_GRAPH_ROOT_NAME) root.append(self.pngroot) root.append(self.dpgroot) root.append(self.csgroot) diff --git a/ansible/files/lab_connection_graph.xml b/ansible/files/lab_connection_graph.xml index a4f4e48afdf..646ce4d43c9 100644 --- a/ansible/files/lab_connection_graph.xml +++ b/ansible/files/lab_connection_graph.xml @@ -44,6 +44,9 @@ + + + @@ -89,6 +92,9 @@ + + + diff --git a/ansible/group_vars/all/connection_db.yml b/ansible/group_vars/all/connection_db.yml new file mode 100644 index 00000000000..2e767ee3a8b --- /dev/null +++ b/ansible/group_vars/all/connection_db.yml @@ -0,0 +1,3 @@ +--- +connection_db_host_mapping: + lab_connection_graph.xml: STR-ACS-SERV-01 diff --git a/ansible/plugins/lookup/graphfile.py b/ansible/plugins/lookup/graphfile.py new file mode 100644 index 00000000000..77f41216e79 --- /dev/null +++ b/ansible/plugins/lookup/graphfile.py @@ -0,0 +1,48 @@ +from __future__ import (absolute_import, division, print_function) +import os.path +import yaml +import xml.etree.ElementTree as ET + +from ansible.utils.display import Display +from ansible.plugins.lookup import LookupBase +from ansible.errors import AnsibleError +__metaclass__ = type + +DOCUMENTATION = """ + lookup: graphfile + version_added: "1.0" + short_description: find connection graph file that has DUTs listed defined. 
+ description: + - This lookup returns the connection graph file that contains the DUTs. + options: + _terms: + description: list of DUT hostnames + required: True +""" + +display = Display() +LAB_CONNECTION_GRAPH_FILE = 'graph_files.yml' + + +class LookupModule(LookupBase): + + def run(self, terms, variables=None, **kwargs): + hostnames = terms[0] + display.debug('Graph file lookup DUTs: %s' % hostnames) + graph_list_file = self.find_file_in_search_path(variables, 'files', LAB_CONNECTION_GRAPH_FILE) + if not graph_list_file: + raise AnsibleError('Unable to locate %s' % LAB_CONNECTION_GRAPH_FILE) + with open(graph_list_file) as fd: + file_list = yaml.safe_load(fd) or [] # an empty list file loads as None + + for gf in file_list: + display.debug('Looking at conn graph file: %s' % gf) + gf = self.find_file_in_search_path(variables, 'files', gf) + if not gf: + continue + with open(gf) as fd: + root = ET.fromstring(fd.read()) + hosts_all = [d.attrib['Hostname'] for d in root.iter('Device')] + if set(hostnames) <= set(hosts_all): + return [os.path.basename(gf)] + return [] diff --git a/ansible/plugins/lookup/servercfgd_client.py b/ansible/plugins/lookup/servercfgd_client.py new file mode 100644 index 00000000000..2ca929ecc3f --- /dev/null +++ b/ansible/plugins/lookup/servercfgd_client.py @@ -0,0 +1,67 @@ +from __future__ import (absolute_import, division, print_function) +import os.path + +try: + from xmlrpclib import ServerProxy +except ImportError: + from xmlrpc.client import ServerProxy # Python 3 home of xmlrpclib +from ansible.utils.display import Display +from ansible.plugins.lookup import LookupBase +from ansible.errors import AnsibleError +__metaclass__ = type + +DOCUMENTATION = """ + lookup: servercfgd_client + version_added: "1.0" + short_description: Dispatches calls to remote functions registered in servercfgd + description: + - This lookup will make servercfgd procedure calls. 
+ options: + _terms: + description: list of servercfgd registered function names + required: True + servercfgd_host: + description: IP address of target server that is running servercfgd + type: string + conn_graph_file_content: + description: Content of connection graph file to provision db + type: string + required: False + enforce_provision: + description: True to enforce provisioning db + type: boolean + required: False + scripts: + description: List of Lua scripts to register + type: list + required: False +""" + +display = Display() + + +class LookupModule(LookupBase): + + def run(self, terms, variables=None, **kwargs): + """Dispatches calls to servercfgd register functions.""" + self.set_options(var_options=variables, direct=kwargs) + servercfgd_host = self.get_option('servercfgd_host') + display.vvv('servercfgd host address: %s' % servercfgd_host) + servercfgd = ServerProxy('http://%s:10033' % servercfgd_host) + display.vvv('servercfgd supported remote calls: %s' % servercfgd.system.listMethods()) + for fname in terms: + if fname == 'init_connection_db': + servercfgd.init_connection_db() + elif fname == 'provision_connection_db': + conn_graph_file_content = self.get_option('conn_graph_file_content') + enforce_provision = bool(self.get_option('enforce_provision')) # XML-RPC cannot marshal None + if not conn_graph_file_content: # test before str(): str(None) is the truthy 'None' + raise AnsibleError("'conn_graph_file_content' is required for %s" % fname) + servercfgd.provision_connection_db(str(conn_graph_file_content), enforce_provision) + elif fname == 'register_scripts': + for script in self.get_option('scripts') or []: + script_name = os.path.splitext(os.path.basename(script))[0] + with open(script) as fd: # close the script file once it has been read + servercfgd.register_script(script_name, fd.read()) + else: + raise AnsibleError('%s unsupported by servercfgd.' 
% fname) diff --git a/ansible/roles/connection_db/files/add_phy_link.lua b/ansible/roles/connection_db/files/add_phy_link.lua new file mode 100644 index 00000000000..6b802b59541 --- /dev/null +++ b/ansible/roles/connection_db/files/add_phy_link.lua @@ -0,0 +1,29 @@ +-- KEYS[1] - start device port list key +-- KEYS[2] - start device port table key +-- KEYS[3] - end device port list key +-- KEYS[4] - end device port table key +-- ARGV[1] - start device +-- ARGV[2] - start port +-- ARGV[3] - end device +-- ARGV[4] - end port +-- ARGV[5] - bandwidth +-- ARGV[6] - vlan mode + +local start_device = ARGV[1] +local start_port = ARGV[2] +local end_device = ARGV[3] +local end_port = ARGV[4] +local bandwidth = ARGV[5] +local vlan_mode = ARGV[6] +local endport0 = start_device .. ':' .. start_port +local endport1 = end_device .. ':' .. end_port + +local link_detail = string.format('%s:%s <--%s, %s--> %s:%s', start_device, start_port, vlan_mode, bandwidth, end_device, end_port) + +redis.log(redis.LOG_NOTICE, 'Add physical link, details: ' .. link_detail) +redis.call('SADD', KEYS[1], start_port) +redis.call('SADD', KEYS[3], end_port) +redis.call('HSET', KEYS[2], unpack{'BandWidth', bandwidth, 'PhyPeerPort', endport1, 'VlanType', vlan_mode}) +redis.call('HSET', KEYS[4], unpack{'BandWidth', bandwidth, 'PhyPeerPort', endport0, 'VlanType', vlan_mode}) + +return redis.status_reply("Finish adding physical link: " .. link_detail) diff --git a/ansible/roles/connection_db/files/add_server.lua b/ansible/roles/connection_db/files/add_server.lua new file mode 100644 index 00000000000..4f3acf256c2 --- /dev/null +++ b/ansible/roles/connection_db/files/add_server.lua @@ -0,0 +1,15 @@ +-- KEYS[1] - server table key +-- ARGV[1] - JSONified dictionary containing server meta + +redis.log(redis.LOG_NOTICE, 'Add server: ' .. 
ARGV[1]) +local device_table_name = KEYS[1] +local device_meta = cjson.decode(ARGV[1]) +local payload = {'HwSku', device_meta['HwSku'], 'ServerStatus', 'active'} + +if device_meta['ManagementIp'] then + table.insert(payload, 'ManagementIp') + table.insert(payload, device_meta['ManagementIp']) +end + +redis.call('HSET', device_table_name, unpack(payload)) +return redis.status_reply('Finish adding server: ' .. ARGV[1]) diff --git a/ansible/roles/connection_db/files/add_switch.lua b/ansible/roles/connection_db/files/add_switch.lua new file mode 100644 index 00000000000..b7d844f2e43 --- /dev/null +++ b/ansible/roles/connection_db/files/add_switch.lua @@ -0,0 +1,34 @@ +-- KEYS[1] - switch table key +-- KEYS[2] - DUT list key +-- ARGV[1] - JSONified dictionary containing switch meta + +redis.log(redis.LOG_NOTICE, 'Add switch: ' .. ARGV[1]) +local switch_table_name = KEYS[1] +local dut_list_name = KEYS[2] +local switch_meta = cjson.decode(ARGV[1]) +local switch_type = switch_meta['Type'] +local payload = {'HwSku', switch_meta['HwSku']} + +if switch_meta['ManagementIp'] then + table.insert(payload, 'ManagementIp') + table.insert(payload, switch_meta['ManagementIp']) +end + +if string.find(switch_type, 'FanoutLeaf') then + table.insert(payload, 'Type') + table.insert(payload, 'leaf_fanout') +elseif string.find(switch_type, 'FanoutRoot') then + table.insert(payload, 'Type') + table.insert(payload, 'root_fanout') +elseif switch_type == 'DevSonic' then + table.insert(payload, 'Type') + table.insert(payload, 'dev_sonic') + table.insert(payload, 'ProvisionStatus') + table.insert(payload, 'not_provisioned') + redis.call('SADD', dut_list_name, switch_meta['Hostname']) +else + return redis.error_reply('Unsupported device: ' .. ARGV[1]) +end + +redis.call('HSET', switch_table_name, unpack(payload)) +return redis.status_reply('Finish adding switch: ' .. 
ARGV[1]) diff --git a/ansible/roles/connection_db/files/cleanup.lua b/ansible/roles/connection_db/files/cleanup.lua new file mode 100644 index 00000000000..43a0fbeb8f9 --- /dev/null +++ b/ansible/roles/connection_db/files/cleanup.lua @@ -0,0 +1,13 @@ +-- ARGV[1:] glob-style key pattern to remove + +local result = 0 +for i = 1, #ARGV, 1 do + local matches = redis.call('KEYS', ARGV[i]) + if next(matches) ~= nil then + for _, key in ipairs(matches) do + result = result + redis.call('DEL', key) + end + end +end + +return result diff --git a/ansible/roles/connection_db/files/redis.conf b/ansible/roles/connection_db/files/redis.conf new file mode 100644 index 00000000000..4571414de0a --- /dev/null +++ b/ansible/roles/connection_db/files/redis.conf @@ -0,0 +1,1316 @@ +# Redis configuration file example. +# +# Note that in order to read the configuration file, Redis must be +# started with the file path as first argument: +# +# ./redis-server /path/to/redis.conf + +# Note on units: when memory size is needed, it is possible to specify +# it in the usual form of 1k 5GB 4M and so forth: +# +# 1k => 1000 bytes +# 1kb => 1024 bytes +# 1m => 1000000 bytes +# 1mb => 1024*1024 bytes +# 1g => 1000000000 bytes +# 1gb => 1024*1024*1024 bytes +# +# units are case insensitive so 1GB 1Gb 1gB are all the same. + +################################## INCLUDES ################################### + +# Include one or more other config files here. This is useful if you +# have a standard template that goes to all Redis servers but also need +# to customize a few per-server settings. Include files can include +# other files, so use this wisely. +# +# Notice option "include" won't be rewritten by command "CONFIG REWRITE" +# from admin or Redis Sentinel. Since Redis always uses the last processed +# line as value of a configuration directive, you'd better put includes +# at the beginning of this file to avoid overwriting config change at runtime. 
+# +# If instead you are interested in using includes to override configuration +# options, it is better to use include as the last line. +# +# include /path/to/local.conf +# include /path/to/other.conf + +################################## MODULES ##################################### + +# Load modules at startup. If the server is not able to load modules +# it will abort. It is possible to use multiple loadmodule directives. +# +# loadmodule /path/to/my_module.so +# loadmodule /path/to/other_module.so + +################################## NETWORK ##################################### + +# By default, if no "bind" configuration directive is specified, Redis listens +# for connections from all the network interfaces available on the server. +# It is possible to listen to just one or multiple selected interfaces using +# the "bind" configuration directive, followed by one or more IP addresses. +# +# Examples: +# +# bind 192.168.1.100 10.0.0.1 +# bind 127.0.0.1 ::1 +# +# ~~~ WARNING ~~~ If the computer running Redis is directly exposed to the +# internet, binding to all the interfaces is dangerous and will expose the +# instance to everybody on the internet. So by default we uncomment the +# following bind directive, that will force Redis to listen only into +# the IPv4 lookback interface address (this means Redis will be able to +# accept connections only from clients running into the same computer it +# is running). +# +# IF YOU ARE SURE YOU WANT YOUR INSTANCE TO LISTEN TO ALL THE INTERFACES +# JUST COMMENT THE FOLLOWING LINE. +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +bind 0.0.0.0 + +# Protected mode is a layer of security protection, in order to avoid that +# Redis instances left open on the internet are accessed and exploited. +# +# When protected mode is on and if: +# +# 1) The server is not binding explicitly to a set of addresses using the +# "bind" directive. +# 2) No password is configured. 
+# +# The server only accepts connections from clients connecting from the +# IPv4 and IPv6 loopback addresses 127.0.0.1 and ::1, and from Unix domain +# sockets. +# +# By default protected mode is enabled. You should disable it only if +# you are sure you want clients from other hosts to connect to Redis +# even if no authentication is configured, nor a specific set of interfaces +# are explicitly listed using the "bind" directive. +protected-mode no + +# Accept connections on the specified port, default is 6379 (IANA #815344). +# If port 0 is specified Redis will not listen on a TCP socket. +port 6379 + +# TCP listen() backlog. +# +# In high requests-per-second environments you need an high backlog in order +# to avoid slow clients connections issues. Note that the Linux kernel +# will silently truncate it to the value of /proc/sys/net/core/somaxconn so +# make sure to raise both the value of somaxconn and tcp_max_syn_backlog +# in order to get the desired effect. +tcp-backlog 511 + +# Unix socket. +# +# Specify the path for the Unix socket that will be used to listen for +# incoming connections. There is no default, so Redis will not listen +# on a unix socket when not specified. +# +# unixsocket /var/run/redis/redis-server.sock +# unixsocketperm 700 + +# Close the connection after a client is idle for N seconds (0 to disable) +timeout 0 + +# TCP keepalive. +# +# If non-zero, use SO_KEEPALIVE to send TCP ACKs to clients in absence +# of communication. This is useful for two reasons: +# +# 1) Detect dead peers. +# 2) Take the connection alive from the point of view of network +# equipment in the middle. +# +# On Linux, the specified value (in seconds) is the period used to send ACKs. +# Note that to close the connection the double of the time is needed. +# On other kernels the period depends on the kernel configuration. +# +# A reasonable value for this option is 300 seconds, which is the new +# Redis default starting with Redis 3.2.1. 
+tcp-keepalive 300 + +################################# GENERAL ##################################### + +# By default Redis does not run as a daemon. Use 'yes' if you need it. +# Note that Redis will write a pid file in /var/run/redis.pid when daemonized. +daemonize yes + +# If you run Redis from upstart or systemd, Redis can interact with your +# supervision tree. Options: +# supervised no - no supervision interaction +# supervised upstart - signal upstart by putting Redis into SIGSTOP mode +# supervised systemd - signal systemd by writing READY=1 to $NOTIFY_SOCKET +# supervised auto - detect upstart or systemd method based on +# UPSTART_JOB or NOTIFY_SOCKET environment variables +# Note: these supervision methods only signal "process is ready." +# They do not enable continuous liveness pings back to your supervisor. +supervised no + +# If a pid file is specified, Redis writes it where specified at startup +# and removes it at exit. +# +# When the server runs non daemonized, no pid file is created if none is +# specified in the configuration. When the server is daemonized, the pid file +# is used even if not specified, defaulting to "/var/run/redis.pid". +# +# Creating a pid file is best effort: if Redis is not able to create it +# nothing bad happens, the server will start and run normally. +pidfile /var/run/redis/redis-server.pid + +# Specify the server verbosity level. +# This can be one of: +# debug (a lot of information, useful for development/testing) +# verbose (many rarely useful info, but not a mess like the debug level) +# notice (moderately verbose, what you want in production probably) +# warning (only very important / critical messages are logged) +loglevel verbose + +# Specify the log file name. Also the empty string can be used to force +# Redis to log on the standard output. 
Note that if you use standard +# output for logging but daemonize, logs will be sent to /dev/null +logfile /var/log/redis/redis-server.log + +# To enable logging to the system logger, just set 'syslog-enabled' to yes, +# and optionally update the other syslog parameters to suit your needs. +# syslog-enabled no + +# Specify the syslog identity. +# syslog-ident redis + +# Specify the syslog facility. Must be USER or between LOCAL0-LOCAL7. +# syslog-facility local0 + +# Set the number of databases. The default database is DB 0, you can select +# a different one on a per-connection basis using SELECT <dbid> where +# dbid is a number between 0 and 'databases'-1 +databases 16 + +# By default Redis shows an ASCII art logo only when started to log to the +# standard output and if the standard output is a TTY. Basically this means +# that normally a logo is displayed only in interactive sessions. +# +# However it is possible to force the pre-4.0 behavior and always show a +# ASCII art logo in startup logs by setting the following option to yes. +always-show-logo yes + +################################ SNAPSHOTTING ################################ +# +# Save the DB on disk: +# +# save <seconds> <changes> +# +# Will save the DB if both the given number of seconds and the given +# number of write operations against the DB occurred. +# +# In the example below the behaviour will be to save: +# after 900 sec (15 min) if at least 1 key changed +# after 300 sec (5 min) if at least 10 keys changed +# after 60 sec if at least 10000 keys changed +# +# Note: you can disable saving completely by commenting out all "save" lines. +# +# It is also possible to remove all the previously configured save +# points by adding a save directive with a single empty string argument +# like in the following example: +# +# save "" + +save 900 1 +save 300 10 +save 60 10000 + +# By default Redis will stop accepting writes if RDB snapshots are enabled +# (at least one save point) and the latest background save failed. 
+# This will make the user aware (in a hard way) that data is not persisting +# on disk properly, otherwise chances are that no one will notice and some +# disaster will happen. +# +# If the background saving process will start working again Redis will +# automatically allow writes again. +# +# However if you have setup your proper monitoring of the Redis server +# and persistence, you may want to disable this feature so that Redis will +# continue to work as usual even if there are problems with disk, +# permissions, and so forth. +stop-writes-on-bgsave-error yes + +# Compress string objects using LZF when dump .rdb databases? +# For default that's set to 'yes' as it's almost always a win. +# If you want to save some CPU in the saving child set it to 'no' but +# the dataset will likely be bigger if you have compressible values or keys. +rdbcompression no + +# Since version 5 of RDB a CRC64 checksum is placed at the end of the file. +# This makes the format more resistant to corruption but there is a performance +# hit to pay (around 10%) when saving and loading RDB files, so you can disable it +# for maximum performances. +# +# RDB files created with checksum disabled have a checksum of zero that will +# tell the loading code to skip the check. +rdbchecksum yes + +# The filename where to dump the DB +dbfilename dump.rdb + +# The working directory. +# +# The DB will be written inside this directory, with the filename specified +# above using the 'dbfilename' configuration directive. +# +# The Append Only File will also be created inside this directory. +# +# Note that you must specify a directory here, not a file name. +dir /var/lib/redis + +################################# REPLICATION ################################# + +# Master-Slave replication. Use slaveof to make a Redis instance a copy of +# another Redis server. A few things to understand ASAP about Redis replication. 
+# +# 1) Redis replication is asynchronous, but you can configure a master to +# stop accepting writes if it appears to be not connected with at least +# a given number of slaves. +# 2) Redis slaves are able to perform a partial resynchronization with the +# master if the replication link is lost for a relatively small amount of +# time. You may want to configure the replication backlog size (see the next +# sections of this file) with a sensible value depending on your needs. +# 3) Replication is automatic and does not need user intervention. After a +# network partition slaves automatically try to reconnect to masters +# and resynchronize with them. +# +# slaveof <masterip> <masterport> + +# If the master is password protected (using the "requirepass" configuration +# directive below) it is possible to tell the slave to authenticate before +# starting the replication synchronization process, otherwise the master will +# refuse the slave request. +# +# masterauth <master-password> + +# When a slave loses its connection with the master, or when the replication +# is still in progress, the slave can act in two different ways: +# +# 1) if slave-serve-stale-data is set to 'yes' (the default) the slave will +# still reply to client requests, possibly with out of date data, or the +# data set may just be empty if this is the first synchronization. +# +# 2) if slave-serve-stale-data is set to 'no' the slave will reply with +# an error "SYNC with master in progress" to all the kind of commands +# but to INFO and SLAVEOF. +# +slave-serve-stale-data yes + +# You can configure a slave instance to accept writes or not. Writing against +# a slave instance may be useful to store some ephemeral data (because data +# written on a slave will be easily deleted after resync with the master) but +# may also cause problems if clients are writing to it because of a +# misconfiguration. +# +# Since Redis 2.6 by default slaves are read-only. 
+# +# Note: read only slaves are not designed to be exposed to untrusted clients +# on the internet. It's just a protection layer against misuse of the instance. +# Still a read only slave exports by default all the administrative commands +# such as CONFIG, DEBUG, and so forth. To a limited extent you can improve +# security of read only slaves using 'rename-command' to shadow all the +# administrative / dangerous commands. +slave-read-only yes + +# Replication SYNC strategy: disk or socket. +# +# ------------------------------------------------------- +# WARNING: DISKLESS REPLICATION IS EXPERIMENTAL CURRENTLY +# ------------------------------------------------------- +# +# New slaves and reconnecting slaves that are not able to continue the replication +# process just receiving differences, need to do what is called a "full +# synchronization". An RDB file is transmitted from the master to the slaves. +# The transmission can happen in two different ways: +# +# 1) Disk-backed: The Redis master creates a new process that writes the RDB +# file on disk. Later the file is transferred by the parent +# process to the slaves incrementally. +# 2) Diskless: The Redis master creates a new process that directly writes the +# RDB file to slave sockets, without touching the disk at all. +# +# With disk-backed replication, while the RDB file is generated, more slaves +# can be queued and served with the RDB file as soon as the current child producing +# the RDB file finishes its work. With diskless replication instead once +# the transfer starts, new slaves arriving will be queued and a new transfer +# will start when the current one terminates. +# +# When diskless replication is used, the master waits a configurable amount of +# time (in seconds) before starting the transfer in the hope that multiple slaves +# will arrive and the transfer can be parallelized. +# +# With slow disks and fast (large bandwidth) networks, diskless replication +# works better. 
+repl-diskless-sync no + +# When diskless replication is enabled, it is possible to configure the delay +# the server waits in order to spawn the child that transfers the RDB via socket +# to the slaves. +# +# This is important since once the transfer starts, it is not possible to serve +# new slaves arriving, that will be queued for the next RDB transfer, so the server +# waits a delay in order to let more slaves arrive. +# +# The delay is specified in seconds, and by default is 5 seconds. To disable +# it entirely just set it to 0 seconds and the transfer will start ASAP. +repl-diskless-sync-delay 5 + +# Slaves send PINGs to server in a predefined interval. It's possible to change +# this interval with the repl_ping_slave_period option. The default value is 10 +# seconds. +# +# repl-ping-slave-period 10 + +# The following option sets the replication timeout for: +# +# 1) Bulk transfer I/O during SYNC, from the point of view of slave. +# 2) Master timeout from the point of view of slaves (data, pings). +# 3) Slave timeout from the point of view of masters (REPLCONF ACK pings). +# +# It is important to make sure that this value is greater than the value +# specified for repl-ping-slave-period otherwise a timeout will be detected +# every time there is low traffic between the master and the slave. +# +# repl-timeout 60 + +# Disable TCP_NODELAY on the slave socket after SYNC? +# +# If you select "yes" Redis will use a smaller number of TCP packets and +# less bandwidth to send data to slaves. But this can add a delay for +# the data to appear on the slave side, up to 40 milliseconds with +# Linux kernels using a default configuration. +# +# If you select "no" the delay for data to appear on the slave side will +# be reduced but more bandwidth will be used for replication. +# +# By default we optimize for low latency, but in very high traffic conditions +# or when the master and slaves are many hops away, turning this to "yes" may +# be a good idea. 
+repl-disable-tcp-nodelay no + +# Set the replication backlog size. The backlog is a buffer that accumulates +# slave data when slaves are disconnected for some time, so that when a slave +# wants to reconnect again, often a full resync is not needed, but a partial +# resync is enough, just passing the portion of data the slave missed while +# disconnected. +# +# The bigger the replication backlog, the longer the time the slave can be +# disconnected and later be able to perform a partial resynchronization. +# +# The backlog is only allocated once there is at least a slave connected. +# +# repl-backlog-size 1mb + +# After a master has no longer connected slaves for some time, the backlog +# will be freed. The following option configures the amount of seconds that +# need to elapse, starting from the time the last slave disconnected, for +# the backlog buffer to be freed. +# +# Note that slaves never free the backlog for timeout, since they may be +# promoted to masters later, and should be able to correctly "partially +# resynchronize" with the slaves: hence they should always accumulate backlog. +# +# A value of 0 means to never release the backlog. +# +# repl-backlog-ttl 3600 + +# The slave priority is an integer number published by Redis in the INFO output. +# It is used by Redis Sentinel in order to select a slave to promote into a +# master if the master is no longer working correctly. +# +# A slave with a low priority number is considered better for promotion, so +# for instance if there are three slaves with priority 10, 100, 25 Sentinel will +# pick the one with priority 10, that is the lowest. +# +# However a special priority of 0 marks the slave as not able to perform the +# role of master, so a slave with priority of 0 will never be selected by +# Redis Sentinel for promotion. +# +# By default the priority is 100. 
+slave-priority 100 + +# It is possible for a master to stop accepting writes if there are fewer than +# N slaves connected, having a lag less than or equal to M seconds. +# +# The N slaves need to be in "online" state. +# +# The lag in seconds, that must be <= the specified value, is calculated from +# the last ping received from the slave, that is usually sent every second. +# +# This option does not GUARANTEE that N replicas will accept the write, but +# will limit the window of exposure for lost writes in case not enough slaves +# are available, to the specified number of seconds. +# +# For example to require at least 3 slaves with a lag <= 10 seconds use: +# +# min-slaves-to-write 3 +# min-slaves-max-lag 10 +# +# Setting one or the other to 0 disables the feature. +# +# By default min-slaves-to-write is set to 0 (feature disabled) and +# min-slaves-max-lag is set to 10. + +# A Redis master is able to list the address and port of the attached +# slaves in different ways. For example the "INFO replication" section +# offers this information, which is used, among other tools, by +# Redis Sentinel in order to discover slave instances. +# Another place where this info is available is in the output of the +# "ROLE" command of a master. +# +# The listed IP and port normally reported by a slave is obtained +# in the following way: +# +# IP: The address is auto detected by checking the peer address +# of the socket used by the slave to connect with the master. +# +# Port: The port is communicated by the slave during the replication +# handshake, and is normally the port that the slave is using to +# listen for connections. +# +# However when port forwarding or Network Address Translation (NAT) is +# used, the slave may be actually reachable via different IP and port +# pairs. The following two options can be used by a slave in order to +# report to its master a specific set of IP and port, so that both INFO +# and ROLE will report those values. 
+# +# There is no need to use both the options if you need to override just +# the port or the IP address. +# +# slave-announce-ip 5.5.5.5 +# slave-announce-port 1234 + +################################## SECURITY ################################### + +# Require clients to issue AUTH before processing any other +# commands. This might be useful in environments in which you do not trust +# others with access to the host running redis-server. +# +# This should stay commented out for backward compatibility and because most +# people do not need auth (e.g. they run their own servers). +# +# Warning: since Redis is pretty fast an outside user can try up to +# 150k passwords per second against a good box. This means that you should +# use a very strong password otherwise it will be very easy to break. +# +# requirepass foobared + +# Command renaming. +# +# It is possible to change the name of dangerous commands in a shared +# environment. For instance the CONFIG command may be renamed into something +# hard to guess so that it will still be available for internal-use tools +# but not available for general clients. +# +# Example: +# +# rename-command CONFIG b840fc02d524045429941cc15f59e41cb7be6c52 +# +# It is also possible to completely kill a command by renaming it into +# an empty string: +# +# rename-command CONFIG "" +# +# Please note that changing the name of commands that are logged into the +# AOF file or transmitted to slaves may cause problems. + +################################### CLIENTS #################################### + +# Set the max number of connected clients at the same time. By default +# this limit is set to 10000 clients, however if the Redis server is not +# able to configure the process file limit to allow for the specified limit +# the max number of allowed clients is set to the current file limit +# minus 32 (as Redis reserves a few file descriptors for internal uses). 
+# +# Once the limit is reached Redis will close all the new connections sending +# an error 'max number of clients reached'. +# +# maxclients 10000 + +############################## MEMORY MANAGEMENT ################################ + +# Set a memory usage limit to the specified amount of bytes. +# When the memory limit is reached Redis will try to remove keys +# according to the eviction policy selected (see maxmemory-policy). +# +# If Redis can't remove keys according to the policy, or if the policy is +# set to 'noeviction', Redis will start to reply with errors to commands +# that would use more memory, like SET, LPUSH, and so on, and will continue +# to reply to read-only commands like GET. +# +# This option is usually useful when using Redis as an LRU or LFU cache, or to +# set a hard memory limit for an instance (using the 'noeviction' policy). +# +# WARNING: If you have slaves attached to an instance with maxmemory on, +# the size of the output buffers needed to feed the slaves is subtracted +# from the used memory count, so that network problems / resyncs will +# not trigger a loop where keys are evicted, and in turn the output +# buffer of slaves is full with DELs of keys evicted triggering the deletion +# of more keys, and so forth until the database is completely emptied. +# +# In short... if you have slaves attached it is suggested that you set a lower +# limit for maxmemory so that there is some free RAM on the system for slave +# output buffers (but this is not needed if the policy is 'noeviction'). +# +# maxmemory <bytes> + +# MAXMEMORY POLICY: how Redis will select what to remove when maxmemory +# is reached. You can select among the following behaviors: +# +# volatile-lru -> Evict using approximated LRU among the keys with an expire set. +# allkeys-lru -> Evict any key using approximated LRU. +# volatile-lfu -> Evict using approximated LFU among the keys with an expire set. +# allkeys-lfu -> Evict any key using approximated LFU. 
+# volatile-random -> Remove a random key among the ones with an expire set. +# allkeys-random -> Remove a random key, any key. +# volatile-ttl -> Remove the key with the nearest expire time (minor TTL) +# noeviction -> Don't evict anything, just return an error on write operations. +# +# LRU means Least Recently Used +# LFU means Least Frequently Used +# +# LRU, LFU and volatile-ttl are all implemented using approximated +# randomized algorithms. +# +# Note: with any of the above policies, Redis will return an error on write +# operations, when there are no suitable keys for eviction. +# +# At the date of writing, the affected write commands are: set setnx setex append +# incr decr rpush lpush rpushx lpushx linsert lset rpoplpush sadd +# sinter sinterstore sunion sunionstore sdiff sdiffstore zadd zincrby +# zunionstore zinterstore hset hsetnx hmset hincrby incrby decrby +# getset mset msetnx exec sort +# +# The default is: +# +# maxmemory-policy noeviction + +# LRU, LFU and minimal TTL algorithms are not precise algorithms but approximated +# algorithms (in order to save memory), so you can tune them for speed or +# accuracy. By default Redis will check five keys and pick the one that was +# used least recently; you can change the sample size using the following +# configuration directive. +# +# The default of 5 produces good enough results. 10 approximates very closely +# true LRU but costs more CPU. 3 is faster but not very accurate. +# +# maxmemory-samples 5 + +############################# LAZY FREEING #################################### + +# Redis has two primitives to delete keys. One is called DEL and is a blocking +# deletion of the object. It means that the server stops processing new commands +# in order to reclaim all the memory associated with an object in a synchronous +# way. If the key deleted is associated with a small object, the time needed +# in order to execute the DEL command is very small and comparable to most other +# O(1) or O(log_N) commands in Redis. 
However if the key is associated with an +# aggregated value containing millions of elements, the server can block for +# a long time (even seconds) in order to complete the operation. +# +# For the above reasons Redis also offers non blocking deletion primitives +# such as UNLINK (non blocking DEL) and the ASYNC option of FLUSHALL and +# FLUSHDB commands, in order to reclaim memory in background. Those commands +# are executed in constant time. Another thread will incrementally free the +# object in the background as fast as possible. +# +# DEL, UNLINK and ASYNC option of FLUSHALL and FLUSHDB are user-controlled. +# It's up to the design of the application to understand when it is a good +# idea to use one or the other. However the Redis server sometimes has to +# delete keys or flush the whole database as a side effect of other operations. +# Specifically Redis deletes objects independently of a user call in the +# following scenarios: +# +# 1) On eviction, because of the maxmemory and maxmemory policy configurations, +# in order to make room for new data, without going over the specified +# memory limit. +# 2) Because of expire: when a key with an associated time to live (see the +# EXPIRE command) must be deleted from memory. +# 3) Because of a side effect of a command that stores data on a key that may +# already exist. For example the RENAME command may delete the old key +# content when it is replaced with another one. Similarly SUNIONSTORE +# or SORT with STORE option may delete existing keys. The SET command +# itself removes any old content of the specified key in order to replace +# it with the specified string. +# 4) During replication, when a slave performs a full resynchronization with +# its master, the content of the whole database is removed in order to +# load the RDB file just transferred. +# +# In all the above cases the default is to delete objects in a blocking way, +# as if DEL was called. 
However you can configure each case specifically +# in order to instead release memory in a non-blocking way like if UNLINK +# was called, using the following configuration directives: + +lazyfree-lazy-eviction no +lazyfree-lazy-expire no +lazyfree-lazy-server-del no +slave-lazy-flush no + +############################## APPEND ONLY MODE ############################### + +# By default Redis asynchronously dumps the dataset on disk. This mode is +# good enough in many applications, but an issue with the Redis process or +# a power outage may result into a few minutes of writes lost (depending on +# the configured save points). +# +# The Append Only File is an alternative persistence mode that provides +# much better durability. For instance using the default data fsync policy +# (see later in the config file) Redis can lose just one second of writes in a +# dramatic event like a server power outage, or a single write if something +# wrong with the Redis process itself happens, but the operating system is +# still running correctly. +# +# AOF and RDB persistence can be enabled at the same time without problems. +# If the AOF is enabled on startup Redis will load the AOF, that is the file +# with the better durability guarantees. +# +# Please check http://redis.io/topics/persistence for more information. + +appendonly yes + +# The name of the append only file (default: "appendonly.aof") + +appendfilename "appendonly.aof" + +# The fsync() call tells the Operating System to actually write data on disk +# instead of waiting for more data in the output buffer. Some OS will really flush +# data on disk, some other OS will just try to do it ASAP. +# +# Redis supports three different modes: +# +# no: don't fsync, just let the OS flush the data when it wants. Faster. +# always: fsync after every write to the append only log. Slow, Safest. +# everysec: fsync only one time every second. Compromise. 
+# +# The default is "everysec", as that's usually the right compromise between +# speed and data safety. It's up to you to understand if you can relax this to +# "no" that will let the operating system flush the output buffer when +# it wants, for better performance (but if you can live with the idea of +# some data loss consider the default persistence mode that's snapshotting), +# or on the contrary, use "always" that's very slow but a bit safer than +# everysec. +# +# More details please check the following article: +# http://antirez.com/post/redis-persistence-demystified.html +# +# If unsure, use "everysec". + +# appendfsync always +appendfsync everysec +# appendfsync no + +# When the AOF fsync policy is set to always or everysec, and a background +# saving process (a background save or AOF log background rewriting) is +# performing a lot of I/O against the disk, in some Linux configurations +# Redis may block too long on the fsync() call. Note that there is no fix for +# this currently, as even performing fsync in a different thread will block +# our synchronous write(2) call. +# +# In order to mitigate this problem it's possible to use the following option +# that will prevent fsync() from being called in the main process while a +# BGSAVE or BGREWRITEAOF is in progress. +# +# This means that while another child is saving, the durability of Redis is +# the same as "appendfsync no". In practical terms, this means that it is +# possible to lose up to 30 seconds of log in the worst scenario (with the +# default Linux settings). +# +# If you have latency problems turn this to "yes". Otherwise leave it as +# "no" that is the safest pick from the point of view of durability. + +no-appendfsync-on-rewrite no + +# Automatic rewrite of the append only file. +# Redis is able to automatically rewrite the log file implicitly calling +# BGREWRITEAOF when the AOF log size grows by the specified percentage. 
+# +# This is how it works: Redis remembers the size of the AOF file after the +# latest rewrite (if no rewrite has happened since the restart, the size of +# the AOF at startup is used). +# +# This base size is compared to the current size. If the current size is +# bigger than the specified percentage, the rewrite is triggered. Also +# you need to specify a minimal size for the AOF file to be rewritten, this +# is useful to avoid rewriting the AOF file even if the percentage increase +# is reached but it is still pretty small. +# +# Specify a percentage of zero in order to disable the automatic AOF +# rewrite feature. + +auto-aof-rewrite-percentage 100 +auto-aof-rewrite-min-size 64mb + +# An AOF file may be found to be truncated at the end during the Redis +# startup process, when the AOF data gets loaded back into memory. +# This may happen when the system where Redis is running +# crashes, especially when an ext4 filesystem is mounted without the +# data=ordered option (however this can't happen when Redis itself +# crashes or aborts but the operating system still works correctly). +# +# Redis can either exit with an error when this happens, or load as much +# data as possible (the default now) and start if the AOF file is found +# to be truncated at the end. The following option controls this behavior. +# +# If aof-load-truncated is set to yes, a truncated AOF file is loaded and +# the Redis server starts emitting a log to inform the user of the event. +# Otherwise if the option is set to no, the server aborts with an error +# and refuses to start. When the option is set to no, the user requires +# to fix the AOF file using the "redis-check-aof" utility before to restart +# the server. +# +# Note that if the AOF file will be found to be corrupted in the middle +# the server will still exit with an error. This option only applies when +# Redis will try to read more data from the AOF file but not enough bytes +# will be found. 
+aof-load-truncated yes + +# When rewriting the AOF file, Redis is able to use an RDB preamble in the +# AOF file for faster rewrites and recoveries. When this option is turned +# on the rewritten AOF file is composed of two different stanzas: +# +# [RDB file][AOF tail] +# +# When loading, Redis recognizes that the AOF file starts with the "REDIS" +# string and loads the prefixed RDB file, and continues loading the AOF +# tail. +# +# This is currently turned off by default in order to avoid the surprise +# of a format change, but will at some point be used as the default. +aof-use-rdb-preamble no + +################################ LUA SCRIPTING ############################### + +# Max execution time of a Lua script in milliseconds. +# +# If the maximum execution time is reached Redis will log that a script is +# still in execution after the maximum allowed time and will start to +# reply to queries with an error. +# +# When a long running script exceeds the maximum execution time only the +# SCRIPT KILL and SHUTDOWN NOSAVE commands are available. The first can be +# used to stop a script that has not yet called write commands. The second +# is the only way to shut down the server in the case a write command was +# already issued by the script but the user doesn't want to wait for the natural +# termination of the script. +# +# Set it to 0 or a negative value for unlimited execution without warnings. +lua-time-limit 5000 + +################################ REDIS CLUSTER ############################### +# +# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# WARNING EXPERIMENTAL: Redis Cluster is considered to be stable code, however +# in order to mark it as "mature" we need to wait for a non trivial percentage +# of users to deploy it in production. 
+# ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +# +# Normal Redis instances can't be part of a Redis Cluster; only nodes that are +# started as cluster nodes can. In order to start a Redis instance as a +# cluster node enable the cluster support uncommenting the following: +# +# cluster-enabled yes + +# Every cluster node has a cluster configuration file. This file is not +# intended to be edited by hand. It is created and updated by Redis nodes. +# Every Redis Cluster node requires a different cluster configuration file. +# Make sure that instances running in the same system do not have +# overlapping cluster configuration file names. +# +# cluster-config-file nodes-6379.conf + +# Cluster node timeout is the amount of milliseconds a node must be unreachable +# for it to be considered in failure state. +# Most other internal time limits are multiple of the node timeout. +# +# cluster-node-timeout 15000 + +# A slave of a failing master will avoid to start a failover if its data +# looks too old. +# +# There is no simple way for a slave to actually have an exact measure of +# its "data age", so the following two checks are performed: +# +# 1) If there are multiple slaves able to failover, they exchange messages +# in order to try to give an advantage to the slave with the best +# replication offset (more data from the master processed). +# Slaves will try to get their rank by offset, and apply to the start +# of the failover a delay proportional to their rank. +# +# 2) Every single slave computes the time of the last interaction with +# its master. This can be the last ping or command received (if the master +# is still in the "connected" state), or the time that elapsed since the +# disconnection with the master (if the replication link is currently down). +# If the last interaction is too old, the slave will not try to failover +# at all. +# +# The point "2" can be tuned by user. 
Specifically a slave will not perform +# the failover if, since the last interaction with the master, the time +# elapsed is greater than: +# +# (node-timeout * slave-validity-factor) + repl-ping-slave-period +# +# So for example if node-timeout is 30 seconds, and the slave-validity-factor +# is 10, and assuming a default repl-ping-slave-period of 10 seconds, the +# slave will not try to failover if it was not able to talk with the master +# for longer than 310 seconds. +# +# A large slave-validity-factor may allow slaves with too old data to failover +# a master, while a too small value may prevent the cluster from being able to +# elect a slave at all. +# +# For maximum availability, it is possible to set the slave-validity-factor +# to a value of 0, which means, that slaves will always try to failover the +# master regardless of the last time they interacted with the master. +# (However they'll always try to apply a delay proportional to their +# offset rank). +# +# Zero is the only value able to guarantee that when all the partitions heal +# the cluster will always be able to continue. +# +# cluster-slave-validity-factor 10 + +# Cluster slaves are able to migrate to orphaned masters, that are masters +# that are left without working slaves. This improves the cluster ability +# to resist to failures as otherwise an orphaned master can't be failed over +# in case of failure if it has no working slaves. +# +# Slaves migrate to orphaned masters only if there are still at least a +# given number of other working slaves for their old master. This number +# is the "migration barrier". A migration barrier of 1 means that a slave +# will migrate only if there is at least 1 other working slave for its master +# and so forth. It usually reflects the number of slaves you want for every +# master in your cluster. +# +# Default is 1 (slaves migrate only if their masters remain with at least +# one slave). To disable migration just set it to a very large value. 
+# A value of 0 can be set but is useful only for debugging and dangerous +# in production. +# +# cluster-migration-barrier 1 + +# By default Redis Cluster nodes stop accepting queries if they detect there +# is at least an hash slot uncovered (no available node is serving it). +# This way if the cluster is partially down (for example a range of hash slots +# are no longer covered) all the cluster becomes, eventually, unavailable. +# It automatically returns available as soon as all the slots are covered again. +# +# However sometimes you want the subset of the cluster which is working, +# to continue to accept queries for the part of the key space that is still +# covered. In order to do so, just set the cluster-require-full-coverage +# option to no. +# +# cluster-require-full-coverage yes + +# This option, when set to yes, prevents slaves from trying to failover its +# master during master failures. However the master can still perform a +# manual failover, if forced to do so. +# +# This is useful in different scenarios, especially in the case of multiple +# data center operations, where we want one side to never be promoted if not +# in the case of a total DC failure. +# +# cluster-slave-no-failover no + +# In order to setup your cluster make sure to read the documentation +# available at http://redis.io web site. + +########################## CLUSTER DOCKER/NAT support ######################## + +# In certain deployments, Redis Cluster nodes address discovery fails, because +# addresses are NAT-ted or because ports are forwarded (the typical case is +# Docker and other containers). +# +# In order to make Redis Cluster working in such environments, a static +# configuration where each node knows its public address is needed. 
The +# following three options are used for this scope, and are: +# +# * cluster-announce-ip +# * cluster-announce-port +# * cluster-announce-bus-port +# +# Each instructs the node about its address, client port, and cluster message +# bus port. The information is then published in the header of the bus packets +# so that other nodes will be able to correctly map the address of the node +# publishing the information. +# +# If the above options are not used, the normal Redis Cluster auto-detection +# will be used instead. +# +# Note that when remapped, the bus port may not be at the fixed offset of +# clients port + 10000, so you can specify any port and bus-port depending +# on how they get remapped. If the bus-port is not set, a fixed offset of +# 10000 will be used as usual. +# +# Example: +# +# cluster-announce-ip 10.1.1.5 +# cluster-announce-port 6379 +# cluster-announce-bus-port 6380 + +################################## SLOW LOG ################################### + +# The Redis Slow Log is a system to log queries that exceeded a specified +# execution time. The execution time does not include the I/O operations +# like talking with the client, sending the reply and so forth, +# but just the time needed to actually execute the command (this is the only +# stage of command execution where the thread is blocked and can not serve +# other requests in the meantime). +# +# You can configure the slow log with two parameters: one tells Redis +# what is the execution time, in microseconds, to exceed in order for the +# command to get logged, and the other parameter is the length of the +# slow log. When a new command is logged the oldest one is removed from the +# queue of logged commands. + +# The following time is expressed in microseconds, so 1000000 is equivalent +# to one second. Note that a negative number disables the slow log, while +# a value of zero forces the logging of every command. +slowlog-log-slower-than 10000 + +# There is no limit to this length. 
Just be aware that it will consume memory. +# You can reclaim memory used by the slow log with SLOWLOG RESET. +slowlog-max-len 128 + +################################ LATENCY MONITOR ############################## + +# The Redis latency monitoring subsystem samples different operations +# at runtime in order to collect data related to possible sources of +# latency of a Redis instance. +# +# Via the LATENCY command this information is available to the user that can +# print graphs and obtain reports. +# +# The system only logs operations that were performed in a time equal or +# greater than the amount of milliseconds specified via the +# latency-monitor-threshold configuration directive. When its value is set +# to zero, the latency monitor is turned off. +# +# By default latency monitoring is disabled since it is mostly not needed +# if you don't have latency issues, and collecting data has a performance +# impact, that while very small, can be measured under big load. Latency +# monitoring can easily be enabled at runtime using the command +# "CONFIG SET latency-monitor-threshold <milliseconds>" if needed. +latency-monitor-threshold 0 + +############################# EVENT NOTIFICATION ############################## + +# Redis can notify Pub/Sub clients about events happening in the key space. +# This feature is documented at http://redis.io/topics/notifications +# +# For instance if keyspace events notification is enabled, and a client +# performs a DEL operation on key "foo" stored in the Database 0, two +# messages will be published via Pub/Sub: +# +# PUBLISH __keyspace@0__:foo del +# PUBLISH __keyevent@0__:del foo +# +# It is possible to select the events that Redis will notify among a set +# of classes. Every class is identified by a single character: +# +# K Keyspace events, published with __keyspace@<db>__ prefix. +# E Keyevent events, published with __keyevent@<db>__ prefix. +# g Generic commands (non-type specific) like DEL, EXPIRE, RENAME, ... 
+# $ String commands +# l List commands +# s Set commands +# h Hash commands +# z Sorted set commands +# x Expired events (events generated every time a key expires) +# e Evicted events (events generated when a key is evicted for maxmemory) +# A Alias for g$lshzxe, so that the "AKE" string means all the events. +# +# The "notify-keyspace-events" takes as argument a string that is composed +# of zero or multiple characters. The empty string means that notifications +# are disabled. +# +# Example: to enable list and generic events, from the point of view of the +# event name, use: +# +# notify-keyspace-events Elg +# +# Example 2: to get the stream of the expired keys subscribing to channel +# name __keyevent@0__:expired use: +# +# notify-keyspace-events Ex +# +# By default all notifications are disabled because most users don't need +# this feature and the feature has some overhead. Note that if you don't +# specify at least one of K or E, no events will be delivered. +notify-keyspace-events "" + +############################### ADVANCED CONFIG ############################### + +# Hashes are encoded using a memory efficient data structure when they have a +# small number of entries, and the biggest entry does not exceed a given +# threshold. These thresholds can be configured using the following directives. +hash-max-ziplist-entries 512 +hash-max-ziplist-value 64 + +# Lists are also encoded in a special way to save a lot of space. +# The number of entries allowed per internal list node can be specified +# as a fixed maximum size or a maximum number of elements. +# For a fixed maximum size, use -5 through -1, meaning: +# -5: max size: 64 Kb <-- not recommended for normal workloads +# -4: max size: 32 Kb <-- not recommended +# -3: max size: 16 Kb <-- probably not recommended +# -2: max size: 8 Kb <-- good +# -1: max size: 4 Kb <-- good +# Positive numbers mean store up to _exactly_ that number of elements +# per list node. 
+# The highest performing option is usually -2 (8 Kb size) or -1 (4 Kb size), +# but if your use case is unique, adjust the settings as necessary. +list-max-ziplist-size -2 + +# Lists may also be compressed. +# Compress depth is the number of quicklist ziplist nodes from *each* side of +# the list to *exclude* from compression. The head and tail of the list +# are always uncompressed for fast push/pop operations. Settings are: +# 0: disable all list compression +# 1: depth 1 means "don't start compressing until after 1 node into the list, +# going from either the head or tail" +# So: [head]->node->node->...->node->[tail] +# [head], [tail] will always be uncompressed; inner nodes will compress. +# 2: [head]->[next]->node->node->...->node->[prev]->[tail] +# 2 here means: don't compress head or head->next or tail->prev or tail, +# but compress all nodes between them. +# 3: [head]->[next]->[next]->node->node->...->node->[prev]->[prev]->[tail] +# etc. +list-compress-depth 0 + +# Sets have a special encoding in just one case: when a set is composed +# of just strings that happen to be integers in radix 10 in the range +# of 64 bit signed integers. +# The following configuration setting sets the limit in the size of the +# set in order to use this special memory saving encoding. +set-max-intset-entries 512 + +# Similarly to hashes and lists, sorted sets are also specially encoded in +# order to save a lot of space. This encoding is only used when the length and +# elements of a sorted set are below the following limits: +zset-max-ziplist-entries 128 +zset-max-ziplist-value 64 + +# HyperLogLog sparse representation bytes limit. The limit includes the +# 16 bytes header. When an HyperLogLog using the sparse representation crosses +# this limit, it is converted into the dense representation. +# +# A value greater than 16000 is totally useless, since at that point the +# dense representation is more memory efficient. 
+# +# The suggested value is ~ 3000 in order to have the benefits of +# the space efficient encoding without slowing down too much PFADD, +# which is O(N) with the sparse encoding. The value can be raised to +# ~ 10000 when CPU is not a concern, but space is, and the data set is +# composed of many HyperLogLogs with cardinality in the 0 - 15000 range. +hll-sparse-max-bytes 3000 + +# Active rehashing uses 1 millisecond every 100 milliseconds of CPU time in +# order to help rehashing the main Redis hash table (the one mapping top-level +# keys to values). The hash table implementation Redis uses (see dict.c) +# performs a lazy rehashing: the more operation you run into a hash table +# that is rehashing, the more rehashing "steps" are performed, so if the +# server is idle the rehashing is never complete and some more memory is used +# by the hash table. +# +# The default is to use this millisecond 10 times every second in order to +# actively rehash the main dictionaries, freeing memory when possible. +# +# If unsure: +# use "activerehashing no" if you have hard latency requirements and it is +# not a good thing in your environment that Redis can reply from time to time +# to queries with 2 milliseconds delay. +# +# use "activerehashing yes" if you don't have such hard requirements but +# want to free memory asap when possible. +activerehashing yes + +# The client output buffer limits can be used to force disconnection of clients +# that are not reading data from the server fast enough for some reason (a +# common reason is that a Pub/Sub client can't consume messages as fast as the +# publisher can produce them). 
+# +# The limit can be set differently for the three different classes of clients: +# +# normal -> normal clients including MONITOR clients +# slave -> slave clients +# pubsub -> clients subscribed to at least one pubsub channel or pattern +# +# The syntax of every client-output-buffer-limit directive is the following: +# +# client-output-buffer-limit <class> <hard limit> <soft limit> <soft seconds> +# +# A client is immediately disconnected once the hard limit is reached, or if +# the soft limit is reached and remains reached for the specified number of +# seconds (continuously). +# So for instance if the hard limit is 32 megabytes and the soft limit is +# 16 megabytes / 10 seconds, the client will get disconnected immediately +# if the size of the output buffers reach 32 megabytes, but will also get +# disconnected if the client reaches 16 megabytes and continuously overcomes +# the limit for 10 seconds. +# +# By default normal clients are not limited because they don't receive data +# without asking (in a push way), but just after a request, so only +# asynchronous clients may create a scenario where data is requested faster +# than it can read. +# +# Instead there is a default limit for pubsub and slave clients, since +# subscribers and slaves receive data in a push fashion. +# +# Both the hard and the soft limit can be disabled by setting them to zero. +client-output-buffer-limit normal 0 0 0 +client-output-buffer-limit slave 256mb 64mb 60 +client-output-buffer-limit pubsub 32mb 8mb 60 + +# Client query buffers accumulate new commands. They are limited to a fixed +# amount by default in order to avoid that a protocol desynchronization (for +# instance due to a bug in the client) will lead to unbound memory usage in +# the query buffer. However you can configure it here if you have very special +# needs, such as huge multi/exec requests or alike. +# +# client-query-buffer-limit 1gb + +# In the Redis protocol, bulk requests, that are, elements representing single +# strings, are normally limited to 512 mb.
However you can change this limit +# here. +# +# proto-max-bulk-len 512mb + +# Redis calls an internal function to perform many background tasks, like +# closing connections of clients in timeout, purging expired keys that are +# never requested, and so forth. +# +# Not all tasks are performed with the same frequency, but Redis checks for +# tasks to perform according to the specified "hz" value. +# +# By default "hz" is set to 10. Raising the value will use more CPU when +# Redis is idle, but at the same time will make Redis more responsive when +# there are many keys expiring at the same time, and timeouts may be +# handled with more precision. +# +# The range is between 1 and 500, however a value over 100 is usually not +# a good idea. Most users should use the default of 10 and raise this up to +# 100 only in environments where very low latency is required. +hz 10 + +# When a child rewrites the AOF file, if the following option is enabled +# the file will be fsync-ed every 32 MB of data generated. This is useful +# in order to commit the file to the disk more incrementally and avoid +# big latency spikes. +aof-rewrite-incremental-fsync yes + +# Redis LFU eviction (see maxmemory setting) can be tuned. However it is a good +# idea to start with the default settings and only change them after investigating +# how to improve the performances and how the keys LFU change over time, which +# is possible to inspect via the OBJECT FREQ command. +# +# There are two tunable parameters in the Redis LFU implementation: the +# counter logarithm factor and the counter decay time. It is important to +# understand what the two parameters mean before changing them. +# +# The LFU counter is just 8 bits per key, its maximum value is 255, so Redis +# uses a probabilistic increment with logarithmic behavior. Given the value +# of the old counter, when a key is accessed, the counter is incremented in +# this way: +# +# 1. A random number R between 0 and 1 is extracted. +# 2.
A probability P is calculated as 1/(old_value*lfu_log_factor+1). +# 3. The counter is incremented only if R < P. +# +# The default lfu-log-factor is 10. This is a table of how the frequency +# counter changes with a different number of accesses with different +# logarithmic factors: +# +# +--------+------------+------------+------------+------------+------------+ +# | factor | 100 hits | 1000 hits | 100K hits | 1M hits | 10M hits | +# +--------+------------+------------+------------+------------+------------+ +# | 0 | 104 | 255 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 1 | 18 | 49 | 255 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 10 | 10 | 18 | 142 | 255 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# | 100 | 8 | 11 | 49 | 143 | 255 | +# +--------+------------+------------+------------+------------+------------+ +# +# NOTE: The above table was obtained by running the following commands: +# +# redis-benchmark -n 1000000 incr foo +# redis-cli object freq foo +# +# NOTE 2: The counter initial value is 5 in order to give new objects a chance +# to accumulate hits. +# +# The counter decay time is the time, in minutes, that must elapse in order +# for the key counter to be divided by two (or decremented if it has a value +# <= 10). +# +# The default value for the lfu-decay-time is 1. A special value of 0 means to +# decay the counter every time it happens to be scanned. +# +# lfu-log-factor 10 +# lfu-decay-time 1 + +########################### ACTIVE DEFRAGMENTATION ####################### +# +# WARNING THIS FEATURE IS EXPERIMENTAL. However it was stress tested +# even in production and manually tested by multiple engineers for some +# time. +# +# What is active defragmentation?
+# ------------------------------- +# +# Active (online) defragmentation allows a Redis server to compact the +# spaces left between small allocations and deallocations of data in memory, +# thus allowing memory to be reclaimed. +# +# Fragmentation is a natural process that happens with every allocator (but +# less so with Jemalloc, fortunately) and certain workloads. Normally a server +# restart is needed in order to lower the fragmentation, or at least to flush +# away all the data and create it again. However thanks to this feature +# implemented by Oran Agra for Redis 4.0 this process can happen at runtime +# in a "hot" way, while the server is running. +# +# Basically when the fragmentation is over a certain level (see the +# configuration options below) Redis will start to create new copies of the +# values in contiguous memory regions by exploiting certain specific Jemalloc +# features (in order to understand if an allocation is causing fragmentation +# and to allocate it in a better place), and at the same time, will release the +# old copies of the data. This process, repeated incrementally for all the keys +# will cause the fragmentation to drop back to normal values. +# +# Important things to understand: +# +# 1. This feature is disabled by default, and only works if you compiled Redis +# to use the copy of Jemalloc we ship with the source code of Redis. +# This is the default with Linux builds. +# +# 2. You never need to enable this feature if you don't have fragmentation +# issues. +# +# 3. Once you experience fragmentation, you can enable this feature when +# needed with the command "CONFIG SET activedefrag yes". +# +# The configuration parameters are able to fine tune the behavior of the +# defragmentation process. If you are not sure about what they mean it is +# a good idea to leave the defaults untouched.
+ +# Enabled active defragmentation +# activedefrag yes + +# Minimum amount of fragmentation waste to start active defrag +# active-defrag-ignore-bytes 100mb + +# Minimum percentage of fragmentation to start active defrag +# active-defrag-threshold-lower 10 + +# Maximum percentage of fragmentation at which we use maximum effort +# active-defrag-threshold-upper 100 + +# Minimal effort for defrag in CPU percentage +# active-defrag-cycle-min 25 + +# Maximal effort for defrag in CPU percentage +# active-defrag-cycle-max 75 diff --git a/ansible/roles/connection_db/files/servercfgd.py b/ansible/roles/connection_db/files/servercfgd.py new file mode 100644 index 00000000000..afeda4bf37f --- /dev/null +++ b/ansible/roles/connection_db/files/servercfgd.py @@ -0,0 +1,260 @@
+#!/usr/bin/env python3
+"""XML-RPC daemon that initializes and provisions the Redis-backed connection db."""
+import hashlib
+import json
+import logging
+import redis
+import sys
+import time
+import xml.etree.ElementTree as ET
+
+from socketserver import ThreadingMixIn
+from xmlrpc.server import SimpleXMLRPCServer
+
+
+class _LoggerWriter(object):
+    """File-like adapter that hands every non-empty written line to a logging callable."""
+
+    def __init__(self, writer):
+        # writer: callable accepting one message string (e.g. logger.info)
+        self.writer = writer
+
+    def write(self, message):
+        for line in message.splitlines():
+            line = line.strip()
+            if line:
+                self.writer(line)
+
+    def flush(self):
+        # Nothing is buffered; defined only to satisfy the file-like interface.
+        pass
+
+
+logging.basicConfig(
+    filename='/tmp/servercfgd.log',
+    level=logging.DEBUG,
+    format='%(asctime)s %(levelname)s %(message)s'
+)
+
+logger = logging.getLogger('servercfgd')
+# Everything written to stdout/stderr from here on ends up in the log file.
+sys.stderr = _LoggerWriter(logger.debug)
+sys.stdout = _LoggerWriter(logger.info)
+
+
+class ThreadedSimpleXMLRPCServer(ThreadingMixIn, SimpleXMLRPCServer):
+    """SimpleXMLRPCServer combined with ThreadingMixIn."""
+
+
+DB_PROVISON_LOCK = "LOCK:db_provision"
+REDIS_DB_CONN_POOL = None
+DB_SCRIPTS = {}
+
+
+def _get_db_conn():
+    """Return a Redis client on the shared connection pool, creating the pool on first use."""
+    global REDIS_DB_CONN_POOL
+    if REDIS_DB_CONN_POOL is None:
+        REDIS_DB_CONN_POOL = redis.ConnectionPool(
+            host='127.0.0.1', port=6379, decode_responses=True
+        )
+    return redis.Redis(connection_pool=REDIS_DB_CONN_POOL)
+
+
+def register_script(script_name, script_content):
+    """Register Lua script to connection_db."""
+    logger.info('Register script %s', script_name)
+    conn = _get_db_conn()
+    conn.script_load(script_content)
+    script = conn.register_script(script_content)
+    script.name = script_name
+    DB_SCRIPTS[script_name] = script
+
+
+def get_scripts():
+    """Return the script name to SHA1 hash mapping."""
+    return {name: script.sha for name, script in DB_SCRIPTS.items()}
+
+
+def init_connection_db():
+    """
+    Initialize the connection db.
+
+    @raise RuntimeError: DB_META 'DBState' already holds a value other than 'down'
+    """
+    logger.info('Initialize connection db')
+    conn = _get_db_conn()
+    # NOTE(review): this reads/writes 'DBState' while provision_connection_db
+    # writes 'server_state' - confirm the two DB_META fields are meant to differ.
+    db_state = conn.hget('DB_META', 'DBState')
+    if db_state and db_state != 'down':
+        raise RuntimeError('Connection db had been initialized')
+    conn.hset('DB_META', mapping={'DBState': 'down'})
+
+
+def _convert_vlan_str_to_lst(vlan_str):
+    """Expand a vlan id string such as '1, 3-5' into [1, 3, 4, 5]."""
+    vlans = []
+    if not vlan_str:
+        return vlans
+    for vlan_range in vlan_str.split(','):
+        # tolerate blanks around separators, e.g. '1, 2'
+        vlan_range = vlan_range.strip()
+        if vlan_range.isdigit():
+            vlans.append(int(vlan_range))
+        elif '-' in vlan_range:
+            start, end = [int(_.strip()) for _ in vlan_range.split('-')[:2]]
+            vlans.extend(range(start, end + 1))
+        else:
+            raise ValueError('Unable to convert %s' % vlan_str)
+    return vlans
+
+
+def _queue_provision_commands(pipe, graph_xml_root):
+    """Queue on pipe the script calls that rebuild the db tables from the parsed graph."""
+    DB_SCRIPTS['cleanup'](args=['*_TABLE*', '*_LIST*', '*_SET*'], client=pipe)
+
+    devices = {}
+    for device in graph_xml_root.iter('Device'):
+        device = device.attrib
+        devices[device['Hostname']] = device
+
+    # collect management IP from DevicesL3Info
+    for device in graph_xml_root.iter('DevicesL3Info'):
+        devinfo = device.attrib
+        for mgmt_iface in device.iter('ManagementIPInterface'):
+            ifaceinfo = mgmt_iface.attrib
+            if ifaceinfo['Name'] == 'ManagementIp':
+                devices[devinfo['Hostname']]['ManagementIp'] = ifaceinfo['Prefix']
+
+    for devname, device in devices.items():
+        devtype = device['Type']
+        device = json.dumps(device)
+        if devtype == 'Server':
+            device_table = 'SERVER_TABLE' + ':' + devname
+            DB_SCRIPTS['add_server'](keys=[device_table], args=[device], client=pipe)
+        else:
+            device_table = 'SWITCH_TABLE' + ':' + devname
+            DB_SCRIPTS['add_switch'](keys=[device_table, 'DUT_LIST'], args=[device], client=pipe)
+
+    vlans = {}
+    for device in graph_xml_root.iter('DevicesL2Info'):
+        hostname = device.attrib['Hostname']
+        vlans.setdefault(hostname, {})
+        for iface in device.iter('InterfaceVlan'):
+            ifaceinfo = iface.attrib
+            vlans[hostname][ifaceinfo['portname']] = (
+                ifaceinfo['mode'],
+                _convert_vlan_str_to_lst(ifaceinfo['vlanids'])
+            )
+
+    for link in graph_xml_root.iter('DeviceInterfaceLink'):
+        link = link.attrib
+        sd, sp = link['StartDevice'], link['StartPort']
+        ed, ep = link['EndDevice'], link['EndPort']
+        # the vlan mode may be declared on either end of the link
+        if sp in vlans.get(sd, {}):
+            vlan_mode = vlans[sd][sp][0]
+        elif ep in vlans.get(ed, {}):
+            vlan_mode = vlans[ed][ep][0]
+        else:
+            raise ValueError('No vlan mode set for link: %s' % link)
+
+        DB_SCRIPTS['add_phy_link'](
+            keys=[
+                'PORT_LIST' + ':' + sd,
+                'PORT_TABLE' + ':' + sd + ':' + sp,
+                'PORT_LIST' + ':' + ed,
+                'PORT_TABLE' + ':' + ed + ':' + ep
+            ],
+            args=[sd, sp, ed, ep, link['BandWidth'], vlan_mode],
+            client=pipe
+        )
+
+    for device, ports in vlans.items():
+        for port, (_, vlanids) in ports.items():
+            DB_SCRIPTS['update_vlanid'](
+                keys=['USED_VLANIDPOOL_SET', 'VLAN_LIST' + ':' + device + ':' + port],
+                args=vlanids,
+                client=pipe
+            )
+
+
+def provision_connection_db(conn_graph_file_data, enforce_provision=False):
+    """
+    Provision connection db based on devices and links in connection graph.
+
+    @param conn_graph_file_data: connection graph xml file content
+    @param enforce_provision: True to provision even with dated connection graph file
+    @raise RuntimeError: the provision lock was not acquired within 10 seconds
+    @raise ValueError: dated graph file without enforce_provision, a link without
+                       vlan mode, or an unparsable vlan id string
+    """
+    # md5 only fingerprints the file content to detect repeated provisioning.
+    conn_graph_file_hash = hashlib.md5(conn_graph_file_data.encode()).hexdigest()
+    # NOTE(review): the xml arrives over XML-RPC; ElementTree is not hardened
+    # against malicious documents, so only trusted clients should reach this port.
+    graph_xml_root = ET.fromstring(conn_graph_file_data)
+
+    logger.info("Provision connection db based on connection graph file: %s", conn_graph_file_hash)
+
+    conn = _get_db_conn()
+    lock = redis.lock.Lock(conn, DB_PROVISON_LOCK)
+
+    if not lock.acquire(blocking=True, blocking_timeout=10):
+        raise RuntimeError('Failed to acquire db provision lock.')
+
+    try:
+        hashes = conn.zrange('DB_CONNECTION_GRAPH_VERSIONS', 0, -1)
+        logger.debug('DB_CONNECTION_GRAPH_VERSIONS: %s', hashes)
+        if not enforce_provision and conn_graph_file_hash in hashes:
+            # Reject before server_state is touched: nothing was modified, so a
+            # refused request must not flip a working db to 'down'.
+            logger.warning('Connection graph file %s was already provisioned', conn_graph_file_hash)
+            raise ValueError('Dated connection graph file to provision connection_db')
+
+        conn.hset('DB_META', mapping={'server_state': 'provisioning'})
+        try:
+            pipe = conn.pipeline(transaction=True)
+            _queue_provision_commands(pipe, graph_xml_root)
+            pipe.zadd(
+                'DB_CONNECTION_GRAPH_VERSIONS',
+                mapping={conn_graph_file_hash: time.time()}
+            )
+            # trim the hashes to keep only 20 entries
+            pipe.zremrangebyrank('DB_CONNECTION_GRAPH_VERSIONS', 0, -21)
+            pipe.execute()
+        except Exception:
+            logger.exception("Provision db failed, mark db as 'down'.")
+            conn.hset('DB_META', mapping={'server_state': 'down'})
+            raise
+        else:
+            logger.info("Provision done, mark db as 'active'.")
+            conn.hset('DB_META', mapping={'server_state': 'active'})
+    finally:
+        lock.release()
+
+
+if __name__ == '__main__':
+    print('Starting servercfgd...')
+    with ThreadedSimpleXMLRPCServer(
+        ('0.0.0.0', 10033),
+        logRequests=True,
+        allow_none=True
+    ) as server:
+        server.register_introspection_functions()
+
+        server.register_function(register_script)
+        server.register_function(get_scripts)
+        server.register_function(init_connection_db)
+        server.register_function(provision_connection_db)
+
+        try:
+            server.serve_forever()
+        except KeyboardInterrupt:
+            print('\nKeyboardInterrupt received, exiting...')
+            sys.exit(0)
+        except Exception as e:
+            print('\nException %s received, exiting...' % repr(e))
+            # a crash must not be reported to the service manager as a clean exit
+            sys.exit(1)
diff --git a/ansible/roles/connection_db/files/update_vlanid.lua b/ansible/roles/connection_db/files/update_vlanid.lua new file mode 100644 index 00000000000..e5751eebd4e --- /dev/null +++ b/ansible/roles/connection_db/files/update_vlanid.lua @@ -0,0 +1,36 @@ +-- KEYS[1] - vlan id pool set key +-- KEYS[2] - port vlan list key +-- ARGV[1:] - assigned vlan ids + +local used_vlanidpool = KEYS[1] +local vlan_list_name = KEYS[2] + +local _old_vlanids = redis.call('SMEMBERS', vlan_list_name) +local old_vlanids = {} +for i, v in ipairs(_old_vlanids) do + old_vlanids[v] = true +end + +local new_vlanids = {} +for i = 1, #ARGV, 1 do + table.insert(new_vlanids, ARGV[i]) + if old_vlanids[ARGV[i]] then + old_vlanids[ARGV[i]] = nil + end +end + +local free_vlanids = {} +for k, _ in pairs(old_vlanids) do + table.insert(free_vlanids, k) +end + +redis.call('DEL', vlan_list_name) + +if next(new_vlanids) ~= nil then + redis.call('SADD', vlan_list_name, unpack(new_vlanids)) + redis.call('SADD', used_vlanidpool, unpack(new_vlanids)) +end + +if next(free_vlanids) ~= nil then + redis.call('SREM', used_vlanidpool, unpack(free_vlanids)) +end diff --git a/ansible/roles/connection_db/handlers/main.yml b/ansible/roles/connection_db/handlers/main.yml new file mode 100644 index 00000000000..85a4fbb2e9a --- /dev/null +++ b/ansible/roles/connection_db/handlers/main.yml @@ -0,0 +1,19 @@ +--- +- name: Restart Redis + service: + name: "redis" + state: restarted + enabled: yes + delegate_to: "{{ connection_db_host }}" + become: True + +- name: Sleep + pause: + seconds: 10 + +- name: Init connection db + debug: + msg: > + Run 'init_connection_db' + {{ lookup('servercfgd_client', 'init_connection_db', servercfgd_host=connection_db_host_ip) }} + ignore_errors: True diff --git a/ansible/roles/connection_db/tasks/main.yml b/ansible/roles/connection_db/tasks/main.yml new file mode 100644 index 00000000000..3ca1ed4645a --- /dev/null +++ b/ansible/roles/connection_db/tasks/main.yml
@@ -0,0 +1,24 @@ +--- +- name: Set pip_executable + set_fact: + pip_executable: pip3 + +- name: Start connection database + include_tasks: start_db.yml + args: + apply: + delegate_to: "{{ connection_db_host }}" + become: True + when: action == 'start_db' + +- name: Stop connection database + include_tasks: stop_db.yml + args: + apply: + delegate_to: "{{ connection_db_host }}" + become: True + when: action == 'stop_db' + +- name: Start db provision + include_tasks: provision_db.yml + when: action == 'provision_db' diff --git a/ansible/roles/connection_db/tasks/provision_db.yml b/ansible/roles/connection_db/tasks/provision_db.yml new file mode 100644 index 00000000000..d7f0739a0ed --- /dev/null +++ b/ansible/roles/connection_db/tasks/provision_db.yml @@ -0,0 +1,18 @@ +- name: Set enforce_provision_connection_db + set_fact: + enforce_provision_connection_db: False + when: enforce_provision_connection_db is not defined + +- name: Retrieve connection graph xml file content + set_fact: + conn_graph_file_data: "{{ lookup('file', connection_graph_filename) }}" + +- name: Provision connection db + debug: + msg: > + Run 'provision_connection_db' + {{ lookup('servercfgd_client', + 'provision_connection_db', + servercfgd_host=connection_db_host_ip, + conn_graph_file_content=conn_graph_file_data, + enforce_provision=enforce_provision_connection_db) }} diff --git a/ansible/roles/connection_db/tasks/start_db.yml b/ansible/roles/connection_db/tasks/start_db.yml new file mode 100644 index 00000000000..7b0c740416e --- /dev/null +++ b/ansible/roles/connection_db/tasks/start_db.yml @@ -0,0 +1,85 @@ +--- +- name: Set servercfgd save path + set_fact: + servercfgd_path: "/etc/servercfgd" + when: servercfgd_path is not defined + +- name: Check if enforce provision servercfgd + set_fact: + enforce_provision_servercfgd: True + when: enforce_provision_servercfgd is not defined + +- name: Install Redis + apt: + name: redis-server + state: present + +- name: Ensure Redis is running + service: 
+ name: redis-server + state: started + enabled: yes + +- name: Setup Redis Configuration + copy: + src: redis.conf + dest: /etc/redis/redis.conf + owner: root + group: root + mode: "0644" + notify: + - Restart Redis + - Sleep + - Init connection db + +- name: Install pip3 + apt: + name: python3-pip + state: present + become: yes + +- name: Install servercfgd required Python packages + pip: + name: + - redis + state: present + executable: "{{ pip_executable }}" + environment: "{{ proxy_env | default({}) }}" + +- name: Create servercfgd directory + file: + path: "{{ servercfgd_path }}" + state: directory + mode: "0755" + +- name: Copy servercfgd to connection db server + copy: + src: servercfgd.py + dest: "{{ servercfgd_path }}" + force: "{{ enforce_provision_servercfgd }}" + mode: u=rwx,g=rx,o=rx + register: copy_result + +- name: Generate server_cfgd service file + template: + src: servercfgd.service.j2 + dest: /etc/systemd/system/servercfgd.service + when: copy_result.changed + +- name: Ensure servercfgd is running + systemd: + name: servercfgd + state: restarted + when: copy_result.changed + +- name: Get Lua scripts + set_fact: + db_scripts: "{{ db_scripts|default([]) + [ item ]}}" + with_fileglob: + - "*.lua" + +- name: Register scripts to connection_db + debug: + msg: > + Run 'register_script' + {{ lookup('servercfgd_client', 'register_scripts', servercfgd_host=connection_db_host_ip, scripts=db_scripts) }} diff --git a/ansible/roles/connection_db/tasks/stop_db.yml b/ansible/roles/connection_db/tasks/stop_db.yml new file mode 100644 index 00000000000..f3cf2266f99 --- /dev/null +++ b/ansible/roles/connection_db/tasks/stop_db.yml @@ -0,0 +1,23 @@ +--- +- name: Stop servercfgd + systemd: + name: servercfgd + state: stopped + +- name: Ensure Redis is stopped + service: + name: redis-server + state: stopped + +- name: Remove Redis + apt: + name: redis-server + state: absent + purge: yes + +- name: Remove py-redis + pip: + name: redis + state: absent + executable: 
"{{ pip_executable }}" + environment: "{{ proxy_env | default({}) }}" diff --git a/ansible/roles/connection_db/templates/servercfgd.service.j2 b/ansible/roles/connection_db/templates/servercfgd.service.j2 new file mode 100644 index 00000000000..b04f956dee7 --- /dev/null +++ b/ansible/roles/connection_db/templates/servercfgd.service.j2 @@ -0,0 +1,5 @@ +[Unit] +Description=servercfgd + +[Service] +ExecStart=/usr/bin/env python3 {{ servercfgd_path }}/servercfgd.py