This repository has been archived on 2023-05-19. You can view files and clone it, but cannot push or open issues or pull requests.
skylab-ansible/playbooks/node-down.yaml

133 lines
5.3 KiB
YAML

---
# Play 1: collect Docker Swarm and network state from every host in the
# `cluster` group and stop unless every host other than the requested `node`
# passes the health assertions at the end of the play.
- name: Check cluster state
  hosts: cluster
  # A failure on any single host aborts the play for all hosts
  any_errors_fatal: true
  pre_tasks:
    - name: Configure remote execution environment
      ansible.builtin.import_tasks: tasks/meta/bootstrap-remote-env.yaml
  tasks:
    # `node` is supplied by the operator (e.g. via --extra-vars) and must name
    # a member of the `cluster` inventory group.
    - name: Validate user input
      run_once: true
      ansible.builtin.assert:
        that:
          - node is defined
          - node in groups.cluster
        fail_msg: >-
          ERROR: Please set the 'node' variable to the cluster host to offline
          (one of: {{ groups.cluster | join(', ') }})

    # `!unsafe` keeps Ansible from templating the Go template handed to docker
    - name: Fetch node swarm ID
      ansible.builtin.command:
        cmd: !unsafe docker info --format '{{ .Swarm.NodeID}}'
      changed_when: false
      register: _docker_node_id_raw

    # This command needs a real Jinja substitution (the node ID), so the Go
    # template braces are emitted through a quoted Jinja string literal instead
    # of marking the whole value `!unsafe`.
    - name: Fetch swarm node availability
      ansible.builtin.command:
        cmd: >-
          docker node inspect {{ _docker_node_id_raw.stdout.strip() }}
          --format '{{ '{{ .Spec.Availability}}' }}'
      changed_when: false
      register: _docker_node_availability_raw

    - name: Set common facts
      ansible.builtin.set_fact:
        _target_node: "{{ node }}"
        _docker_node_id: "{{ _docker_node_id_raw.stdout.strip() }}"
        _docker_node_availability: "{{ _docker_node_availability_raw.stdout.strip() }}"
        # Host that follows `node` in the cluster group; the modulo wraps the
        # last host in the group around to the first one.
        _target_alt: >-
          {{ groups.cluster[
            (lookup('ansible.utils.index_of', groups.cluster, 'eq', node) + 1)
            % (groups.cluster | length)
          ] }}

    # Primary IPv4 address of the access interface followed by every secondary
    # address on it. The interface facts may carry no `ipv4_secondaries` key
    # when the interface has no secondary addresses, so an empty list is used
    # as the default; the fact is therefore always defined for later tasks.
    - name: Set common fact for node addresses
      ansible.builtin.set_fact:
        _node_addresses: >-
          {{
            [lookup('vars', 'ansible_' + skylab_cluster.interface.access).ipv4.address]
            + (
              lookup('vars', 'ansible_' + skylab_cluster.interface.access).ipv4_secondaries
              | default([])
              | map(attribute='address')
              | list
            )
          }}

    # Only the host being offlined needs to know whether its swarm services
    # still have to be drained.
    - name: Set facts for target node
      when: inventory_hostname == _target_node
      ansible.builtin.set_fact:
        _needs_docker_migration: "{{ (_docker_node_availability | lower != 'drain') | bool }}"

    # Every other host must hold its configured access address and be an
    # active swarm node.
    # NOTE(review): presumably `skylab_cluster.address.access` is a per-host
    # value defined in inventory - confirm against the host/group vars.
    - name: Check cluster settings
      when: inventory_hostname != _target_node
      ansible.builtin.assert:
        that:
          - skylab_cluster.address.access | ansible.netcommon.ipaddr('address') in _node_addresses
          - _docker_node_availability | lower == 'active'
        fail_msg: >-
          ERROR: Node '{{ inventory_hostname }}' is already marked as unavailable. All cluster
          nodes must be available before a new node can be moved to unavailable status.
# Play 2: runs only against the host named by `node`. Drains its swarm
# services when required, then moves its access address to the host stored
# in `_target_alt`.
- name: Offline node
  hosts: "{{ node }}"
  tasks:
    # The whole block is skipped when `_needs_docker_migration` is false.
    # NOTE(review): if a task inside this block fails, the NAT rule is left
    # disabled on `core` - confirm whether that is the intended failure state.
    - name: Migrate services off target node
      block:
        # Snapshot of the service list used later as the "fully migrated" target
        - name: Fetch current cluster service state
          ansible.builtin.command:
            cmd: !unsafe docker service ls --format '{{json .}}'
          register: _cluster_service_prestate
          changed_when: false

        # Executed on the `core` host over network_cli
        - name: Disable NAT rule {{ _skylab_adguard_nat_rule }}
          community.network.edgeos_config:
            lines:
              - set service nat rule {{ _skylab_adguard_nat_rule }} disable
          connection: ansible.netcommon.network_cli
          delegate_to: core

        # The docker module runs under the interpreter in the state directory
        - name: Update node availability
          community.docker.docker_node:
            hostname: "{{ _docker_node_id }}"
            availability: drain
          register: _node_availability_status
          vars:
            ansible_python_interpreter: "{{ skylab_state_dir }}/ansible-runtime/bin/python"

        - name: Wait for services to shutdown
          ansible.builtin.pause:
            seconds: 10

        # Poll every 5 seconds, up to 120 attempts, until the service listing
        # is identical to the snapshot taken before the drain.
        - name: Wait for services to migrate
          ansible.builtin.command:
            cmd: !unsafe docker service ls --format '{{json .}}'
          register: _cluster_service_poststate
          changed_when: false
          until: _cluster_service_poststate.stdout == _cluster_service_prestate.stdout
          delay: 5
          retries: 120

        # Removes the `disable` flag set earlier and saves the configuration
        - name: Enable NAT rule {{ _skylab_adguard_nat_rule }}
          community.network.edgeos_config:
            lines:
              - delete service nat rule {{ _skylab_adguard_nat_rule }} disable
            save: true
          connection: ansible.netcommon.network_cli
          delegate_to: core
      when: _needs_docker_migration

    # Skipped when the address is not currently present on this host
    - name: Delete address from node
      ansible.builtin.command:
        cmd: >-
          ip address delete
          {{ skylab_cluster.address.access | ansible.netcommon.ipaddr('host/prefix') }}
          dev {{ skylab_cluster.interface.access }}
      become: true
      changed_when: true
      when: skylab_cluster.address.access | ansible.netcommon.ipaddr('address') in _node_addresses

    # Runs on the alternate host; the address comes from this host's variables
    # while the interface name is read from the alternate host's variables.
    - name: Assign address to alt node
      ansible.builtin.command:
        cmd: >-
          ip address add
          {{ skylab_cluster.address.access | ansible.netcommon.ipaddr('host/prefix') }}
          dev {{ hostvars[_target_alt].skylab_cluster.interface.access }}
      become: true
      delegate_to: "{{ _target_alt }}"
      changed_when: true
      when: skylab_cluster.address.access | ansible.netcommon.ipaddr('address') not in hostvars[_target_alt]._node_addresses