---
# Play 1: runs on every host in the 'cluster' group. Collects each host's swarm
# node ID, swarm availability and the addresses on its access interface, then
# asserts that the cluster is in a state where the host named by 'node' can be
# taken offline. Facts set here are consumed by the following play.
- name: Check cluster state
  hosts: cluster
  # A failure on any single host aborts the run for all hosts.
  any_errors_fatal: true
  pre_tasks:
    - name: Configure remote execution environment
      ansible.builtin.import_tasks: tasks/meta/bootstrap-remote-env.yaml
  tasks:
    # 'node' must be set by the operator and must name a member of the
    # 'cluster' inventory group; checked once rather than per host.
    - name: Validate user input
      run_once: true
      ansible.builtin.assert:
        that:
          - node is defined
          - node in groups.cluster
        fail_msg: >-
          ERROR: Please set the 'node' variable to the cluster host to offline
          (one of: {{ groups.cluster | join(', ') }})

    # '!unsafe' stops Ansible from templating the Go-template braces that are
    # meant for the docker CLI.
    - name: Fetch node swarm ID
      ansible.builtin.command:
        cmd: !unsafe docker info --format '{{ .Swarm.NodeID}}'
      changed_when: false
      register: _docker_node_id_raw

    # This command needs Jinja templating for the node ID, so the Go-template
    # for docker is emitted as a quoted Jinja string literal instead of using
    # '!unsafe'.
    - name: Fetch swarm node availability
      ansible.builtin.command:
        cmd: docker node inspect {{ _docker_node_id_raw.stdout.strip() }} --format '{{ '{{ .Spec.Availability}}' }}'
      changed_when: false
      register: _docker_node_availability_raw

    - name: Set common facts
      ansible.builtin.set_fact:
        _target_node: "{{ node }}"
        _docker_node_id: "{{ _docker_node_id_raw.stdout.strip() }}"
        _docker_node_availability: "{{ _docker_node_availability_raw.stdout.strip() }}"
        # Use the next host in the group, wrapping around to the first host when
        # the target is the last member: the modulo maps an index equal to the
        # group length back to 0.
        _target_alt: >-
          {{ groups.cluster[
          (lookup('ansible.utils.index_of', groups.cluster, 'eq', node) + 1)
          % (groups.cluster | length)
          ] }}

    # Build the list of all IPv4 addresses on the access interface: the primary
    # address followed by any secondaries. 'ipv4_secondaries' is defaulted to an
    # empty list so that hosts without secondary addresses still get the fact
    # set (a loop over the secondaries would never run set_fact for them).
    - name: Set common fact for node addresses
      vars:
        _access_interface_facts: "{{ lookup('vars', 'ansible_' + skylab_cluster.interface.access) }}"
      ansible.builtin.set_fact:
        _node_addresses: >-
          {{ [_access_interface_facts.ipv4.address]
          + (_access_interface_facts.ipv4_secondaries
          | default([])
          | map(attribute='address')
          | list) }}

    # Only the target host records whether it still needs draining; any
    # availability other than 'drain' (case-insensitive) requires migration.
    - name: Set facts for target node
      when: inventory_hostname == _target_node
      ansible.builtin.set_fact:
        _needs_docker_migration: "{{ (_docker_node_availability | lower != 'drain') | bool }}"

    # Every host other than the target must hold the cluster access address and
    # report 'active' swarm availability.
    # NOTE(review): fail_msg only describes the availability condition; a
    # failure of the address condition will print the same message.
    - name: Check cluster settings
      when: inventory_hostname != _target_node
      ansible.builtin.assert:
        that:
          - skylab_cluster.address.access | ansible.netcommon.ipaddr('address') in _node_addresses
          - _docker_node_availability | lower == 'active'
        fail_msg: >-
          ERROR: Node '{{ inventory_hostname }}' is already marked as unavailable. All cluster
          nodes must be available before a new node can be moved to unavailable status.

# Play 2: runs only on the host named by 'node'. Drains the host's swarm node
# (when the earlier check flagged that as necessary) and moves the cluster
# access address from this host to the alternate host chosen in '_target_alt'.
- name: Offline node
  hosts: "{{ node }}"
  tasks:
    - name: Migrate services off target node
      block:
        # Snapshot of the service list taken before the drain; used below as
        # the reference for "migration finished".
        - name: Fetch current cluster service state
          ansible.builtin.command:
            cmd: !unsafe docker service ls --format '{{json .}}'
          register: _cluster_service_prestate
          changed_when: false

        # Executed against the 'core' host over network_cli. Unlike the
        # matching enable task, this one does not pass 'save'.
        - name: Disable NAT rule {{ _skylab_adguard_nat_rule }}
          community.network.edgeos_config:
            lines:
              - set service nat rule {{ _skylab_adguard_nat_rule }} disable
          connection: ansible.netcommon.network_cli
          delegate_to: core

        # Uses the Python interpreter under skylab_state_dir for this task only.
        - name: Update node availability
          community.docker.docker_node:
            hostname: "{{ _docker_node_id }}"
            availability: drain
          register: _node_availability_status
          vars:
            ansible_python_interpreter: "{{ skylab_state_dir }}/ansible-runtime/bin/python"

        # Fixed 10 second pause before polling starts.
        - name: Wait for services to shutdown
          ansible.builtin.pause:
            seconds: 10

        # Poll until the service listing is byte-for-byte equal to the
        # pre-drain snapshot: up to 120 retries, 5 seconds apart.
        - name: Wait for services to migrate
          ansible.builtin.command:
            cmd: !unsafe docker service ls --format '{{json .}}'
          register: _cluster_service_poststate
          changed_when: false
          until: _cluster_service_poststate.stdout == _cluster_service_prestate.stdout
          delay: 5
          retries: 120

        # Removes the 'disable' flag set earlier and passes 'save: true'.
        - name: Enable NAT rule {{ _skylab_adguard_nat_rule }}
          community.network.edgeos_config:
            lines:
              - delete service nat rule {{ _skylab_adguard_nat_rule }} disable
            save: true
          connection: ansible.netcommon.network_cli
          delegate_to: core
      when: _needs_docker_migration

    # NOTE(review): the two address tasks below are placed outside the
    # migration block because each carries its own 'when'; the flattened source
    # does not show the nesting, so confirm against the original layout.

    # Only runs when this host currently holds the cluster access address.
    - name: Delete address from node
      ansible.builtin.command:
        cmd: >-
          ip address delete
          {{ skylab_cluster.address.access | ansible.netcommon.ipaddr('host/prefix') }}
          dev {{ skylab_cluster.interface.access }}
      changed_when: true
      when: skylab_cluster.address.access | ansible.netcommon.ipaddr('address') in _node_addresses
      become: true

    # Runs on the alternate host, and only when that host does not already hold
    # the address; the interface name is read from the alternate host's vars.
    - name: Assign address to alt node
      ansible.builtin.command:
        cmd: >-
          ip address add
          {{ skylab_cluster.address.access | ansible.netcommon.ipaddr('host/prefix') }}
          dev {{ hostvars[_target_alt].skylab_cluster.interface.access }}
      changed_when: true
      when: skylab_cluster.address.access | ansible.netcommon.ipaddr('address') not in hostvars[_target_alt]._node_addresses
      become: true
      delegate_to: "{{ _target_alt }}"