configuration/environments_scripts/central_mongo_setup/files/etc/fail2ban/jail.d/customisation.local
@@ -0,0 +1 @@
+../../../../../repo/etc/fail2ban/jail.d/customisation.local
\ No newline at end of file
configuration/environments_scripts/central_mongo_setup/setup-central-mongo-instance.sh
@@ -25,8 +25,8 @@ else
if ec2-metadata | grep -q instance-id; then
echo "Running on an AWS EC2 instance as user ${USER} / $(whoami), starting setup..."
# Install standard packages:
- sudo yum -y update
- sudo yum -y install nvme-cli chrony cronie cronie-anacron jq mailx
+ sudo dnf -y --best --allowerasing --releasever=latest upgrade
+ sudo dnf -y install nvme-cli chrony cronie cronie-anacron jq mailx whois iptables
# Copy imageupgrade_function.sh
scp -o StrictHostKeyChecking=no -p root@sapsailing.com:/home/wiki/gitwiki/configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh .
sudo mv imageupgrade_functions.sh /usr/local/bin
@@ -34,6 +34,8 @@ else
. imageupgrade_functions.sh
# Install MongoDB 5.0 and configure as replica set "live"
setup_mongo_7_0_on_AL2023
+ setup_mail_sending
+ setup_fail2ban
build_crontab_and_setup_files central_mongo_setup
# obtain root SSH key from key vault:
setup_keys "central_mongo_setup"
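A quick way to verify the outcome of `setup_mongo_7_0_on_AL2023` on the freshly provisioned instance (and on the analogous mongo instance setup below) is sketched here; this is a generic `mongosh` check, not a command taken from the setup scripts:

```
# Print each replica set member with its state (PRIMARY/SECONDARY/...);
# assumes mongosh is installed by the MongoDB setup and the "live" replica set is initialized.
mongosh --quiet --eval 'rs.status().members.forEach(m => print(m.name, m.stateStr))'
```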
configuration/environments_scripts/central_reverse_proxy/files/etc/fail2ban/jail.d/customisation.local
@@ -0,0 +1 @@
+../../../../../repo/etc/fail2ban/jail.d/customisation.local
\ No newline at end of file
configuration/environments_scripts/mongo_instance_setup/files/etc/fail2ban/jail.d/customisation.local
@@ -0,0 +1 @@
+../../../../../repo/etc/fail2ban/jail.d/customisation.local
\ No newline at end of file
configuration/environments_scripts/mongo_instance_setup/setup-mongo-instance.sh
@@ -19,8 +19,8 @@ else
if ec2-metadata | grep -q instance-id; then
echo "Running on an AWS EC2 instance as user ${USER} / $(whoami), starting setup..."
# Install standard packages:
- sudo yum -y update
- sudo yum -y install nvme-cli chrony cronie cronie-anacron jq mailx
+ sudo dnf -y --best --allowerasing --releasever=latest upgrade
+ sudo dnf -y install nvme-cli chrony cronie cronie-anacron jq mailx whois iptables
# Copy imageupgrade_function.sh
scp -o StrictHostKeyChecking=no -p root@sapsailing.com:/home/wiki/gitwiki/configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh .
sudo mv imageupgrade_functions.sh /usr/local/bin
@@ -28,6 +28,8 @@ else
. imageupgrade_functions.sh
# Install MongoDB 5.0 and configure as replica set "live"
setup_mongo_7_0_on_AL2023
+ setup_mail_sending
+ setup_fail2ban
build_crontab_and_setup_files mongo_instance_setup
# obtain root SSH key from key vault:
setup_keys "mongo_instance_setup"
configuration/environments_scripts/mysql_instance_setup/setup-mysql-server.sh
@@ -1,6 +1,6 @@
#!/bin/bash
# Usage: ${0} [ -b {bugs-password] ] [ -r {root-password} ] {instance-ip}
-# Deploy with Amazon Linux 2023
+# Deploy with Amazon Linux 2023 with a 16GB root volume

# Read options and assign to variables:
options='b:r:'
@@ -39,8 +39,8 @@ else
sudo chgrp ec2-user /home/ec2-user/ssh-key-reader.token
sudo chmod 600 /home/ec2-user/ssh-key-reader.token
# Install packages for MariaDB and cron/anacron/crontab:
- sudo yum update -y
- sudo yum -y install mariadb105-server cronie
+ sudo dnf -y --best --allowerasing --releasever=latest upgrade
+ sudo dnf -y install mariadb105-server cronie fail2ban
sudo su -c "printf '\n[mysqld]\nlog_bin = /var/log/mariadb/mysql-bin.log\n' >> /etc/my.cnf.d/mariadb-server.cnf"
sudo systemctl enable mariadb.service
sudo systemctl start mariadb.service
@@ -51,6 +51,8 @@ else
exit 1
fi
setup_sshd_resilience
+ setup_mail_sending
+ setup_fail2ban
sudo chown root:root /usr/local/bin/imageupgrade_functions.sh
echo "Creating backup through mysql client on sapsailing.com..."
ssh -o StrictHostKeyChecking=false root@sapsailing.com "mysqldump --all-databases -h mysql.internal.sapsailing.com --user=root --password=${ROOT_PW} --master-data --skip-lock-tables --lock-tables=0" >> ${BACKUP_FILE}
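Since the hunk above appends a `log_bin` setting to /etc/my.cnf.d/mariadb-server.cnf, a short check after MariaDB has been restarted can confirm that binary logging is actually on. These are generic MariaDB statements, not part of the script, and may require the root credentials set up elsewhere:

```
# Confirm binary logging is enabled and a first binlog file exists:
sudo mysql -e "SHOW VARIABLES LIKE 'log_bin'; SHOW BINARY LOGS;"
```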
configuration/environments_scripts/rabbitmq_instance_setup/setup-rabbitmq-server.sh
@@ -17,7 +17,9 @@ else
# Install packages for MariaDB and cron/anacron/crontab:
sudo apt-get -y update
sudo DEBIAN_FRONTEND=noninteractive apt-get -yq -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confnew upgrade
- sudo DEBIAN_FRONTEND=noninteractive apt-get -yq -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confnew install rabbitmq-server systemd-cron jq syslog-ng
+ # Note that the fail2ban installation on Debian automatically configures a jail for sshd based on auth.log;
+ # therefore, the setup_fail2ban function from imageupgrade_functions.sh does not need to be invoked in this case.
+ sudo DEBIAN_FRONTEND=noninteractive apt-get -yq -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confnew install rabbitmq-server systemd-cron jq syslog-ng fail2ban
sudo touch /var/run/last_change_aws_landscape_managers_ssh_keys__home_admin
sudo chown admin:admin /var/run/last_change_aws_landscape_managers_ssh_keys__home_admin
scp -o StrictHostKeyChecking=false -r root@sapsailing.com:/home/wiki/gitwiki/configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh /home/admin
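To double-check the claim in the comment above on a Debian instance, the jail that ships with the distribution package can be inspected with the standard fail2ban client (generic commands, not part of the script):

```
# Debian enables an sshd jail out of the box via /etc/fail2ban/jail.d/defaults-debian.conf:
sudo fail2ban-client status          # should list the "sshd" jail
sudo fail2ban-client status sshd     # shows failed/banned counts and the banned IP list
```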
configuration/environments_scripts/repo/etc/fail2ban/jail.d/customisation.local
@@ -0,0 +1,8 @@
+ [ssh-iptables]
+
+ enabled = true
+ filter = sshd[mode=aggressive]
+ action = iptables[name=SSH, port=ssh, protocol=tcp]
+          sendmail-whois[name=SSH, dest=axel.uhl@sap.com, sender=fail2ban@sapsailing.com]
+ logpath = /var/log/fail2ban.log
+ maxretry = 5
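With `backend = systemd` (set by `setup_fail2ban` below), the sshd filter referenced in this jail can be dry-run against the journal before relying on it. This is a generic `fail2ban-regex` invocation, not part of the change:

```
# Replay journal entries through the stock sshd filter to see what would match;
# the jail above additionally sets mode=aggressive to catch pre-auth probes.
sudo fail2ban-regex systemd-journal /etc/fail2ban/filter.d/sshd.conf
```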
configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh
@@ -306,36 +306,16 @@ setup_cloud_cfg_and_root_login() {
}

setup_fail2ban() {
- pushd .
- if [[ ! -f "/etc/systemd/system/fail2ban.service" ]]; then
- yum install 2to3 -y
- cd /usr/local/src
- wget https://github.com/fail2ban/fail2ban/archive/refs/tags/1.0.2.tar.gz
- tar -xvf 1.0.2.tar.gz
- cd fail2ban-1.0.2/
- ./fail2ban-2to3
- python3.9 setup.py build
- python3.9 setup.py install
- cp ./build/fail2ban.service /etc/systemd/system/fail2ban.service
- sed -i 's|Environment=".*"|Environment="PYTHONPATH=/usr/local/lib/python3.9/site-packages"|' /etc/systemd/system/fail2ban.service
- sed -i 's|^backend *= *auto *$|backend = systemd|' /etc/fail2ban/jail.conf
- systemctl enable fail2ban
- chkconfig --level 23 fail2ban on
- fi
- cat << EOF > /etc/fail2ban/jail.d/customisation.local
- [ssh-iptables]
-
- enabled = true
- filter = sshd[mode=aggressive]
- action = iptables[name=SSH, port=ssh, protocol=tcp]
- sendmail-whois[name=SSH, dest=axel.uhl@sap.com, sender=fail2ban@sapsailing.com]
- logpath = /var/log/fail2ban.log
- maxretry = 5
-EOF
+ # Expects setup_mail_sending to have been invoked beforehand so that fail2ban e-mails are sent properly
+ dnf install -y fail2ban whois
+ sed -i 's|^backend *= *auto *$|backend = systemd|' /etc/fail2ban/jail.conf
+ systemctl enable fail2ban
+ # The /etc/fail2ban/jail.d/ contents are expected to be provided by the files/etc/fail2ban/jail.d
+ # folders in the respective environments_scripts sub-folder; use, e.g., a symbolic link to
+ # configuration/environments_scripts/repo/etc/fail2ban/jail.d/customisation.local for a
+ # systemd-based sshd-iptables filter.
touch /var/log/fail2ban.log
- service fail2ban start
- yum remove -y firewalld
- popd
+ systemctl start fail2ban
}

setup_mail_sending() {
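After `setup_fail2ban` has run and the per-environment jail.d symlink is in place, the result can be checked with standard fail2ban/systemd commands; the jail name matches the customisation.local shown above:

```
sudo systemctl is-active fail2ban                # should print "active"
sudo fail2ban-client status ssh-iptables         # shows the sshd filter's failed/banned IPs
sudo fail2ban-client get ssh-iptables actions    # confirms the iptables and sendmail-whois actions
```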
configuration/environments_scripts/reverse_proxy/files/etc/fail2ban/jail.d/customisation.local
@@ -0,0 +1 @@
+../../../../../repo/etc/fail2ban/jail.d/customisation.local
\ No newline at end of file
configuration/environments_scripts/sailing_server/files/etc/fail2ban/jail.d/customisation.local
@@ -0,0 +1 @@
+../../../../../repo/etc/fail2ban/jail.d/customisation.local
\ No newline at end of file
configuration/environments_scripts/sailing_server/setup-sailing-server.sh
@@ -19,8 +19,8 @@ else
if ec2-metadata | grep -q instance-id; then
echo "Running on an AWS EC2 instance as user ${USER} / $(whoami), starting setup..."
# Install standard packages:
- sudo yum -y update
- sudo yum -y install git tmux nvme-cli chrony cronie cronie-anacron jq telnet mailx
+ sudo dnf -y --best --allowerasing --releasever=latest upgrade
+ sudo dnf -y install git tmux nvme-cli chrony cronie cronie-anacron jq telnet mailx
# Allow root ssh login with the same key used for the ec2-user for now;
# later, a cron job will be installed that keeps the /root/authorized_keys file
# up to date with all landscape managers' public SSH keys
@@ -45,6 +45,7 @@ EOF
# and then move it to the sailing user's .ssh directory
setup_keys "sailing_server"
setup_mail_sending
+ setup_fail2ban
sudo su - sailing -c "mkdir servers"
# Force acceptance of sapsailing.com's host key:
sudo su - sailing -c "ssh -o StrictHostKeyChecking=false trac@sapsailing.com ls" >/dev/null
wiki/info/landscape/amazon-ec2.md
@@ -21,7 +21,7 @@ Our default region in AWS EC2 is eu-west-1 (Ireland). Tests are currently run in

In Route53 (the AWS DNS) we have registered the sapsailing.com domain and can manage records for any sub-domains. The "apex" record for sapsailing.com points to a Network Load Balancer (NLB), currently ``NLB-sapsailing-dot-com-f937a5b33246d221.elb.eu-west-1.amazonaws.com``, which does the following things:

-* accept SSH connects on port 22; these are forwarded to the internal IP of the web server through the target group ``SSH-to-sapsailing-dot-com-2``, currently with the internal IP target ``172.31.28.212``
+* accept SSH connections on port 22; these are forwarded to the internal IP of the central reverse proxy through the target group ``SSH-to-sapsailing-dot-com-2``. It is important that this target group is configured to preserve client IP addresses (see the CLI sketch after this list); otherwise, the ``fail2ban`` installation on the central reverse proxy would quickly block all SSH traffic, malicious and legitimate alike, because every connection would appear to originate from one of the NLB's internal IP addresses.
* accept HTTP connections for ``sapsailing.com:80``, forwarding them to the target group ``HTTP-to-sapsailing-dot-com-2`` which is a TCP target group for port 80 with ip-based targets (instance-based was unfortunately not possible for the old ``m3`` instance type of our web server), again pointing to ``172.31.28.212``, the internal IP of our web server
* accept HTTPS/TLS connections on port 443, using the ACM-managed certificate for ``*.sapsailing.com`` and ``sapsailing.com`` and also forwarding to the ``HTTP-to-sapsailing-dot-com-2`` target group
* optionally, this NLB could be extended by UDP port mappings in case we see a use case for UDP-based data streams that need forwarding to specific applications, such as the Expedition data typically sent on ports 2010 and following
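The sketch referenced in the SSH bullet above: client IP preservation is an NLB target group attribute that can be checked and, if necessary, enabled via the AWS CLI. The ARN below is a placeholder, not the real ARN of ``SSH-to-sapsailing-dot-com-2``:

```
TG_ARN="arn:aws:elasticloadbalancing:eu-west-1:123456789012:targetgroup/SSH-to-sapsailing-dot-com-2/0123456789abcdef"  # placeholder

# Check whether the target group preserves the client source IP:
aws elbv2 describe-target-group-attributes --target-group-arn "$TG_ARN" \
  --query 'Attributes[?Key==`preserve_client_ip.enabled`].Value' --output text

# Enable it so that fail2ban on the central reverse proxy sees the real client IPs:
aws elbv2 modify-target-group-attributes --target-group-arn "$TG_ARN" \
  --attributes Key=preserve_client_ip.enabled,Value=true
```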
@@ -33,12 +33,12 @@ Further ALBs may exist in addition to the default ALB and the NLB for ``sapsaili
### Apache httpd, the central reverse proxy (Webserver) and disposable reverse proxies

A key pillar of our architecture is the central reverse proxy, which handles traffic for the wiki, bugzilla, awstats, releases, p2, Git, jobs, static and is the target of the catch all rule in the Dynamic ALB.
-Any traffic to the Hudson build server subdomain *does not* go through the central webserver. Instead, it gets directed by route 53 to a `DDNSMapped` load balancer (which all route any port 80 traffic to 443), which has a rule pointing to a target group, that contains only the Hudson server. The setup procedure can be found below.
+Any traffic to the Hudson build server subdomain *does not* go through the central webserver. Instead, it is directed by Route 53 to a `DNSMapped` load balancer (all of which redirect port 80 traffic to 443), which has a rule pointing to a target group that contains only the Hudson server. The setup procedure can be found below.

-To improve availability and reliability, we have a disposable environment type and AMI. The instances from this AMI are only for serving requests to the archive but are lightweight and can be quickly started and shutdown, using the landscape management console.
+To improve availability and reliability, we have a "disposable reverse proxy" environment type and AMI (see ``configuration/environments_scripts/reverse_proxy``). The instances from this AMI only serve requests to the archive, but they are lightweight and can be quickly started and shut down using the landscape management console.

The IPs for all reverse proxies will automatically be added to ALB target groups with the tag key `allReverseProxies`, including the `CentralWebServerHTTP-Dyn` target group (in the dynamic ALB in eu-west-1)
-and all the `DDNSMapped-x-HTTP` (in all the DDNSMapped servers). These are the target groups for the default rules and it ensures availability to the ARCHIVE especially.
+and all the `DDNSMapped-x-HTTP` target groups (in all the DNSMapped ALBs). These are the target groups for the default rules, which ensures availability of the ARCHIVE in particular.
Disposables instances are tagged with `DisposableProxy` to indicate it hosts no vital services. `ReverseProxy` also identifies any reverse proxies. The health check for the target groups would change to trigger a script which returns different error codes: healthy/200 if in the same AZ as the archive (or if the failover archive is in use), whilst unhealthy/503 if in different AZs. This will reduce cross-AZ, archive traffic costs, but maintain availability and load balancing.

For security groups of the central reverse proxy, we want Webserver, as well as Reverse Proxy. The disposables just have the latter.
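For reference, the target groups carrying the `allReverseProxies` tag (to which reverse proxy IPs are added automatically, as described above) can be listed with a sketch like the following; the tag key is taken from the text, everything else is generic AWS CLI usage:

```
# List all target group ARNs, then keep those tagged with allReverseProxies:
for arn in $(aws elbv2 describe-target-groups --query 'TargetGroups[].TargetGroupArn' --output text); do
  aws elbv2 describe-tags --resource-arns "$arn" \
    --query "TagDescriptions[?Tags[?Key=='allReverseProxies']].ResourceArn" --output text
done
```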
@@ -519,13 +519,17 @@ disposables*. It is used to reduce costly cross-AZ traffic between our instances

The general idea of this ALB target group healthcheck, is to make instances healthy only if in the same AZ as the archive (the correct AZ). However, availability takes priority over cost saving, so if there is no healthy instance in the "correct" AZ, the healthcheck returns healthy.

-All the target groups, tagged with allReverseProxies, have this healthcheck:
+All the target groups, tagged with ``allReverseProxies``, have this healthcheck:

```
/cgi-bin/reverseProxyHealthcheck.sh?arn=TARGET_GROUP_ARN
```

-The healthcheck works by first checking internal-server-status. If genuinely unhealthy, then unhealthy is returned to the ELB (elastic load balancer) health checker. Otherwise, the instance uses cached CIDR masks (which correspond to AZ definitions) and nmap to check if in the same AZ as the archive.
+The script can be found under ``configuration/environments_scripts/repo/var/www/cgi-bin``, to which the ``reverse_proxy`` (the disposables) and ``central_reverse_proxy`` environments link symbolically.
+
+The healthcheck first checks whether another instance of the healthcheck is already running. The PID of a running check is stored under ``/var/run/reverseProxyHealthcheck``, a folder created upon boot by a directive in ``/etc/tmpfiles.d/reverseProxyHealthcheck.conf``. A healthcheck records its exit status and output in files under ``/var/run/reverseProxyHealthcheck``, which a background job removes again after 10s. If a healthcheck is started while another is already running, the new one waits for the exit status of the running check to be written to ``/var/run/reverseProxyHealthcheck``, then adopts that exit status and output as its own. This way we avoid congestion and clogging of the reverse proxies by health checks, which can be long-running at times, especially when the AWS CLI takes unusually long to discover the target groups and target health. Without this shortcut for concurrent executions, we have seen hundreds of concurrently running health checks, at times even leading to out-of-memory situations.
+
+Then it checks the ``internal-server-status``. If genuinely unhealthy, then unhealthy is returned to the ELB (elastic load balancer) health checker. Otherwise, the instance uses cached CIDR masks (which correspond to AZ definitions) and nmap to check whether it is in the same AZ as the archive.
If in the same AZ, then "healthy" is returned to the ELB health checker. If not, then the target group ARN, passed as a parameter
to the healthcheck, is used to get the private IPs of the other instances in the target group, via a describe-target-health call to the AWS API. This is the most costly part of the check, so these values are cached.
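A minimal sketch of the runtime-directory and serialization mechanics described above; the directory mode/ownership and the file names inside ``/var/run/reverseProxyHealthcheck`` are assumptions, not copied from the actual ``reverseProxyHealthcheck.sh``:

```
# /etc/tmpfiles.d/reverseProxyHealthcheck.conf recreates the runtime directory on boot,
# e.g. (mode and owner are assumptions):
#   d /var/run/reverseProxyHealthcheck 0755 apache apache -

RUNDIR=/var/run/reverseProxyHealthcheck
PIDFILE=$RUNDIR/pid; STATUSFILE=$RUNDIR/exit_status; OUTFILE=$RUNDIR/output   # hypothetical names

if [ -f "$PIDFILE" ] && kill -0 "$(cat "$PIDFILE")" 2>/dev/null; then
    # Another check is already running: wait for its result and reuse it.
    while [ ! -f "$STATUSFILE" ]; do sleep 0.2; done
    cat "$OUTFILE"
    exit "$(cat "$STATUSFILE")"
fi
echo $$ > "$PIDFILE"
# ... run the actual health check here, capturing its output and exit code ...
echo "healthy" > "$OUTFILE"; echo 0 > "$STATUSFILE"
cat "$OUTFILE"
( sleep 10; rm -f "$STATUSFILE" "$OUTFILE" "$PIDFILE" ) &   # clean up after 10s, as described
exit 0
```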