f6d8f24fee631ab14f29e71c1499385f7bf91ee9
configuration/environments_scripts/central_mongo_setup/files/etc/fail2ban/jail.d/customisation.local
| ... | ... | @@ -0,0 +1 @@ |
| 1 | +../../../../../repo/etc/fail2ban/jail.d/customisation.local |
|
| ... | ... | \ No newline at end of file |
configuration/environments_scripts/central_mongo_setup/setup-central-mongo-instance.sh
| ... | ... | @@ -25,8 +25,8 @@ else |
| 25 | 25 | if ec2-metadata | grep -q instance-id; then |
| 26 | 26 | echo "Running on an AWS EC2 instance as user ${USER} / $(whoami), starting setup..." |
| 27 | 27 | # Install standard packages: |
| 28 | - sudo yum -y update |
|
| 29 | - sudo yum -y install nvme-cli chrony cronie cronie-anacron jq mailx |
|
| 28 | + sudo dnf -y --best --allowerasing --releasever=latest upgrade |
|
| 29 | + sudo dnf -y install nvme-cli chrony cronie cronie-anacron jq mailx whois iptables |
|
| 30 | 30 | # Copy imageupgrade_functions.sh |
| 31 | 31 | scp -o StrictHostKeyChecking=no -p root@sapsailing.com:/home/wiki/gitwiki/configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh . |
| 32 | 32 | sudo mv imageupgrade_functions.sh /usr/local/bin |
| ... | ... | @@ -34,6 +34,8 @@ else |
| 34 | 34 | . imageupgrade_functions.sh |
| 35 | 35 | # Install MongoDB 7.0 and configure as replica set "live" |
| 36 | 36 | setup_mongo_7_0_on_AL2023 |
| 37 | + setup_mail_sending |
|
| 38 | + setup_fail2ban |
|
| 37 | 39 | build_crontab_and_setup_files central_mongo_setup |
| 38 | 40 | # obtain root SSH key from key vault: |
| 39 | 41 | setup_keys "central_mongo_setup" |
configuration/environments_scripts/central_reverse_proxy/files/etc/fail2ban/jail.d/customisation.local
| ... | ... | @@ -0,0 +1 @@ |
| 1 | +../../../../../repo/etc/fail2ban/jail.d/customisation.local |
|
| ... | ... | \ No newline at end of file |
configuration/environments_scripts/mongo_instance_setup/files/etc/fail2ban/jail.d/customisation.local
| ... | ... | @@ -0,0 +1 @@ |
| 1 | +../../../../../repo/etc/fail2ban/jail.d/customisation.local |
|
| ... | ... | \ No newline at end of file |
configuration/environments_scripts/mongo_instance_setup/setup-mongo-instance.sh
| ... | ... | @@ -19,8 +19,8 @@ else |
| 19 | 19 | if ec2-metadata | grep -q instance-id; then |
| 20 | 20 | echo "Running on an AWS EC2 instance as user ${USER} / $(whoami), starting setup..." |
| 21 | 21 | # Install standard packages: |
| 22 | - sudo yum -y update |
|
| 23 | - sudo yum -y install nvme-cli chrony cronie cronie-anacron jq mailx |
|
| 22 | + sudo dnf -y --best --allowerasing --releasever=latest upgrade |
|
| 23 | + sudo dnf -y install nvme-cli chrony cronie cronie-anacron jq mailx whois iptables |
|
| 24 | 24 | # Copy imageupgrade_functions.sh |
| 25 | 25 | scp -o StrictHostKeyChecking=no -p root@sapsailing.com:/home/wiki/gitwiki/configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh . |
| 26 | 26 | sudo mv imageupgrade_functions.sh /usr/local/bin |
| ... | ... | @@ -28,6 +28,8 @@ else |
| 28 | 28 | . imageupgrade_functions.sh |
| 29 | 29 | # Install MongoDB 7.0 and configure as replica set "live" |
| 30 | 30 | setup_mongo_7_0_on_AL2023 |
| 31 | + setup_mail_sending |
|
| 32 | + setup_fail2ban |
|
| 31 | 33 | build_crontab_and_setup_files mongo_instance_setup |
| 32 | 34 | # obtain root SSH key from key vault: |
| 33 | 35 | setup_keys "mongo_instance_setup" |
configuration/environments_scripts/mysql_instance_setup/setup-mysql-server.sh
| ... | ... | @@ -1,6 +1,6 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | 2 | # Usage: ${0} [ -b {bugs-password} ] [ -r {root-password} ] {instance-ip} |
| 3 | -# Deploy with Amazon Linux 2023 |
|
| 3 | +# Deploy with Amazon Linux 2023 with a 16GB root volume |
|
| 4 | 4 | |
| 5 | 5 | # Read options and assign to variables: |
| 6 | 6 | options='b:r:' |
| ... | ... | @@ -39,8 +39,8 @@ else |
| 39 | 39 | sudo chgrp ec2-user /home/ec2-user/ssh-key-reader.token |
| 40 | 40 | sudo chmod 600 /home/ec2-user/ssh-key-reader.token |
| 41 | 41 | # Install packages for MariaDB and cron/anacron/crontab: |
| 42 | - sudo yum update -y |
|
| 43 | - sudo yum -y install mariadb105-server cronie |
|
| 42 | + sudo dnf -y --best --allowerasing --releasever=latest upgrade |
|
| 43 | + sudo dnf -y install mariadb105-server cronie fail2ban |
|
| 44 | 44 | sudo su -c "printf '\n[mysqld]\nlog_bin = /var/log/mariadb/mysql-bin.log\n' >> /etc/my.cnf.d/mariadb-server.cnf" |
| 45 | 45 | sudo systemctl enable mariadb.service |
| 46 | 46 | sudo systemctl start mariadb.service |
| ... | ... | @@ -51,6 +51,8 @@ else |
| 51 | 51 | exit 1 |
| 52 | 52 | fi |
| 53 | 53 | setup_sshd_resilience |
| 54 | + setup_mail_sending |
|
| 55 | + setup_fail2ban |
|
| 54 | 56 | sudo chown root:root /usr/local/bin/imageupgrade_functions.sh |
| 55 | 57 | echo "Creating backup through mysql client on sapsailing.com..." |
| 56 | 58 | ssh -o StrictHostKeyChecking=false root@sapsailing.com "mysqldump --all-databases -h mysql.internal.sapsailing.com --user=root --password=${ROOT_PW} --master-data --skip-lock-tables --lock-tables=0" >> ${BACKUP_FILE} |
configuration/environments_scripts/rabbitmq_instance_setup/setup-rabbitmq-server.sh
| ... | ... | @@ -17,7 +17,9 @@ else |
| 17 | 17 | # Install packages for RabbitMQ and cron/anacron/crontab: |
| 18 | 18 | sudo apt-get -y update |
| 19 | 19 | sudo DEBIAN_FRONTEND=noninteractive apt-get -yq -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confnew upgrade |
| 20 | - sudo DEBIAN_FRONTEND=noninteractive apt-get -yq -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confnew install rabbitmq-server systemd-cron jq syslog-ng |
|
| 20 | + # Note that the fail2ban installation on Debian automatically configures a jail for sshd based on auth.log; |
|
| 21 | + # therefore, the setup_fail2ban function from imageupgrade_functions.sh does not need to be invoked in this case. |
|
| 22 | + sudo DEBIAN_FRONTEND=noninteractive apt-get -yq -o Dpkg::Options::=--force-confdef -o Dpkg::Options::=--force-confnew install rabbitmq-server systemd-cron jq syslog-ng fail2ban |
|
| 21 | 23 | sudo touch /var/run/last_change_aws_landscape_managers_ssh_keys__home_admin |
| 22 | 24 | sudo chown admin:admin /var/run/last_change_aws_landscape_managers_ssh_keys__home_admin |
| 23 | 25 | scp -o StrictHostKeyChecking=false -r root@sapsailing.com:/home/wiki/gitwiki/configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh /home/admin |
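To confirm the Debian default behaviour described in the comment above, the automatically configured ``sshd`` jail can be inspected on the instance (a hypothetical interactive session):

```
sudo fail2ban-client status        # should list the default "sshd" jail
sudo fail2ban-client status sshd   # failure counts and banned IPs derived from auth.log
```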
configuration/environments_scripts/repo/etc/fail2ban/jail.d/customisation.local
| ... | ... | @@ -0,0 +1,8 @@ |
| 1 | + [ssh-iptables] |
|
| 2 | + |
|
| 3 | + enabled = true |
|
| 4 | + filter = sshd[mode=aggressive] |
|
| 5 | + action = iptables[name=SSH, port=ssh, protocol=tcp] |
|
| 6 | + sendmail-whois[name=SSH, dest=axel.uhl@sap.com, sender=fail2ban@sapsailing.com] |
|
| 7 | + logpath = /var/log/fail2ban.log |
|
| 8 | + maxretry = 5 |
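A quick way to verify on a freshly set-up instance that this drop-in is picked up (a hypothetical session; the IP below is a documentation address, not a real client):

```
# List active jails; ssh-iptables should show up once fail2ban has started:
sudo fail2ban-client status
# Inspect the jail: number of failed attempts and currently banned IPs:
sudo fail2ban-client status ssh-iptables
# Unban an address manually if needed:
sudo fail2ban-client set ssh-iptables unbanip 203.0.113.7
```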
configuration/environments_scripts/repo/usr/local/bin/imageupgrade_functions.sh
| ... | ... | @@ -306,36 +306,16 @@ setup_cloud_cfg_and_root_login() { |
| 306 | 306 | } |
| 307 | 307 | |
| 308 | 308 | setup_fail2ban() { |
| 309 | - pushd . |
|
| 310 | - if [[ ! -f "/etc/systemd/system/fail2ban.service" ]]; then |
|
| 311 | - yum install 2to3 -y |
|
| 312 | - cd /usr/local/src |
|
| 313 | - wget https://github.com/fail2ban/fail2ban/archive/refs/tags/1.0.2.tar.gz |
|
| 314 | - tar -xvf 1.0.2.tar.gz |
|
| 315 | - cd fail2ban-1.0.2/ |
|
| 316 | - ./fail2ban-2to3 |
|
| 317 | - python3.9 setup.py build |
|
| 318 | - python3.9 setup.py install |
|
| 319 | - cp ./build/fail2ban.service /etc/systemd/system/fail2ban.service |
|
| 320 | - sed -i 's|Environment=".*"|Environment="PYTHONPATH=/usr/local/lib/python3.9/site-packages"|' /etc/systemd/system/fail2ban.service |
|
| 321 | - sed -i 's|^backend *= *auto *$|backend = systemd|' /etc/fail2ban/jail.conf |
|
| 322 | - systemctl enable fail2ban |
|
| 323 | - chkconfig --level 23 fail2ban on |
|
| 324 | - fi |
|
| 325 | - cat << EOF > /etc/fail2ban/jail.d/customisation.local |
|
| 326 | - [ssh-iptables] |
|
| 327 | - |
|
| 328 | - enabled = true |
|
| 329 | - filter = sshd[mode=aggressive] |
|
| 330 | - action = iptables[name=SSH, port=ssh, protocol=tcp] |
|
| 331 | - sendmail-whois[name=SSH, dest=axel.uhl@sap.com, sender=fail2ban@sapsailing.com] |
|
| 332 | - logpath = /var/log/fail2ban.log |
|
| 333 | - maxretry = 5 |
|
| 334 | -EOF |
|
| 309 | + # Expects setup_mail_sending to have been invoked so that fail2ban e-mails get sent properly |
|
| 310 | + dnf install -y fail2ban whois |
|
| 311 | + sed -i 's|^backend *= *auto *$|backend = systemd|' /etc/fail2ban/jail.conf |
|
| 312 | + systemctl enable fail2ban |
|
| 313 | + # the /etc/fail2ban/jail.d/ contents are expected to be provided by the files/etc/fail2ban/jail.d |
|
| 314 | + # folders in the respective environments_scripts sub-folder; use, e.g., a symbolic link to |
|
| 315 | + # configuration/environments_scripts/repo/etc/fail2ban/jail.d/customisation.local for a |
|
| 316 | + # systemd-based sshd-iptables filter. |
|
| 335 | 317 | touch /var/log/fail2ban.log |
| 336 | - service fail2ban start |
|
| 337 | - yum remove -y firewalld |
|
| 338 | - popd |
|
| 318 | + systemctl start fail2ban |
|
| 339 | 319 | } |
| 340 | 320 | |
| 341 | 321 | setup_mail_sending() { |
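For reference, the per-environment ``files/etc/fail2ban/jail.d/customisation.local`` files added in this commit are relative symbolic links into the shared ``repo`` tree, as the comment above suggests. A sketch of how a hypothetical new environment would be wired up the same way:

```
# Run from configuration/environments_scripts/<new_environment> (placeholder name):
mkdir -p files/etc/fail2ban/jail.d
ln -s ../../../../../repo/etc/fail2ban/jail.d/customisation.local \
    files/etc/fail2ban/jail.d/customisation.local
```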
configuration/environments_scripts/reverse_proxy/files/etc/fail2ban/jail.d/customisation.local
| ... | ... | @@ -0,0 +1 @@ |
| 1 | +../../../../../repo/etc/fail2ban/jail.d/customisation.local |
|
| ... | ... | \ No newline at end of file |
configuration/environments_scripts/sailing_server/files/etc/fail2ban/jail.d/customisation.local
| ... | ... | @@ -0,0 +1 @@ |
| 1 | +../../../../../repo/etc/fail2ban/jail.d/customisation.local |
|
| ... | ... | \ No newline at end of file |
configuration/environments_scripts/sailing_server/setup-sailing-server.sh
| ... | ... | @@ -19,8 +19,8 @@ else |
| 19 | 19 | if ec2-metadata | grep -q instance-id; then |
| 20 | 20 | echo "Running on an AWS EC2 instance as user ${USER} / $(whoami), starting setup..." |
| 21 | 21 | # Install standard packages: |
| 22 | - sudo yum -y update |
|
| 23 | - sudo yum -y install git tmux nvme-cli chrony cronie cronie-anacron jq telnet mailx |
|
| 22 | + sudo dnf -y --best --allowerasing --releasever=latest upgrade |
|
| 23 | + sudo dnf -y install git tmux nvme-cli chrony cronie cronie-anacron jq telnet mailx |
|
| 24 | 24 | # Allow root ssh login with the same key used for the ec2-user for now; |
| 25 | 25 | # later, a cron job will be installed that keeps the /root/authorized_keys file |
| 26 | 26 | # up to date with all landscape managers' public SSH keys |
| ... | ... | @@ -45,6 +45,7 @@ EOF |
| 45 | 45 | # and then move it to the sailing user's .ssh directory |
| 46 | 46 | setup_keys "sailing_server" |
| 47 | 47 | setup_mail_sending |
| 48 | + setup_fail2ban |
|
| 48 | 49 | sudo su - sailing -c "mkdir servers" |
| 49 | 50 | # Force acceptance of sapsailing.com's host key: |
| 50 | 51 | sudo su - sailing -c "ssh -o StrictHostKeyChecking=false trac@sapsailing.com ls" >/dev/null |
wiki/info/landscape/amazon-ec2.md
| ... | ... | @@ -21,7 +21,7 @@ Our default region in AWS EC2 is eu-west-1 (Ireland). Tests are currently run in |
| 21 | 21 | |
| 22 | 22 | In Route53 (the AWS DNS) we have registered the sapsailing.com domain and can manage records for any sub-domains. The "apex" record for sapsailing.com points to a Network Load Balancer (NLB), currently ``NLB-sapsailing-dot-com-f937a5b33246d221.elb.eu-west-1.amazonaws.com``, which does the following things: |
| 23 | 23 | |
| 24 | -* accept SSH connects on port 22; these are forwarded to the internal IP of the web server through the target group ``SSH-to-sapsailing-dot-com-2``, currently with the internal IP target ``172.31.28.212`` |
|
| 24 | +* accept SSH connections on port 22; these are forwarded to the internal IP of the central reverse proxy through the target group ``SSH-to-sapsailing-dot-com-2``. It is important that this target group is configured to preserve client IP addresses; otherwise, the ``fail2ban`` installation on the central reverse proxy would quickly block all SSH traffic, malicious and legitimate alike, because every connection would appear to originate from one of the NLB's internal IP addresses (see the example after this list). |
|
| 25 | 25 | * accept HTTP connections for ``sapsailing.com:80``, forwarding them to the target group ``HTTP-to-sapsailing-dot-com-2`` which is a TCP target group for port 80 with ip-based targets (instance-based was unfortunately not possible for the old ``m3`` instance type of our web server), again pointing to ``172.31.28.212``, the internal IP of our web server |
| 26 | 26 | * accept HTTPS/TLS connections on port 443, using the ACM-managed certificate for ``*.sapsailing.com`` and ``sapsailing.com`` and also forwarding to the ``HTTP-to-sapsailing-dot-com-2`` target group |
| 27 | 27 | * optionally, this NLB could be extended by UDP port mappings in case we see a use case for UDP-based data streams that need forwarding to specific applications, such as the Expedition data typically sent on ports 2010 and following |
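Client IP preservation is a target group attribute. Assuming the ARN of ``SSH-to-sapsailing-dot-com-2`` is available in ``$SSH_TARGET_GROUP_ARN`` (a placeholder), it can be checked and, if necessary, enabled with the AWS CLI:

```
# Show the current value of the attribute:
aws elbv2 describe-target-group-attributes \
    --target-group-arn "$SSH_TARGET_GROUP_ARN" \
    --query 'Attributes[?Key==`preserve_client_ip.enabled`]'
# Enable it so fail2ban on the central reverse proxy sees the real source IPs:
aws elbv2 modify-target-group-attributes \
    --target-group-arn "$SSH_TARGET_GROUP_ARN" \
    --attributes Key=preserve_client_ip.enabled,Value=true
```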
| ... | ... | @@ -33,12 +33,12 @@ Further ALBs may exist in addition to the default ALB and the NLB for ``sapsaili |
| 33 | 33 | ### Apache httpd, the central reverse proxy (Webserver) and disposable reverse proxies |
| 34 | 34 | |
| 35 | 35 | A key pillar of our architecture is the central reverse proxy, which handles traffic for the wiki, bugzilla, awstats, releases, p2, Git, jobs, and static, and is the target of the catch-all rule in the Dynamic ALB. |
| 36 | -Any traffic to the Hudson build server subdomain *does not* go through the central webserver. Instead, it gets directed by route 53 to a `DDNSMapped` load balancer (which all route any port 80 traffic to 443), which has a rule pointing to a target group, that contains only the Hudson server. The setup procedure can be found below. |
|
| 36 | +Any traffic to the Hudson build server subdomain *does not* go through the central webserver. Instead, Route 53 directs it to a `DNSMapped` load balancer (which, like all of them, redirects any port 80 traffic to 443) with a rule pointing to a target group that contains only the Hudson server. The setup procedure can be found below. |
|
| 37 | 37 | |
| 38 | -To improve availability and reliability, we have a disposable environment type and AMI. The instances from this AMI are only for serving requests to the archive but are lightweight and can be quickly started and shutdown, using the landscape management console. |
|
| 38 | +To improve availability and reliability, we have a "disposable reverse proxy" environment type and AMI (see ``configuration/environments_scripts/reverse_proxy``). Instances created from this AMI only serve requests to the archive; they are lightweight and can be started and shut down quickly using the landscape management console. |
|
| 39 | 39 | |
| 40 | 40 | The IPs for all reverse proxies will automatically be added to ALB target groups with the tag key `allReverseProxies`, including the `CentralWebServerHTTP-Dyn` target group (in the dynamic ALB in eu-west-1) |
| 41 | -and all the `DDNSMapped-x-HTTP` (in all the DDNSMapped servers). These are the target groups for the default rules and it ensures availability to the ARCHIVE especially. |
|
| 41 | +and all the `DDNSMapped-x-HTTP` target groups (in all the DNSMapped ALBs). These are the target groups for the default rules, which in particular ensures availability of the ARCHIVE. |
|
| 42 | 42 | Disposable instances are tagged with `DisposableProxy` to indicate that they host no vital services. `ReverseProxy` also identifies any reverse proxies. The health check for the target groups triggers a script which returns different status codes: healthy/200 if the instance is in the same AZ as the archive (or if the failover archive is in use), and unhealthy/503 if it is in a different AZ. This reduces cross-AZ archive traffic costs while maintaining availability and load balancing. |
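One way to list the target groups carrying the ``allReverseProxies`` tag key (not necessarily how the automation discovers them) is the resource groups tagging API:

```
aws resourcegroupstaggingapi get-resources \
    --resource-type-filters elasticloadbalancing:targetgroup \
    --tag-filters Key=allReverseProxies \
    --query 'ResourceTagMappingList[].ResourceARN' \
    --output text
```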
| 43 | 43 | |
| 44 | 44 | For security groups of the central reverse proxy, we want Webserver, as well as Reverse Proxy. The disposables just have the latter. |
| ... | ... | @@ -519,13 +519,17 @@ disposables*. It is used to reduce costly cross-AZ traffic between our instances |
| 519 | 519 | |
| 520 | 520 | The general idea of this ALB target group healthcheck is to make instances healthy only if they are in the same AZ as the archive (the "correct" AZ). However, availability takes priority over cost saving, so if there is no healthy instance in the "correct" AZ, the healthcheck returns healthy. |
| 521 | 521 | |
| 522 | -All the target groups, tagged with allReverseProxies, have this healthcheck: |
|
| 522 | +All target groups tagged with ``allReverseProxies`` have this healthcheck: |
|
| 523 | 523 | |
| 524 | 524 | ``` |
| 525 | 525 | /cgi-bin/reverseProxyHealthcheck.sh?arn=TARGET_GROUP_ARN |
| 526 | 526 | ``` |
| 527 | 527 | |
| 528 | -The healthcheck works by first checking internal-server-status. If genuinely unhealthy, then unhealthy is returned to the ELB (elastic load balancer) health checker. Otherwise, the instance uses cached CIDR masks (which correspond to AZ definitions) and nmap to check if in the same AZ as the archive. |
|
| 528 | +The script can be found under ``configuration/environments_scripts/repo/var/www/cgi-bin``, to which the ``reverse_proxy`` (the disposables) and ``central_reverse_proxy`` environments link symbolically. |
|
| 529 | + |
|
| 530 | +The healthcheck works by first checking whether another instance of the healthcheck is already running. The PID of a running process is stored under ``/var/run/reverseProxyHealthcheck``, a folder created at boot by a directive in ``/etc/tmpfiles.d/reverseProxyHealthcheck.conf``. A healthcheck records its exit status and output in files under ``/var/run/reverseProxyHealthcheck``, which a background job removes again after 10s. If a healthcheck is started while another is already running, the new one waits for the exit status of the running one to be written to ``/var/run/reverseProxyHealthcheck`` and then picks up that exit status and output as its own result. This way we avoid congestion and clogging of the reverse proxies by healthchecks, which at times may be long-running, especially if the AWS CLI takes unusually long to discover the target groups and target health. Without this shortcut for concurrent executions we have seen hundreds of healthchecks running at the same time, at times even leading to out-of-memory situations. A minimal sketch of the mechanism follows below. |
|
| 531 | + |
|
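A minimal sketch of this single-flight behaviour, assuming the paths mentioned above; the actual script under ``configuration/environments_scripts/repo/var/www/cgi-bin`` may differ in details, and ``run_actual_healthcheck`` is a placeholder for the AZ check described next:

```
#!/bin/bash
# /var/run/reverseProxyHealthcheck is created at boot by a tmpfiles.d directive, e.g.:
#   d /var/run/reverseProxyHealthcheck 0755 apache apache -
RUNDIR=/var/run/reverseProxyHealthcheck
PIDFILE="${RUNDIR}/pid"
if [ -f "${PIDFILE}" ] && kill -0 "$(cat "${PIDFILE}")" 2>/dev/null; then
    # Another healthcheck is already running: wait for its result and reuse it
    while [ ! -f "${RUNDIR}/exitstatus" ]; do sleep 1; done
    cat "${RUNDIR}/output"
    exit "$(cat "${RUNDIR}/exitstatus")"
fi
echo $$ > "${PIDFILE}"
OUTPUT=$(run_actual_healthcheck)   # placeholder for the real check
STATUS=$?
printf '%s\n' "${OUTPUT}" > "${RUNDIR}/output"
echo "${STATUS}" > "${RUNDIR}/exitstatus"
# Remove the cached result and PID file again after 10s:
( sleep 10; rm -f "${RUNDIR}/exitstatus" "${RUNDIR}/output" "${PIDFILE}" ) &
printf '%s\n' "${OUTPUT}"
exit "${STATUS}"
```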
| 532 | +Then it checks the ``internal-server-status``. If genuinely unhealthy, then unhealthy is returned to the ELB (elastic load balancer) health checker. Otherwise, the instance uses cached CIDR masks (which correspond to AZ definitions) and nmap to check whether it is in the same AZ as the archive. |
|
| 529 | 533 | If in the same AZ, then "healthy" is returned to the ELB health checker. If not, then the target group ARN, passed as a parameter |
| 530 | 534 | to the healthcheck, is used to get the private IPs of the other instances in the target group, via a describe-target-health call to the AWS API. This is the most costly part of the check, so these values are cached. |
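The describe-target-health lookup referred to above boils down to a call like the following (the ARN variable is a placeholder); for ip-based target groups, the returned target IDs are the private IPs:

```
aws elbv2 describe-target-health \
    --target-group-arn "$TARGET_GROUP_ARN" \
    --query 'TargetHealthDescriptions[].Target.Id' \
    --output text
```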
| 531 | 535 |