9da0e8d01d4e84d86e977e856af840f30eacf02b
configuration/on-site-scripts/paris2024/clone-security-service-db-safe-exit
| ... | ... | @@ -4,12 +4,12 @@ logger -t sailing "Cloning security_service DB from eu-west-1 live replica set t |
| 4 | 4 | logger -t sailing "Copying an existing local security_service DB to security_service_bak..." |
| 5 | 5 | cd /tmp |
| 6 | 6 | rm -rf /tmp/dump |
| 7 | -ssh ec2-user@tokyo-ssh.sapsailing.com "set -e; cd /tmp; rm -rf /tmp/dump; mongodump --host live/mongo0.internal.sapsailing.com,mongo1.internal.sapsailing.com,dbserver.internal.sapsailing.com:10203 --db security_service; tar czvpf - dump" | tar xzvpf - && logger -t sailing "mongodump finished with $?. Restoring dump of security_service DB from eu-west-1 locally..." || ( logger -t sailing "SEVERE: mongodump finished with $?. Aborting..."; echo "exiting with code 1"; exit 1 ) |
|
| 7 | +ssh ec2-user@paris-ssh.sapsailing.com "set -e; cd /tmp; rm -rf /tmp/dump; mongodump --host live/mongo0.internal.sapsailing.com,mongo1.internal.sapsailing.com,dbserver.internal.sapsailing.com:10203 --db security_service; tar czvpf - dump" | tar xzvpf - && logger -t sailing "mongodump finished with $?. Restoring dump of security_service DB from eu-west-1 locally..." || ( logger -t sailing "SEVERE: mongodump finished with $?. Aborting..."; echo "exiting with code 1"; exit 1 ) |
|
| 8 | 8 | echo 'use security_service_bak |
| 9 | 9 | db.dropDatabase() |
| 10 | 10 | db.copyDatabase("security_service", "security_service_bak") |
| 11 | 11 | quit()' | mongo "mongodb://localhost/security_service_bak?replicaSet=security_service&retryWrites=true&readPreference=nearest" && logger -t sailing "Succesful, continuing..." || ( logger -t sailing "SEVERE: mongo finished with $?"; exit 1 ) |
| 12 | -mongorestore --drop --host security_service/localhost && logger -t sailing "mongorestore finished with $?. Done cloning security_service DB from eu-west-1 live replica set to local tokyo2020 replica set." || ( logger -t sailing "SEVERE: mongorestore finished with $?. Aborting..."; echo 'use security_service |
|
| 12 | +mongorestore --drop --host security_service/localhost && logger -t sailing "mongorestore finished with $?. Done cloning security_service DB from eu-west-1 live replica set to local paris2024 replica set." || ( logger -t sailing "SEVERE: mongorestore finished with $?. Aborting..."; echo 'use security_service |
|
| 13 | 13 | db.dropDatabase() |
| 14 | 14 | db.copyDatabase("security_service_bak", "security_service") |
| 15 | 15 | quit()' | mongo "mongodb://localhost/security_service_bak?replicaSet=security_service&retryWrites=true&readPreference=nearest"; logger -t sailing "SEVERE: Restored old backup, dropped security_service_bak"; exit 1 ) |
configuration/on-site-scripts/paris2024/get-replica-ips
| ... | ... | @@ -3,6 +3,6 @@ |
| 3 | 3 | IPs="" |
| 4 | 4 | for i in $( cat `dirname $0`/regions.txt ); do |
| 5 | 5 | echo Region: $i >&2 |
| 6 | - IPs="${IPs} $( aws --region $i ec2 describe-instances --filters Name=instance-state-name,Values=running Name=tag:sailing-analytics-server,Values=tokyo2020 | jq .Reservations[].Instances[].PublicIpAddress -r )" |
|
| 6 | + IPs="${IPs} $( aws --region $i ec2 describe-instances --filters Name=instance-state-name,Values=running Name=tag:sailing-analytics-server,Values=paris2024 | jq .Reservations[].Instances[].PublicIpAddress -r )" |
|
| 7 | 7 | done |
| 8 | 8 | echo "${IPs}" |
configuration/on-site-scripts/paris2024/launch-replicas-in-all-regions.sh
| ... | ... | @@ -1,5 +1,5 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | -TARGET_GROUP_NAME=S-ded-tokyo2020 |
|
| 2 | +TARGET_GROUP_NAME=S-paris2024 |
|
| 3 | 3 | |
| 4 | 4 | if [ $# -eq 0 ]; then |
| 5 | 5 | echo "$0 -R <release-name> -b <replication-bearer-token> [-t <instance-type>] [-i <ami-id>] [-k <key-pair-name>]" |
| ... | ... | @@ -13,9 +13,9 @@ if [ $# -eq 0 ]; then |
| 13 | 13 | echo "Example: $0 -b 098toyw098typ9e8/87t9shytp98894y5= -R build-202106041327 -k Jan" |
| 14 | 14 | echo |
| 15 | 15 | echo "Will launch as many new replicas in regions $( cat `dirname $0`/regions.txt ) with the release specified with -R" |
| 16 | - echo "as there are currently healthy auto-replicas registered with the S-ded-tokyo2020 target group in the region (at least one)" |
|
| 17 | - echo "which will register at the master proxy tokyo-ssh.internal.sapsailing.com:8888 and RabbitMQ at" |
|
| 18 | - echo "rabbit-ap-northeast-1.sapsailing.com:5672, then when healthy get added to target group S-ded-tokyo2020" |
|
| 16 | + echo "as there are currently healthy auto-replicas registered with the S-paris2024 target group in the region (at least one)" |
|
| 17 | + echo "which will register at the master proxy paris-ssh.internal.sapsailing.com:8888 and RabbitMQ at" |
|
| 18 | + echo "rabbit-eu-west-3.sapsailing.com:5672, then when healthy get added to target group S-paris2024" |
|
| 19 | 19 | echo "in that region, with all auto-replicas registered before removed from the target group." |
| 20 | 20 | exit 2 |
| 21 | 21 | fi |
| ... | ... | @@ -52,7 +52,7 @@ for REGION in $( cat `dirname $0`/regions.txt ); do |
| 52 | 52 | else |
| 53 | 53 | MONGODB_PRIMARY="localhost" |
| 54 | 54 | MONGODB_REPLICA_SET="replica" |
| 55 | - VPC_NAME="Tokyo2020" |
|
| 55 | + VPC_NAME="Paris2024" |
|
| 56 | 56 | fi |
| 57 | 57 | echo "Using MongoDB primary ${MONGODB_PRIMARY} and replica set ${MONGODB_REPLICA_SET}" |
| 58 | 58 | OPTIONS="-g ${REGION} -b ${BEARER_TOKEN} -R ${RELEASE} -p ${MONGODB_PRIMARY} -r ${MONGODB_REPLICA_SET} -v ${VPC_NAME} -c ${HEALTHY_TARGETS_IN_REGION}" |
configuration/on-site-scripts/paris2024/launch-replicas-in-region.sh
| ... | ... | @@ -3,8 +3,8 @@ INSTANCE_TYPE=c5.2xlarge |
| 3 | 3 | REPLICA_SET_NAME=replica |
| 4 | 4 | REPLICA_SET_PRIMARY=localhost |
| 5 | 5 | KEY_NAME=Axel |
| 6 | -VPC=Tokyo2020 |
|
| 7 | -TARGET_GROUP_NAME=S-ded-tokyo2020 |
|
| 6 | +VPC=Paris2024 |
|
| 7 | +TARGET_GROUP_NAME=S-paris2024 |
|
| 8 | 8 | COUNT=1 |
| 9 | 9 | |
| 10 | 10 | if [ $# -eq 0 ]; then |
| ... | ... | @@ -24,8 +24,8 @@ if [ $# -eq 0 ]; then |
| 24 | 24 | echo "Example: $0 -g ap-southeast-2 -b 098toyw098typ9e8/87t9shytp98894y5= -R build-202106041327 -k Jan" |
| 25 | 25 | echo |
| 26 | 26 | echo "Will launch one or more (see -c) new replicas in the AWS region specified with -g with the release specified with -R" |
| 27 | - echo "which will register at the master proxy tokyo-ssh.internal.sapsailing.com:8888 and RabbitMQ at" |
|
| 28 | - echo "rabbit-ap-northeast-1.sapsailing.com:5672, then when healthy get added to target group S-ded-tokyo2020" |
|
| 27 | + echo "which will register at the master proxy paris-ssh.internal.sapsailing.com:8888 and RabbitMQ at" |
|
| 28 | + echo "rabbit-eu-west-3.sapsailing.com:5672, then when healthy get added to target group S-paris2024" |
|
| 29 | 29 | echo "in that region, with all auto-replicas registered before removed from the target group." |
| 30 | 30 | echo "Specify -r and -p if you are launching in eu-west-1 because it has a special non-default environment." |
| 31 | 31 | exit 2 |
| ... | ... | @@ -69,17 +69,17 @@ while [ ${i} -lt ${COUNT} ]; do |
| 69 | 69 | SUBNET_ID=$( echo "${SUBNETS}" | jq -r '.Subnets['${SUBNET_INDEX}'].SubnetId' ) |
| 70 | 70 | echo "Launching image with ID ${IMAGE_ID} into subnet #${SUBNET_INDEX} in region ${REGION} with ID ${SUBNET_ID} in VPC ${VPC_ID}" |
| 71 | 71 | PRIVATE_IP_AND_INSTANCE_ID=$( aws --region ${REGION} ec2 run-instances --subnet-id ${SUBNET_ID} --instance-type ${INSTANCE_TYPE} --security-group-ids ${SECURITY_GROUP_ID} --image-id ${IMAGE_ID} --user-data "INSTALL_FROM_RELEASE=${RELEASE} |
| 72 | -SERVER_NAME=tokyo2020 |
|
| 73 | -MONGODB_URI=\"mongodb://${REPLICA_SET_PRIMARY}/tokyo2020-replica?replicaSet=${REPLICA_SET_NAME}&retryWrites=true&readPreference=nearest\" |
|
| 72 | +SERVER_NAME=paris2024 |
|
| 73 | +MONGODB_URI=\"mongodb://${REPLICA_SET_PRIMARY}/paris2024-replica?replicaSet=${REPLICA_SET_NAME}&retryWrites=true&readPreference=nearest\" |
|
| 74 | 74 | USE_ENVIRONMENT=live-replica-server |
| 75 | -REPLICATION_CHANNEL=tokyo2020-replica |
|
| 76 | -REPLICATION_HOST=rabbit-ap-northeast-1.sapsailing.com |
|
| 77 | -REPLICATE_MASTER_SERVLET_HOST=tokyo-ssh.internal.sapsailing.com |
|
| 75 | +REPLICATION_CHANNEL=paris2024-replica |
|
| 76 | +REPLICATION_HOST=rabbit-eu-west-3.sapsailing.com |
|
| 77 | +REPLICATE_MASTER_SERVLET_HOST=paris-ssh.internal.sapsailing.com |
|
| 78 | 78 | REPLICATE_MASTER_SERVLET_PORT=8888 |
| 79 | -REPLICATE_MASTER_EXCHANGE_NAME=tokyo2020 |
|
| 80 | -REPLICATE_MASTER_QUEUE_HOST=rabbit-ap-northeast-1.sapsailing.com |
|
| 79 | +REPLICATE_MASTER_EXCHANGE_NAME=paris2024 |
|
| 80 | +REPLICATE_MASTER_QUEUE_HOST=rabbit-eu-west-3.sapsailing.com |
|
| 81 | 81 | REPLICATE_MASTER_BEARER_TOKEN=${BEARER_TOKEN} |
| 82 | -ADDITIONAL_JAVA_ARGS=\"${ADDITIONAL_JAVA_ARGS} -Dcom.sap.sse.debranding=true\"" --ebs-optimized --key-name $KEY_NAME --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=SL Tokyo2020 (Upgrade Replica)},{Key=sailing-analytics-server,Value=tokyo2020}]" "ResourceType=volume,Tags=[{Key=Name,Value=SL Tokyo2020 (Upgrade Replica)}]" | jq -r '.Instances[].PrivateIpAddress + " " + .Instances[].InstanceId' ) |
|
| 82 | +ADDITIONAL_JAVA_ARGS=\"${ADDITIONAL_JAVA_ARGS} -Dcom.sap.sse.debranding=true\"" --ebs-optimized --key-name $KEY_NAME --tag-specifications "ResourceType=instance,Tags=[{Key=Name,Value=SL Paris2024 (Upgrade Replica)},{Key=sailing-analytics-server,Value=paris2024}]" "ResourceType=volume,Tags=[{Key=Name,Value=SL Paris2024 (Upgrade Replica)}]" | jq -r '.Instances[].PrivateIpAddress + " " + .Instances[].InstanceId' ) |
|
| 83 | 83 | EXIT_CODE=$? |
| 84 | 84 | if [ "${EXIT_CODE}" != "0" ]; then |
| 85 | 85 | echo "Error launching instance in region ${REGION}. Exiting with status ${EXIT_CODE}" |
| ... | ... | @@ -95,7 +95,7 @@ ADDITIONAL_JAVA_ARGS=\"${ADDITIONAL_JAVA_ARGS} -Dcom.sap.sse.debranding=true\"" |
| 95 | 95 | fi |
| 96 | 96 | # Now wait for those instances launched to become available |
| 97 | 97 | echo "Waiting for instance with private IP ${PRIVATE_IP} in region ${REGION} to become healthy..." |
| 98 | - while ! ssh -A -o StrictHostKeyChecking=no ec2-user@tokyo-ssh.sapsailing.com "ssh -o StrictHostKeyChecking=no root@${PRIVATE_IP} \"cd /home/sailing/servers/tokyo2020; ./status >/dev/null\""; do |
|
| 98 | + while ! ssh -A -o StrictHostKeyChecking=no ec2-user@paris-ssh.sapsailing.com "ssh -o StrictHostKeyChecking=no root@${PRIVATE_IP} \"cd /home/sailing/servers/paris2024; ./status >/dev/null\""; do |
|
| 99 | 99 | echo "${PRIVATE_IP} in region ${REGION} still not healthy. Trying again in 10s..." |
| 100 | 100 | sleep 10 |
| 101 | 101 | done |
configuration/on-site-scripts/paris2024/monitor-mongo-disk-full
| ... | ... | @@ -1,7 +1,7 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | 2 | WARN_THRESHOLD_PERCENT=80 |
| 3 | -mongo_volume_fill=$( ssh ec2-user@tokyo-ssh.sapsailing.com df /var/lib/mongo | awk '{ print $5; }' | tail -n 1 | sed -e 's/%$//' ) |
|
| 3 | +mongo_volume_fill=$( ssh ec2-user@paris-ssh.sapsailing.com df /var/lib/mongo | awk '{ print $5; }' | tail -n 1 | sed -e 's/%$//' ) |
|
| 4 | 4 | if [ ${mongo_volume_fill} -gt ${WARN_THRESHOLD_PERCENT} ]; then |
| 5 | - subjectAndBody="MongoDB volume on tokyo-ssh.sapsailing.com almost full; currently at ${mongo_volume_fill}%" |
|
| 5 | + subjectAndBody="MongoDB volume on paris-ssh.sapsailing.com almost full; currently at ${mongo_volume_fill}%" |
|
| 6 | 6 | echo "${subjectAndBody}" | notify-operators "${subjectAndBody}" |
| 7 | 7 | fi |
configuration/on-site-scripts/paris2024/monitor-mongo-replica-set-delay
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | DELAY_FILE=/tmp/mongo-replica-set-delay |
| 3 | 3 | NOTIFYING_THRESHOLD_SECOND_AVERAGE=10 |
| 4 | 4 | echo "rs.printSecondaryReplicationInfo()" | \ |
| 5 | - mongo "mongodb://localhost:10201,localhost:10202,localhost:10203/?replicaSet=tokyo2020&retryWrites=true&readPreference=nearest" | |
|
| 5 | + mongo "mongodb://localhost:10201,localhost:10202,localhost:10203/?replicaSet=paris2024&retryWrites=true&readPreference=nearest" | |
|
| 6 | 6 | grep "\(behind the primary\)" | sed -e 's/^[ \t]*\([0-9]*\) secs.*$/\1/' >>${DELAY_FILE} |
| 7 | 7 | s=0 |
| 8 | 8 | c=0 |
| ... | ... | @@ -13,7 +13,7 @@ done |
| 13 | 13 | q=$(( $s / $c )) |
| 14 | 14 | if [ $q -gt ${NOTIFYING_THRESHOLD_SECOND_AVERAGE} ]; then |
| 15 | 15 | echo "rs.printSecondaryReplicationInfo()" | \ |
| 16 | - mongo "mongodb://localhost:10201/?replicaSet=tokyo2020&retryWrites=true&readPreference=nearest" | \ |
|
| 16 | + mongo "mongodb://localhost:10201/?replicaSet=paris2024&retryWrites=true&readPreference=nearest" | \ |
|
| 17 | 17 | grep "\(^source:\)\|\(syncedTo:\)\|\(behind the primary\)" | \ |
| 18 | 18 | notify-operators "SLOW MONGODB REPLICATION" |
| 19 | 19 | fi |
configuration/on-site-scripts/paris2024/monitor-tokyo2020-reachability-from-regions
| ... | ... | @@ -15,16 +15,16 @@ else |
| 15 | 15 | # Discover replicas |
| 16 | 16 | IPs="" |
| 17 | 17 | for i in $( cat `dirname $0`/regions.txt ); do |
| 18 | - IPs="$( aws --region $i ec2 describe-instances --filters Name=instance-state-name,Values=running Name=tag:sailing-analytics-server,Values=tokyo2020 | jq .Reservations[].Instances[].PublicIpAddress -r )" |
|
| 18 | + IPs="$( aws --region $i ec2 describe-instances --filters Name=instance-state-name,Values=running Name=tag:sailing-analytics-server,Values=paris2024 | jq .Reservations[].Instances[].PublicIpAddress -r )" |
|
| 19 | 19 | if [ -z "${IPs}" ]; then |
| 20 | 20 | echo "Couldn't find a running replica in region $i" >&2 |
| 21 | 21 | message="Couldn't find a running replica in region $i" |
| 22 | 22 | echo "${message}" | notify-operators "${message}" |
| 23 | 23 | else |
| 24 | 24 | read first others <<<"${IPs}" |
| 25 | - if ! ssh -o StrictHostKeyChecking=no root@${first} "curl https://tokyo2020.sapsailing.com/gwt/status 2>/dev/null >/dev/null"; then |
|
| 26 | - echo "Problem reaching tokyo2020.sapsailing.com from instance ${first} in region ${i}" >&2 |
|
| 27 | - message="Problem reaching tokyo2020.sapsailing.com from instance ${first} in region ${i}" |
|
| 25 | + if ! ssh -o StrictHostKeyChecking=no root@${first} "curl https://paris2024.sapsailing.com/gwt/status 2>/dev/null >/dev/null"; then |
|
| 26 | + echo "Problem reaching paris2024.sapsailing.com from instance ${first} in region ${i}" >&2 |
|
| 27 | + message="Problem reaching paris2024.sapsailing.com from instance ${first} in region ${i}" |
|
| 28 | 28 | echo "${message}" | notify-operators "${message}" |
| 29 | 29 | else |
| 30 | 30 | echo "Access from region ${i}, IP ${first} OK." >&2 |
configuration/on-site-scripts/paris2024/sap-p1-1/clone-security-service-db
| ... | ... | @@ -7,10 +7,10 @@ db.copyDatabase("security_service", "security_service_bak") |
| 7 | 7 | quit()' | mongo "mongodb://localhost/security_service_bak?replicaSet=security_service&retryWrites=true&readPreference=nearest" |
| 8 | 8 | cd /tmp |
| 9 | 9 | rm -rf /tmp/dump |
| 10 | -ssh ec2-user@tokyo-ssh.sapsailing.com "cd /tmp; rm -rf /tmp/dump; mongodump --host live/mongo0.internal.sapsailing.com,mongo1.internal.sapsailing.com,dbserver.internal.sapsailing.com:10203 --db security_service; tar czvpf - dump" | tar xzvpf - |
|
| 10 | +ssh ec2-user@paris-ssh.sapsailing.com "cd /tmp; rm -rf /tmp/dump; mongodump --host live/mongo0.internal.sapsailing.com,mongo1.internal.sapsailing.com,dbserver.internal.sapsailing.com:10203 --db security_service; tar czvpf - dump" | tar xzvpf - |
|
| 11 | 11 | logger -t sailing "mongodump finished with $?. Restoring dump of security_service DB from eu-west-1 locally..." |
| 12 | 12 | mongorestore --drop --host security_service/localhost |
| 13 | 13 | mongorestore_exit=$? |
| 14 | 14 | rm -rf /tmp/dump |
| 15 | -logger -t sailing "mongorestore finished with ${mongorestore_exit}. Done cloning security_service DB from eu-west-1 live replica set to local tokyo2020 replica set." |
|
| 15 | +logger -t sailing "mongorestore finished with ${mongorestore_exit}. Done cloning security_service DB from eu-west-1 live replica set to local paris2024 replica set." |
|
| 16 | 16 |
configuration/on-site-scripts/paris2024/sap-p1-1/hosts
| ... | ... | @@ -5,10 +5,10 @@ |
| 5 | 5 | #10.1.3.197 sap-p1-2 sap-p1-2.sapsailing.com |
| 6 | 6 | #10.1.3.197 sap-p1-2 sap-p1-2.sapsailing.com |
| 7 | 7 | #127.0.0.1 osg2020.sapsailing.com |
| 8 | -#127.0.0.1 tokyo2020-master.sapsailing.com |
|
| 8 | +#127.0.0.1 paris2024-master.sapsailing.com |
|
| 9 | 9 | #127.0.0.1 www.sapsailing.com |
| 10 | -# Jump host in Tokyo (ap-northeast-1) with elastic IP |
|
| 11 | -#52.194.91.94 tokyo-ssh.sapsailing.com |
|
| 10 | +# Jump host in Paris (eu-west-3) with elastic IP |
|
| 11 | +#13.39.66.118 paris-ssh.sapsailing.com |
|
| 12 | 12 | # Igtimi primary web server |
| 13 | 13 | #114.23.98.68 www.igtimi.com |
| 14 | 14 | # AWS SMTP server |
configuration/on-site-scripts/paris2024/sap-p1-1/master.conf
| ... | ... | @@ -1,14 +1,14 @@ |
| 1 | 1 | INSTALL_FROM_RELEASE=build-202107291820 |
| 2 | 2 | INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT=sailing@localhost |
| 3 | -SERVER_NAME=tokyo2020 |
|
| 4 | -MONGODB_URI="mongodb://localhost:10201,localhost:10202,localhost:10203/${SERVER_NAME}?replicaSet=tokyo2020&retryWrites=true&readPreference=nearest" |
|
| 3 | +SERVER_NAME=paris2024 |
|
| 4 | +MONGODB_URI="mongodb://localhost:10201,localhost:10202,localhost:10203/${SERVER_NAME}?replicaSet=paris2024&retryWrites=true&readPreference=nearest" |
|
| 5 | 5 | # RabbitMQ in eu-west-1 (rabbit.internal.sapsailing.com) is expected to be found through SSH tunnel on localhost:5675 |
| 6 | 6 | # Replication of shared services from central security-service.sapsailing.com through SSH tunnel 443:security-service.sapsailing.com:443 |
| 7 | 7 | # with a local /etc/hosts entry mapping security-service.sapsailing.com to 127.0.0.1 |
| 8 | 8 | REPLICATE_MASTER_QUEUE_HOST=localhost |
| 9 | 9 | REPLICATE_MASTER_QUEUE_PORT=5675 |
| 10 | 10 | REPLICATE_MASTER_BEARER_TOKEN="..." |
| 11 | -# Outbound replication to RabbitMQ through SSH tunnel with port forward on port 5673, regularly to rabbit-ap-northeast-1.sapsailing.com |
|
| 11 | +# Outbound replication to RabbitMQ through SSH tunnel with port forward on port 5673, regularly to rabbit-eu-west-3.sapsailing.com |
|
| 12 | 12 | # Can be re-mapped to the RabbitMQ running on sap-p1-2 |
| 13 | 13 | REPLICATION_HOST=localhost |
| 14 | 14 | REPLICATION_PORT=5673 |
configuration/on-site-scripts/paris2024/sap-p1-1/security_service.conf
| ... | ... | @@ -1,13 +1,13 @@ |
| 1 | 1 | # This is a configuration for an "emergency" local copy of the security-service.sapsailing.com server. |
| 2 | 2 | # It assumes that a regular back-up of the eu-west-1 "security_service" DB from the "live" replica set |
| 3 | -# has been copied to the local tokyo2020 replica set. Outbound replication is to the RabbitMQ on sap-p1-2. |
|
| 3 | +# has been copied to the local paris2024 replica set. Outbound replication is to the local RabbitMQ. |
|
| 4 | 4 | INSTALL_FROM_RELEASE=build-202107291820 |
| 5 | 5 | INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT=sailing@localhost |
| 6 | 6 | SERVER_NAME=security_service |
| 7 | 7 | SERVER_PORT=8889 |
| 8 | 8 | EXPEDITION_PORT=2011 |
| 9 | 9 | TELNET_PORT=14889 |
| 10 | -MONGODB_URI="mongodb://localhost/${SERVER_NAME}?replicaSet=security_service&retryWrites=true&readPreference=nearest" |
|
| 10 | +MONGODB_URI="mongodb://localhost:10201,localhost:10202,localhost:10203/${SERVER_NAME}?replicaSet=paris2024&retryWrites=true&readPreference=nearest" |
|
| 11 | 11 | # RabbitMQ in eu-west-1 (rabbit.internal.sapsailing.com) is expected to be found through SSH tunnel on localhost:5675 |
| 12 | 12 | # Replication of shared services from central security-service.sapsailing.com through SSH tunnel 443:security-service.sapsailing.com:443 |
| 13 | 13 | # with a local /etc/hosts entry mapping security-service.sapsailing.com to 127.0.0.1 |
configuration/on-site-scripts/paris2024/sap-p1-1/tunnels
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | killall autossh |
| 3 | 3 | sleep 2 |
| 4 | 4 | # Tunnel to cloud MongoDB on port 10203 with reverse tunnels for MongoDB on localhost:10201 and through tunnel on localhost:10202 to P1 #2 |
| 5 | -autossh -M 20500 -f -A -N -L 22443:sapsailing.com:443 -L 22222:sapsailing.com:22 -R 9443:localhost:9443 -R '*:8888:localhost:8888' -R 10201:localhost:10201 -L 10203:localhost:10203 -L 5673:rabbit-ap-northeast-1.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -L 443:security-service.sapsailing.com:443 -R 18122:localhost:22 ec2-user@tokyo-ssh.sapsailing.com |
|
| 5 | +autossh -M 20500 -f -A -N -L 22443:sapsailing.com:443 -L 22222:sapsailing.com:22 -R 9443:localhost:9443 -R '*:8888:localhost:8888' -R 10201:localhost:10201 -L 10203:localhost:10203 -L 5673:rabbit-eu-west-3.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -L 443:security-service.sapsailing.com:443 -R 18122:localhost:22 ec2-user@paris-ssh.sapsailing.com |
|
| 6 | 6 | sleep 2 |
| 7 | 7 | # Tunnel to other laptop P1 #2, tunneling to MongoDB running on 10202, mapping to local port 10202 |
| 8 | 8 | autossh -M 20502 -f -A -N -L 10202:localhost:10202 -R 10201:localhost:10201 sailing@sap-p1-2 |
configuration/on-site-scripts/paris2024/sap-p1-1/tunnels-no-internet
| ... | ... | @@ -1,11 +1,11 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | 2 | killall autossh |
| 3 | 3 | sleep 2 |
| 4 | -# With no Internet, tokyo-ssh.sapsailing.com and rabbit-api-northeast-1.sapsailing.com and rabbit.internal.sapsailing.com are unreachable; we'll use |
|
| 4 | +# With no Internet, paris-ssh.sapsailing.com and rabbit-eu-west-3.sapsailing.com and rabbit.internal.sapsailing.com are unreachable; we'll use |
|
| 5 | 5 | # the RabbitMQ on sap-p1-2 instead. Furthermore, security-service.sapsailing.com is then not reachable, and we expect a local backup |
| 6 | 6 | # copy to be launched, reachable through sap-p1-1 (this host)'s NGINX on port 9443. A reverse port forward from sap-p1-2:443 to |
| 7 | 7 | # localhost:9443 is established for access to this security-service.sapsailing.com backup replacement. |
| 8 | -#autossh -M 20500 -f -A -N -R 9443:localhost:9443 -R '*:8888:localhost:8888' -R 10201:localhost:10201 -L 10203:localhost:10203 -L 5673:rabbit-ap-northeast-1.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -L 443:security-service.sapsailing.com:443 -R 18122:localhost:22 ec2-user@tokyo-ssh.sapsailing.com |
|
| 8 | +#autossh -M 20500 -f -A -N -R 9443:localhost:9443 -R '*:8888:localhost:8888' -R 10201:localhost:10201 -L 10203:localhost:10203 -L 5673:rabbit-eu-west-3.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -L 443:security-service.sapsailing.com:443 -R 18122:localhost:22 ec2-user@paris-ssh.sapsailing.com |
|
| 9 | 9 | # Tunnel to other laptop P1 #2, tunneling to MongoDB running on 10202, mapping to local port 10202 |
| 10 | 10 | autossh -M 20502 -f -A -N -L 10202:localhost:10202 -R 10201:localhost:10201 sailing@sap-p1-2 |
| 11 | 11 | sleep 2 |
configuration/on-site-scripts/paris2024/sap-p1-2/hosts
| ... | ... | @@ -3,7 +3,7 @@ |
| 3 | 3 | 127.0.1.1 sap-p1-2 sap-p1-2.sapsailing.com |
| 4 | 4 | 10.1.3.195 sap-p1-1 sap-p1-1.sapsailing.com |
| 5 | 5 | #10.94.81.137 sap-p1-1 sap-p1-1.sapsailing.com |
| 6 | -127.0.0.1 tokyo2020-master.sapsailing.com |
|
| 6 | +127.0.0.1 paris2024-master.sapsailing.com |
|
| 7 | 7 | 127.0.0.1 www.sapsailing.com |
| 8 | 8 | # TracTrac on-site servers |
| 9 | 9 | 10.1.1.104 stso1 |
configuration/on-site-scripts/paris2024/sap-p1-2/master.conf
| ... | ... | @@ -1,15 +1,15 @@ |
| 1 | 1 | INSTALL_FROM_RELEASE=build-202107291820 |
| 2 | 2 | INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT="sailing@sap-p1-1" |
| 3 | 3 | #INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT="sailing@localhost" |
| 4 | -SERVER_NAME=tokyo2020 |
|
| 5 | -MONGODB_URI="mongodb://localhost:10202,localhost:10203/${SERVER_NAME}?replicaSet=tokyo2020&retryWrites=true&readPreference=nearest" |
|
| 4 | +SERVER_NAME=paris2024 |
|
| 5 | +MONGODB_URI="mongodb://localhost:10202,localhost:10203/${SERVER_NAME}?replicaSet=paris2024&retryWrites=true&readPreference=nearest" |
|
| 6 | 6 | # RabbitMQ in eu-west-1 (rabbit.internal.sapsailing.com) is expected to be found through SSH tunnel on localhost:5675 |
| 7 | 7 | # Replication of shared services from central security-service.sapsailing.com through SSH tunnel 443:security-service.sapsailing.com:443 |
| 8 | 8 | # with a local /etc/hosts entry mapping security-service.sapsailing.com to 127.0.0.1 |
| 9 | 9 | REPLICATE_MASTER_QUEUE_HOST=localhost |
| 10 | 10 | REPLICATE_MASTER_QUEUE_PORT=5675 |
| 11 | 11 | REPLICATE_MASTER_BEARER_TOKEN="..." |
| 12 | -# Outbound replication to RabbitMQ through SSH tunnel with port forward on port 5673, regularly to rabbit-ap-northeast-1.sapsailing.com |
|
| 12 | +# Outbound replication to RabbitMQ through SSH tunnel with port forward on port 5673, regularly to rabbit-eu-west-3.sapsailing.com |
|
| 13 | 13 | # Can be re-mapped to the RabbitMQ running on sap-p1-2 |
| 14 | 14 | REPLICATION_HOST=localhost |
| 15 | 15 | REPLICATION_PORT=5673 |
configuration/on-site-scripts/paris2024/sap-p1-2/replica.conf
| ... | ... | @@ -1,11 +1,11 @@ |
| 1 | -# Regular operations; sap-p1-2 replicates sap-p1-1 using the rabbit-ap-northeast-1.sapsailing.com RabbitMQ in the cloud through SSH tunnel. |
|
| 1 | +# Regular operations; sap-p1-2 replicates sap-p1-1 using the rabbit-eu-west-3.sapsailing.com RabbitMQ in the cloud through SSH tunnel. |
|
| 2 | 2 | # Outbound replication, though not expected to become active, goes to a local RabbitMQ |
| 3 | 3 | INSTALL_FROM_RELEASE=build-202107291820 |
| 4 | 4 | INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT="sailing@sap-p1-1" |
| 5 | 5 | #INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT="sailing@localhost" |
| 6 | -SERVER_NAME=tokyo2020 |
|
| 7 | -MONGODB_URI="mongodb://localhost:10201,localhost:10202,localhost:10203/${SERVER_NAME}-replica?replicaSet=tokyo2020&retryWrites=true&readPreference=nearest" |
|
| 8 | -# RabbitMQ in ap-northeast-1 is expected to be found locally on port 5673 |
|
| 6 | +SERVER_NAME=paris2024 |
|
| 7 | +MONGODB_URI="mongodb://localhost:10201,localhost:10202,localhost:10203/${SERVER_NAME}-replica?replicaSet=paris2024&retryWrites=true&readPreference=nearest" |
|
| 8 | +# RabbitMQ in eu-west-3 is expected to be found locally on port 5673 |
|
| 9 | 9 | REPLICATE_MASTER_SERVLET_HOST=sap-p1-1 |
| 10 | 10 | REPLICATE_MASTER_SERVLET_PORT=8888 |
| 11 | 11 | REPLICATE_MASTER_QUEUE_HOST=localhost |
configuration/on-site-scripts/paris2024/sap-p1-2/security_service.conf
| ... | ... | @@ -1,6 +1,6 @@ |
| 1 | 1 | # This is a configuration for an "emergency" local copy of the security-service.sapsailing.com server. |
| 2 | 2 | # It assumes that a regular back-up of the eu-west-1 "security_service" DB from the "live" replica set |
| 3 | -# has been copied to the local tokyo2020 replica set. Outbound replication is to the RabbitMQ on sap-p1-2. |
|
| 3 | +# has been copied to the local paris2024 replica set. Outbound replication is to the RabbitMQ on sap-p1-2. |
|
| 4 | 4 | INSTALL_FROM_RELEASE=build-202107291820 |
| 5 | 5 | INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT=sailing@sap-p1-1 |
| 6 | 6 | SERVER_NAME=security_service |
configuration/on-site-scripts/paris2024/sap-p1-2/tunnels
| ... | ... | @@ -2,7 +2,7 @@ |
| 2 | 2 | killall autossh |
| 3 | 3 | sleep 2 |
| 4 | 4 | # Tunnel to cloud MongoDB on port 10203; reverse tunnel to localhost:10202 for MongoDB |
| 5 | -autossh -M 21504 -f -A -N -L 22222:sapsailing.com:22 -R 9444:localhost:9443 -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-ap-northeast-1.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@tokyo-ssh.sapsailing.com |
|
| 5 | +autossh -M 21504 -f -A -N -L 22222:sapsailing.com:22 -R 9444:localhost:9443 -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-eu-west-3.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@paris-ssh.sapsailing.com |
|
| 6 | 6 | # The tunnel connection to sap-p1-1 is expected to be established from the sap-p1-1 side by a reverse port forward |
| 7 | 7 | # Tunnel to localhost for TracTrac A port forwards |
| 8 | 8 | tractrac-A |
configuration/on-site-scripts/paris2024/sap-p1-2/tunnels-master
| ... | ... | @@ -1,7 +1,7 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | 2 | killall autossh |
| 3 | 3 | # Tunnel to cloud MongoDB on port 10203; reverse tunnel to localhost:10202 for MongoDB |
| 4 | -autossh -M 21504 -f -A -N -L 22443:sapsailing.com:443 -L 22222:sapsailing.com:22 -R 9443:localhost:9443 -R '*:8888:localhost:8888' -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-ap-northeast-1.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@tokyo-ssh.sapsailing.com |
|
| 4 | +autossh -M 21504 -f -A -N -L 22443:sapsailing.com:443 -L 22222:sapsailing.com:22 -R 9443:localhost:9443 -R '*:8888:localhost:8888' -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-eu-west-3.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@paris-ssh.sapsailing.com |
|
| 5 | 5 | sleep 2 |
| 6 | 6 | # Tunnel to localhost for TracTrac port forwards |
| 7 | 7 | tractrac-A |
configuration/on-site-scripts/paris2024/sap-p1-2/tunnels-master-tractrac-failover
| ... | ... | @@ -1,5 +1,5 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | 2 | killall autossh |
| 3 | 3 | # Tunnel to cloud MongoDB on port 10203; reverse tunnel to localhost:10202 for MongoDB |
| 4 | -autossh -M 21504 -f -A -N -L 22443:sapsailing.com:443 -L 22222:sapsailing.com:22 -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-ap-northeast-1.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@tokyo-ssh.sapsailing.com |
|
| 4 | +autossh -M 21504 -f -A -N -L 22443:sapsailing.com:443 -L 22222:sapsailing.com:22 -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-eu-west-3.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@paris-ssh.sapsailing.com |
|
| 5 | 5 | # The tunnel connection to sap-p1-1 is expected to be established from the sap-p1-1 side by a reverse port forward |
configuration/on-site-scripts/paris2024/sap-p1-2/tunnels-no-internet
| ... | ... | @@ -1,16 +1,16 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | 2 | killall autossh |
| 3 | 3 | sleep 2 |
| 4 | -# No point in trying to reach tokyo-ssh.sapsailing.com without Internet connection; |
|
| 4 | +# No point in trying to reach paris-ssh.sapsailing.com without Internet connection; |
|
| 5 | 5 | # Instead, sap-p1-1 and sap-p1-2 are expected to use sap-p1-2's RabbitMQ through port 5673; |
| 6 | 6 | # Local port 443 which is expected to reach security-service.sapsailing.com:443 needs to be |
| 7 | 7 | # diverted to a non-existing port so that outbound replication operations will fail. |
| 8 | 8 | # If this server (sap-p1-2) needs to run as master in this scenario it cannot replicate |
| 9 | 9 | # the SecurityService successfully anymore. If the application then has to be restarted, |
| 10 | 10 | # it requires a configuration that loads the relevant security information locally. |
| 11 | -#autossh -M 20504 -f -A -N -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-ap-northeast-1.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@tokyo-ssh.sapsailing.com |
|
| 11 | +#autossh -M 20504 -f -A -N -L 443:security-service.sapsailing.com:443 -R 10202:localhost:10202 -L 10203:localhost:10203 -L 5673:rabbit-eu-west-3.sapsailing.com:5672 -L 15673:localhost:15673 -L 5675:rabbit.internal.sapsailing.com:5672 -L 15675:rabbit.internal.sapsailing.com:15672 -R 18222:localhost:22 ec2-user@paris-ssh.sapsailing.com |
|
| 12 | 12 | # The tunnel connection to sap-p1-1 is expected to be established from the sap-p1-1 side by a reverse port forward |
| 13 | -# However, the RabbitMQ forwards to emulate rabbit.internal.sapsailing.com (eu-west-1) and rabbit-ap-northeast-1.sapsailing.com |
|
| 13 | +# However, the RabbitMQ forwards to emulate rabbit.internal.sapsailing.com (eu-west-1) and rabbit-eu-west-3.sapsailing.com |
|
| 14 | 14 | # with the local RabbitMQ need to go through a localhost port forward. Furthermore, the security-service.sapsailing.com tunnel |
| 15 | 15 | # is re-directed to sap-p1-1's NGINX, assuming it has a replacement running if Internet fails. |
| 16 | 16 | autossh -M 21504 -f -A -N -L 5673:sap-p1-1:5672 -L 15673:sap-p1-1:15672 -L 5675:sap-p1-1:5672 -L 15675:sap-p1-1:15672 sap-p1-1 |
configuration/on-site-scripts/paris2024/stop-all-cloud-replicas.sh
| ... | ... | @@ -8,7 +8,7 @@ if [ $# -eq 0 ]; then |
| 8 | 8 | echo |
| 9 | 9 | echo "Will tell all replicas in the cloud to stop replicating. This works by invoking the" |
| 10 | 10 | echo "get-replica-ips script and for each of them to stop replicating, using the stopReplicating.sh" |
| 11 | - echo "script in their /home/sailing/servers/tokyo2020 directory, passing through the bearer token." |
|
| 11 | + echo "script in their /home/sailing/servers/paris2024 directory, passing through the bearer token." |
|
| 12 | 12 | echo "Note: this will NOT stop replication on the local replica on sap-p1-2!" |
| 13 | 13 | exit 2 |
| 14 | 14 | fi |
| ... | ... | @@ -22,5 +22,5 @@ do |
| 22 | 22 | esac |
| 23 | 23 | done |
| 24 | 24 | for i in `./get-replica-ips`; do |
| 25 | - ssh -o StrictHostKeyChecking=no root@$i "su - sailing -c \"cd /home/sailing/servers/tokyo2020; ./stopReplicating.sh ${BEARER_TOKEN}\"" |
|
| 25 | + ssh -o StrictHostKeyChecking=no root@$i "su - sailing -c \"cd /home/sailing/servers/paris2024; ./stopReplicating.sh ${BEARER_TOKEN}\"" |
|
| 26 | 26 | done |
configuration/on-site-scripts/paris2024/update-launch-configuration.sh
| ... | ... | @@ -1,6 +1,6 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | -LAUNCH_CONFIGURATION_NAME_PATTERN="^tokyo2020-.*" |
|
| 3 | -AUTO_SCALING_GROUP_NAME_PATTERN="^tokyo2020.*" |
|
| 2 | +LAUNCH_CONFIGURATION_NAME_PATTERN="^paris2024-.*" |
|
| 3 | +AUTO_SCALING_GROUP_NAME_PATTERN="^paris2024.*" |
|
| 4 | 4 | KEY_NAME=Axel |
| 5 | 5 | |
| 6 | 6 | if [ $# -eq 0 ]; then |
| ... | ... | @@ -13,9 +13,9 @@ if [ $# -eq 0 ]; then |
| 13 | 13 | echo |
| 14 | 14 | echo "Example: $0 -R build-202106041327 -k Jan" |
| 15 | 15 | echo |
| 16 | - echo "Will upgrade the auto-scaling group tokyo2020-* in the regions from regions.txt with a new" |
|
| 17 | - echo "launch configuration that will be derived from the existing launch configuration named tokyo2020-*" |
|
| 18 | - echo "by copying it to tokyo2020-{RELEASE_NAME} while updating the INSTALL_FROM_RELEASE parameter in the" |
|
| 16 | + echo "Will upgrade the auto-scaling group paris2024-* in the regions from regions.txt with a new" |
|
| 17 | + echo "launch configuration that will be derived from the existing launch configuration named paris2024-*" |
|
| 18 | + echo "by copying it to paris2024-{RELEASE_NAME} while updating the INSTALL_FROM_RELEASE parameter in the" |
|
| 19 | 19 | echo "user data to the {RELEASE_NAME}, and optionally adjusting the AMI, key pair name and instance type if specified." |
| 20 | 20 | echo "Note: this will NOT terminate any instances in the target group!" |
| 21 | 21 | exit 2 |
| ... | ... | @@ -53,7 +53,7 @@ for REGION in $( cat `dirname $0`/regions.txt ); do |
| 53 | 53 | SECURITY_GROUP=$( echo "${LAUNCH_CONFIGURATION_JSON}" | jq -r '.SecurityGroups[0]' ) |
| 54 | 54 | BLOCK_DEVICE_MAPPINGS="$( echo "${LAUNCH_CONFIGURATION_JSON}" | jq -r '.BlockDeviceMappings' )" |
| 55 | 55 | NEW_USER_DATA=$( echo "${OLD_USER_DATA}" | sed -e 's/^INSTALL_FROM_RELEASE=.*$/INSTALL_FROM_RELEASE='${RELEASE}'/' ) |
| 56 | - NEW_LAUNCH_CONFIGURATION_NAME=tokyo2020-${RELEASE} |
|
| 56 | + NEW_LAUNCH_CONFIGURATION_NAME=paris2024-${RELEASE} |
|
| 57 | 57 | echo "Creating new launch configuration ${NEW_LAUNCH_CONFIGURATION_NAME}" |
| 58 | 58 | aws autoscaling create-launch-configuration --launch-configuration-name ${NEW_LAUNCH_CONFIGURATION_NAME} --image-id ${REGIONAL_IMAGE_ID} --key-name ${KEY_NAME} --security-groups ${SECURITY_GROUP} --user-data "${NEW_USER_DATA}" --instance-type ${REGIONAL_INSTANCE_TYPE} --block-device-mappings "${BLOCK_DEVICE_MAPPINGS}" |
| 59 | 59 | EXIT_CODE=$? |
configuration/on-site-scripts/paris2024/upgrade-landscape.sh
| ... | ... | @@ -11,12 +11,12 @@ |
| 11 | 11 | # - wait until master is healthy |
| 12 | 12 | # - on sap-p1-2:servers/replica run ./stop; ./start to bring up on-site replica again |
| 13 | 13 | # - launch upgraded cloud replicas and replace old replicas in target group (launch-replicas-in-all-regions.sh) |
| 14 | -# - terminate all instances named "SL Tokyo2020 (auto-replica)"; this should cause the auto-scaling group to launch new instances as required |
|
| 15 | -# - manually inspect the health of everything and terminate the "SL Tokyo2020 (Upgrade Replica)" instances when enough new instances |
|
| 16 | -# named "SL Tokyo2020 (auto-replica)" are available |
|
| 14 | +# - terminate all instances named "SL Paris2024 (auto-replica)"; this should cause the auto-scaling group to launch new instances as required |
|
| 15 | +# - manually inspect the health of everything and terminate the "SL Paris2024 (Upgrade Replica)" instances when enough new instances |
|
| 16 | +# named "SL Paris2024 (auto-replica)" are available |
|
| 17 | 17 | # |
| 18 | 18 | KEY_NAME=Axel |
| 19 | -INSTANCE_NAME_TO_TERMINATE="SL Tokyo2020 (auto-replica)" |
|
| 19 | +INSTANCE_NAME_TO_TERMINATE="SL Paris2024 (auto-replica)" |
|
| 20 | 20 | if [ $# -eq 0 ]; then |
| 21 | 21 | echo "$0 -R <release-name> -b <replication-bearer-token> [-t <instance-type>] [-i <ami-id>] [-k <key-pair-name>] [-s]" |
| 22 | 22 | echo "" |
| ... | ... | @@ -40,9 +40,9 @@ if [ $# -eq 0 ]; then |
| 40 | 40 | echo " - wait until master is healthy" |
| 41 | 41 | echo " - on sap-p1-2:servers/replica run ./stop; ./start to bring up on-site replica again" |
| 42 | 42 | echo " - launch upgraded cloud replicas and replace old replicas in target group (launch-replicas-in-all-regions.sh)" |
| 43 | - echo " - terminate all instances named \"SL Tokyo2020 (auto-replica)\"; this should cause the auto-scaling group to launch new instances as required" |
|
| 44 | - echo " - manually inspect the health of everything and terminate the \"SL Tokyo2020 (Upgrade Replica)\" instances when enough new instances" |
|
| 45 | - echo " named \"SL Tokyo2020 (auto-replica)\" are available" |
|
| 43 | + echo " - terminate all instances named \"SL Paris2024 (auto-replica)\"; this should cause the auto-scaling group to launch new instances as required" |
|
| 44 | + echo " - manually inspect the health of everything and terminate the \"SL Paris2024 (Upgrade Replica)\" instances when enough new instances" |
|
| 45 | + echo " named \"SL Paris2024 (auto-replica)\" are available" |
|
| 46 | 46 | exit 2 |
| 47 | 47 | fi |
| 48 | 48 | options='R:b:t:i:k:s' |
| ... | ... | @@ -124,7 +124,7 @@ if [ "${EXIT_CODE}" != "0" ]; then |
| 124 | 124 | echo "Re-launching replica on sap-p1-2 failed with exit code ${EXIT_CODE}" |
| 125 | 125 | exit ${EXIT_CODE} |
| 126 | 126 | fi |
| 127 | -echo " * Launching upgraded replicas SL Tokyo2020 (Upgrade Replica) in the regions" |
|
| 127 | +echo " * Launching upgraded replicas SL Paris2024 (Upgrade Replica) in the regions" |
|
| 128 | 128 | OPTIONS="-b ${BEARER_TOKEN} -R ${RELEASE}" |
| 129 | 129 | if [ -n "${IMAGE_ID}" ]; then |
| 130 | 130 | OPTIONS="${OPTIONS} -i ${IMAGE_ID}" |
configuration/on-site-scripts/paris2024/wait-for-new-auto-replicas-and-terminate-upgrade-replicas.sh
| ... | ... | @@ -1,8 +1,8 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | -VPC="Tokyo2020" |
|
| 3 | -TARGET_GROUP_NAME="S-ded-tokyo2020" |
|
| 4 | -UPGRADE_REPLICA_NAME="SL Tokyo2020 (Upgrade Replica)" |
|
| 5 | -AUTO_REPLICA_NAME="SL Tokyo2020 (auto-replica)" |
|
| 2 | +VPC="Paris2024" |
|
| 3 | +TARGET_GROUP_NAME="S-paris2024" |
|
| 4 | +UPGRADE_REPLICA_NAME="SL Paris2024 (Upgrade Replica)" |
|
| 5 | +AUTO_REPLICA_NAME="SL Paris2024 (auto-replica)" |
|
| 6 | 6 | |
| 7 | 7 | if [ $# -eq 0 ]; then |
| 8 | 8 | echo "$0 [ -g <AWS-region> ]" |
wiki/info/landscape/paris2024/olympic-plan-for-paris-marseille-2024.md
| ... | ... | @@ -30,7 +30,7 @@ We will install a cron job that regularly performs a "compareServers" between pr |
| 30 | 30 | |
| 31 | 31 | ## Cloud RabbitMQ |
| 32 | 32 | |
| 33 | -Instead of ``rabbit-ap-northeast-1.sapsailing.com`` we will use ``rabbit-eu-west-3.sapsailing.com`` pointing to the internal IP address of the RabbitMQ installation in ``eu-west-3`` that is used as the default for the on-site master processes as well as for all cloud replicas. |
|
| 33 | +We will use ``rabbit-eu-west-3.sapsailing.com`` pointing to the internal IP address of the RabbitMQ installation in ``eu-west-3`` that is used as the default for the on-site master processes as well as for all cloud replicas. |
|
| 34 | 34 | |
| 35 | 35 | ## ALB and Target Group Set-Up |
| 36 | 36 | |
| ... | ... | @@ -118,7 +118,39 @@ This makes for the following set-up: |
| 118 | 118 | |
| 119 | 119 | ### Internet Failure Using Shadow Master |
| 120 | 120 | |
| 121 | +TODO |
|
| 121 | 122 | |
| 123 | +## Checklist After Event |
|
| 124 | + |
|
| 125 | +The experience during "Tokyo 2020" has shown that after the last race of the last day everybody gets in a rush, and the on-site infrastructure starts to get dismantled quickly. For us this means that we need to prepare well for switching to cloud-only operations. The approach in Enoshima worked well, although we were caught a bit by surprise regarding the speed at which infrastructure was taken down. |
|
| 126 | + |
|
| 127 | +### Cleanly Remove On-Site MongoDB Replicas from ``paris2024`` MongoDB Replica Set |
|
| 128 | + |
|
| 129 | +Connecting to the ``paris2024`` MongoDB replica set, we first need to make sure that the cloud replica can become primary. In the production configuration it was assigned a priority of 0 and 0 votes so that it would never become primary. Now it shall, so we first need to change its priority and votes in the configuration. For this, issue the following command in the MongoDB shell while connected to the ``paris2024`` replica set: |
|
| 130 | + |
|
| 131 | +``` |
|
| 132 | + cfg=rs.config() |
|
| 133 | +``` |
|
| 134 | + |
|
| 135 | +Then find the member using port number ``10203`` which is the cloud replica. Typically, this would be the first element (index 0) in the ``members`` array of the ``cfg`` object. Assuming it *is* at index 0, issue the following commands (replacing the 0 index by the actual index of the ``10203`` port member): |
|
| 136 | + |
|
| 137 | +``` |
|
| 138 | + cfg.members[0].priority=1 |
|
| 139 | + cfg.members[0].votes=1 |
|
| 140 | + rs.reconfig(cfg) |
|
| 141 | + rs.remove("localhost:10201") |
|
| 142 | + rs.remove("localhost:10202") |
|
| 143 | +``` |
|
| 144 | + |
|
| 145 | +This will make the MongoDB cloud replica running on ``paris-ssh.sapsailing.com`` the single primary of the now single-element replica set. The MongoDB processes running on the on-site laptops can then be stopped. |
|
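To double-check, a minimal verification sketch, assuming the ``mongo`` shell is available on ``paris-ssh.sapsailing.com`` and the cloud member listens on ``localhost:10203`` as described above; the single remaining member should report ``PRIMARY``:

```
echo 'rs.status().members.forEach(function(m) { print(m.name + ": " + m.stateStr); })' | \
  mongo "mongodb://localhost:10203/?replicaSet=paris2024&retryWrites=true&readPreference=nearest"
```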
| 146 | + |
|
| 147 | +### Stop Replication in Cloud Replicas |
|
| 148 | + |
|
| 149 | +Then, all cloud replicas need to stop replicating because soon the on-site master will be stopped. See script ``configuration/on-site-scripts/paris2024/stop-all-cloud-replicas.sh``. |
|
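A usage sketch, assuming the bearer token is passed with ``-b`` as in the other scripts in this directory; the script passes it through to each replica's ``stopReplicating.sh``:

```
cd configuration/on-site-scripts/paris2024
./stop-all-cloud-replicas.sh -b "<replication-bearer-token>"
```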
| 150 | + |
|
| 151 | +### Stop On-Site Master and Launch Cloud Master on ``paris-ssh.sapsailing.com`` |
|
| 152 | + |
|
| 153 | +Next, an application master for the ``paris2024`` application replica set needs to be launched on ``paris-ssh.sapsailing.com``. It uses the MongoDB URI ``mongodb://localhost:10203/paris2024?replicaSet=paris2024&retryWrites=true&readPreference=nearest``, hence connecting to the single-instance MongoDB "replica set" running on the same host. Other than this the instance uses a standard configuration for a live master. This configuration can already be prepared before the event. All that then needs to be done is to adjust the release to the one that all cloud replicas are using. |
|
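A minimal sketch of what such a prepared configuration could look like, modeled on the ``master.conf`` files above; the release value is a placeholder, and further replication settings are omitted here:

```
INSTALL_FROM_RELEASE=<release-used-by-the-cloud-replicas>
SERVER_NAME=paris2024
MONGODB_URI="mongodb://localhost:10203/paris2024?replicaSet=paris2024&retryWrites=true&readPreference=nearest"
```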
| 122 | 154 | |
| 123 | 155 | ## Test Plan for Test Event Marseille July 2023 |
| 124 | 156 | |
| ... | ... | @@ -134,7 +166,7 @@ This will require switching entirely to the shadow master. Depending on the stat |
| 134 | 166 | |
| 135 | 167 | This can be caused by a deadlock, VM crash, Full GC phase, massive performance degradation or other faulty behavior. We then need to actively close the reverse SSH port forward from the cloud to the production master's 8888 HTTP port, as a precaution switch the RabbitMQ tunnel from the cloud-based to the local RabbitMQ instance so that in case the production master "wakes up" again, e.g., after a Full GC, it does not start to interfere with the now active shadow master on the RabbitMQ fan-out exchange. On the shadow master we need to re-configure the SSH tunnels, particularly to target the cloud-based RabbitMQ and have the reverse port forward on port 8888 target the shadow master on site now. |
| 136 | 168 | |
| 137 | -### Test Primary Mater Failures with no Internet Connection |
|
| 169 | +### Test Primary Master Failures with no Internet Connection |
|
| 138 | 170 | |
| 139 | 171 | Combine the above scenarios: a failing production master (hardware or VM-only) will require different tunnel re-configurations, especially regarding the then local security-service.sapsailing.com environment which may need to move to the shadow laptop. |
| 140 | 172 |
wiki/info/landscape/paris2024/olympic-setup.md
| ... | ... | @@ -0,0 +1,721 @@ |
| 1 | +# Setup for the Olympic Summer Games 2024 Paris/Marseille |
|
| 2 | + |
|
| 3 | +[[_TOC_]] |
|
| 4 | + |
|
| 5 | +## Local Installation |
|
| 6 | + |
|
| 7 | +For the Olympic Summer Games 2024 Paris/Marseille we use a dedicated hardware set-up to accommodate the requirements on site. In particular, two Lenovo P1 laptops with similar hardware configuration (32GB RAM, Intel Core i9-9880H) will be established as server devices running various services in a way that we can tolerate, with minimal downtimes, failures of either of the two devices. One is the old ``sap-p1-1`` that was already used for the "Tokyo 2020" event; the other one will be a newly ordered one (we already have the approval). |
|
| 8 | + |
|
| 9 | +### Installation Packages |
|
| 10 | + |
|
| 11 | +The two laptops run Mint Linux with a fairly modern 5.4 kernel. We keep both up to date with regular ``apt-get update && apt-get upgrade`` executions. Both have an up-to-date SAP JVM 8 (see [https://tools.hana.ondemand.com/#cloud](https://tools.hana.ondemand.com/#cloud)) installed under /opt/sapjvm_8. This is the runtime VM used to run the Java application server process. |
|
| 12 | + |
|
| 13 | +Furthermore, both laptops have a MongoDB 4.4 installation configured through ``/etc/apt/sources.list.d/mongodb-org-4.4.list`` containing the line ``deb http://repo.mongodb.org/apt/debian jessie/mongodb-org/4.4 main``. Their respective configuration can be found under ``/etc/mongod.conf``. The WiredTiger storage engine cache size should be limited; currently, the ``storage`` entry shown in the Mongo Configuration section below does this. |
|
| 14 | + |
|
| 15 | +RabbitMQ is part of the distribution natively and runs on both laptops. Both RabbitMQ and MongoDB are installed as systemd service units and are launched during the boot sequence. The latest GWT version (currently our own fork, 2.11.0) is installed from [https://static.sapsailing.com/wt-2.11.0.zip](https://static.sapsailing.com/wt-2.11.0.zip) under ``/opt/gwt-2.11.0`` in case any development work needs to be done on these machines. |
|
| 16 | + |
|
| 17 | +Both machines have been configured to use 2GB of swap space at ``/swapfile``. |
|
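For reference, a sketch of how such a swap file is typically created; these exact commands are an assumption, not a record of what was run on the laptops:

```
sudo fallocate -l 2G /swapfile
sudo chmod 600 /swapfile
sudo mkswap /swapfile
sudo swapon /swapfile
# make the swap file permanent across reboots
echo '/swapfile none swap sw 0 0' | sudo tee -a /etc/fstab
```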
| 18 | + |
|
| 19 | +### Mongo Configuration |
|
| 20 | + |
|
| 21 | +On both laptops, ``/etc/mongod.conf`` sets ``/var/lib/mongodb`` as the storage directory and limits the in-memory cache size to 2GB: |
|
| 22 | + |
|
| 23 | +``` |
|
| 24 | +storage: |
|
| 25 | + dbPath: /var/lib/mongodb |
|
| 26 | + journal: |
|
| 27 | + enabled: true |
|
| 28 | + wiredTiger: |
|
| 29 | + engineConfig: |
|
| 30 | + cacheSizeGB: 2 |
|
| 31 | +``` |
|
| 32 | + |
|
| 33 | +The port is set to ``10201`` on ``sap-p1-1``: |
|
| 34 | + |
|
| 35 | +``` |
|
| 36 | +# network interfaces |
|
| 37 | +net: |
|
| 38 | + port: 10201 |
|
| 39 | + bindIp: 0.0.0.0 |
|
| 40 | +``` |
|
| 41 | + |
|
| 42 | +and to ``10202`` on ``sap-p1-2``: |
|
| 43 | + |
|
| 44 | +``` |
|
| 45 | +# network interfaces |
|
| 46 | +net: |
|
| 47 | + port: 10202 |
|
| 48 | + bindIp: 0.0.0.0 |
|
| 49 | +``` |
|
| 50 | + |
|
| 51 | +Furthermore, the replica set is configured to be ``paris2024`` on both: |
|
| 52 | + |
|
| 53 | +``` |
|
| 54 | +replication: |
|
| 55 | + oplogSizeMB: 10000 |
|
| 56 | + replSetName: paris2024 |
|
| 57 | +``` |
|
| 58 | + |
|
| 59 | +For "Tokyo 2020" we configured yet another MongoDB replica set that consisted only of the two on-site nodes and in which we stored the backup copy of the ``security_service`` database. We should, however, be able to store the ``security_service`` DB backup in the same replica set of which the two local nodes, with their MongoDB processes listening on ports ``10201``/``10202``, are members. The ``security_service`` database there is the target of the backup script for the ``security_service`` DB from the ``live`` replica set; see below. We increased the priority of the ``sap-p1-1`` node from 1 to 2. |
|
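A sketch of how this priority change can be made in the ``mongo`` shell, assuming the ``sap-p1-1`` member is listed as ``localhost:10201`` as in the scripts above:

```
echo 'cfg = rs.config();
cfg.members.forEach(function(m) { if (m.host == "localhost:10201") { m.priority = 2; } });
rs.reconfig(cfg);' | mongo "mongodb://localhost:10201,localhost:10202,localhost:10203/?replicaSet=paris2024&retryWrites=true&readPreference=nearest"
```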
| 60 | + |
|
| 61 | +### User Accounts |
|
| 62 | + |
|
| 63 | +The essential user account on both laptops is ``sailing``. The account is intended to be used for running the Java VM that executes the SAP Sailing Analytics server software. The account is currently still protected by a password that our on-site team should know. On both laptops the ``sailing`` account has a password-less SSH key installed under ``/home/sailing/.ssh`` whose public key is contained in the ``authorized_keys`` file of ``paris-ssh.sapsailing.com`` as well as of the respective other P1 laptop. This way, all tunnels can easily be created once logged on to this ``sailing`` account. |
|
| 64 | + |
|
| 65 | +There are also still two personal accounts ``uhl`` and ``tim`` and an Eclipse development environment under ``/usr/local/eclipse``. |
|
| 66 | + |
|
| 67 | +### Hostnames |
|
| 68 | + |
|
| 69 | +DNS is available on site on the gateway host ``10.1.0.6``. This is essential for resolving ``www.igtimi.com``, the AWS SES SMTP server at ``email-smtp.eu-west-1.amazonaws.com``, and all e-mail addresses' domains for sendmail's domain verification. The DNS server is set on both ``sap-p1-1`` and ``sap-p1-2``. It can be set from the command line using ``nmcli connection modify Wired\ connection\ 2 ipv4.dns "10.1.0.6"; nmcli connection down Wired\ connection\ 2; nmcli connection up Wired\ connection\ 2``. Currently, when testing in the SAP facilities with the SAP Guest WiFi, IP addresses that may change have to be updated in ``/etc/hosts``. |
|
| 70 | + |
|
| 71 | +The domain name has been set to ``sapsailing.com`` so that the fully-qualified host names are ``sap-p1-1.sapsailing.com`` and ``sap-p1-2.sapsailing.com`` respectively. Using this domain name is helpful later when it comes to the shared security realm established with the central ``security-service.sapsailing.com`` replica set. |
|
| 72 | + |
|
| 73 | +The hostname ``www.sapsailing.com`` is required by master instances when connected to the Internet in order to download polar data and wind estimation data from the archive server. Since direct access to ``www.sapsailing.com`` is blocked, we run this through the SSH tunnel to our jump host; in order to have matching certificates and appropriate hostname-based routing in the cloud for requests to ``www.sapsailing.com`` we alias this hostname in ``/etc/hosts`` to ``127.0.0.1`` (localhost). |
|
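The corresponding entry, as also present in the ``hosts`` files above, is:

```
127.0.0.1   www.sapsailing.com
```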
| 74 | + |
|
| 75 | +### IP Addresses and VPN |
|
| 76 | + |
|
| 77 | +Here are the IP addresses as indicated by SwissTiming: |
|
| 78 | + |
|
| 79 | +``` |
|
| 80 | +Host Internal IP VPN IP |
|
| 81 | +----------------------------------------------------------------------------------------- |
|
| 82 | +TracTrac A (Linux) 10.1.1.104 10.8.0.128 STSP-SAL_client28 |
|
| 83 | +TracTrac B (Linux) 10.1.1.105 10.8.0.129 STSP-SAL_client29 |
|
| 84 | +SAP Analytics 1 Server A (Linux) 10.1.3.195 10.8.0.130 STSP-SAL_client30 |
|
| 85 | +SAP Analytics 2 Server B (Linux) 10.1.3.197 10.8.0.131 |
|
| 86 | +SAP Client Jan (Windows) 10.1.3.220 10.8.0.132 |
|
| 87 | +SAP Client Alexandro (Windows) 10.1.3.221 10.8.0.133 |
|
| 88 | +SAP Client Axel (Windows) 10.1.3.227 10.8.0.134 |
|
| 89 | +TracTrac Dev Jorge (Linux) 10.1.3.228 10.8.0.135 |
|
| 90 | +TracTrac Dev Chris (Linux) 10.1.3.233 10.8.0.136 |
|
| 91 | +``` |
|
| 92 | + |
|
| 93 | +The OpenVPN connection is set up with the GUI of the Linux desktop, so it is managed through Network Manager. Network Manager has a CLI, ``nmcli``, with which more properties of connections can be modified. The ``connection.secondaries`` property defines the UUID of a connection that will be established as soon as the initial connection is working. ``nmcli connection show`` lists the connections with their corresponding UUIDs. For the Medemblik Event, the OpenVPN connection to the A server is bound to the wired interface; this is done with |
|
| 94 | + |
|
| 95 | +``` |
|
| 96 | +sudo nmcli connection modify <Wired Connection 2> +connection.secondaries <UUID-of-OpenVPN-A> |
|
| 97 | +``` |
|
| 98 | + |
|
| 99 | +For the OpenVPN connections we have received two alternative configuration files together with keys and certificates for our server and work laptops, as well as the certificates for the OpenVPN server (``ca.crt``, ``dh.pem``, ``pfs.key``). The "A" configuration, e.g., provided in a file named ``st-soft-aws_A.ovpn``, looks like this: |
|
| 100 | + |
|
| 101 | +``` |
|
| 102 | +client |
|
| 103 | +dev tun |
|
| 104 | +proto udp |
|
| 105 | +remote 3.122.96.235 1195 |
|
| 106 | +ca ca.crt |
|
| 107 | +cert {name-of-the-certificate}.crt |
|
| 108 | +key {name-of-the-key}.key |
|
| 109 | +tls-version-min 1.2 |
|
| 110 | +tls-cipher TLS-ECDHE-RSA-WITH-AES-128-GCM-SHA256:TLS-ECDHE-ECDSA-WITH-AES-128-GCM-SHA256:TLS-ECDHE-RSA-WITH-AES-256-GCM-SHA384:TLS-DHE-RSA-WITH-AES-256-CBC-SHA256 |
|
| 111 | +cipher AES-256-CBC |
|
| 112 | +auth SHA512 |
|
| 113 | +resolv-retry infinite |
|
| 114 | +auth-retry none |
|
| 115 | +nobind |
|
| 116 | +persist-key |
|
| 117 | +persist-tun |
|
| 118 | +ns-cert-type server |
|
| 119 | +comp-lzo |
|
| 120 | +verb 3 |
|
| 121 | +tls-client |
|
| 122 | +tls-auth pfs.key |
|
| 123 | +``` |
|
| 124 | + |
|
| 125 | +Here, ``{name-of-the-certificate}.crt`` and ``{name-of-the-key}.key`` need to be replaced by the names of the files corresponding with the host to connect to the OpenVPN. The "B" configuration only differs in the ``remote`` specification, using a different IP address for the OpenVPN server, namely ``52.59.130.167``. It is useful to copy the ``.ovpn`` file and the other ``.key`` and ``.crt`` files into one directory. |
|
| 126 | + |
|
| 127 | +Under Windows download the latest OpenVPN client from [https://openvpn.net/client-connect-vpn-for-windows/](https://openvpn.net/client-connect-vpn-for-windows/). After installation, use the ``.ovpn`` file, adjusted with your personalized key/certificate, to establish the connection. |
|
| 128 | + |
|
| 129 | +On Linux, go to the global settings through Gnome, node "Network" and press the "+" button next to VPN. Import the ``.ovpn`` file, then enable the OpenVPN connection by flicking the switch. The connection will show in the output of |
|
| 130 | + |
|
| 131 | +``` |
|
| 132 | + nmcli connection show |
|
| 133 | +``` |
|
| 134 | + |
|
| 135 | +The connection IDs will be shown, e.g., ``st-soft-aws_A``. Such a connection can be stopped and restarted from the command line using the following commands: |
|
| 136 | + |
|
| 137 | +``` |
|
| 138 | + nmcli connection down st-soft-aws_A |
|
| 139 | + nmcli connection up st-soft-aws_A |
|
| 140 | +``` |
|
| 141 | + |
|
| 142 | +### Tunnels |
|
| 143 | + |
|
| 144 | +On both laptops there is a script ``/usr/local/bin/tunnels`` which establishes SSH tunnels using the ``autossh`` tool. The ``autossh`` processes are forked into the background using the ``-f`` option. It seems important to then also pass the port used for sending heartbeats with the ``-M`` option; if this is omitted, in our experience only one of several ``autossh`` processes survives. However, we have also learned that using ``-M`` with the "port" ``0`` (which disables the separate heartbeat connection) can help to stabilize the connection: with a real heartbeat port, port collisions may occur, and when re-connecting, the delayed release of those heartbeat ports can become an issue, whereas with ``-M 0`` it cannot. The ``-M 0`` option is particularly helpful when tunnelling to ``sapsailing.com`` which is provided through a network load balancer (NLB). |
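
As a rough illustration (not the actual ``tunnels`` script), a single forward kept alive this way could look as follows; the jump host and the ``22443-->sapsailing.com:443`` forward are taken from the set-up described below:

```
# Hypothetical single-forward example (the real tunnels script sets up many forwards):
# -f forks into the background, -M 0 disables the separate heartbeat connection,
# -N runs no remote command, -L establishes the local port forward.
autossh -M 0 -f -N -L 22443:sapsailing.com:443 ec2-user@paris-ssh.sapsailing.com
```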
|
| 145 | + |
|
| 146 | +During regular operations we assume that we have an Internet connection that allows us to reach our jump host ``paris-ssh.sapsailing.com`` through SSH, establishing various port forwards. We also expect TracTrac to have their primary server available. Furthermore, we assume both our laptops to be in service. ``sap-p1-1`` then runs the master server instance, ``sap-p1-2`` runs a local replica. The master on ``sap-p1-1`` replicates the central security service at ``security-service.sapsailing.com`` using the RabbitMQ installation on ``rabbit.internal.sapsailing.com`` in the AWS region `eu-west-1`. The port forwarding through `paris-ssh.sapsailing.com` (in `eu-west-3`) to the internal RabbitMQ address (in eu-west-1) works through VPC peering. The RabbitMQ instance used for outbound replication, both into the cloud and for the on-site replica, is `rabbit-eu-west-3.sapsailing.com`. The replica on ``sap-p1-2`` obtains its replication stream from there; for the HTTP connection used for "reverse replication" it connects directly to ``sap-p1-1``. The outside world, in particular all "S-paris2024-m" master security groups in all supported regions, accesses the on-site master through a reverse port forward on our jump host ``paris-ssh.sapsailing.com:8888`` which under regular operations points to ``sap-p1-1:8888`` where the master process runs. |
|
| 147 | + |
|
| 148 | +On both laptops we establish a port forward from ``localhost:22443`` to ``sapsailing.com:443``. Together with the alias in ``/etc/hosts`` that aliases ``www.sapsailing.com`` to ``localhost``, requests to ``www.sapsailing.com:22443`` will end up on the archive server. |
|
| 149 | + |
|
| 150 | +On both laptops, we maintain SSH connections to ``localhost`` with port forwards to the current TracTrac production server for HTTP, live data, and stored data. In the test we did on 2021-05-25, those port numbers were 9081, 14001, and 14011, respectively, for the primary server, and 9082, 14002, and 14012, respectively, for the secondary server. In addition to these port forwards, an entry in ``/etc/hosts`` is required for the hostname that TracTrac will use on site for their server(s), pointing to ``127.0.0.1`` to let the Sailing Analytics process connect to localhost with the port forwards. Tests have shown that if the port forwards are changed during live operations, e.g., to point to the secondary instead of the primary TracTrac server, the TracAPI continues smoothly. This is a great way of handling such a fail-over without necessarily having to re-start our master server or reconnect to all live races. |
|
| 151 | + |
|
| 152 | +Furthermore, for administrative SSH access from outside, we establish reverse port forwards from our jump host ``paris-ssh.sapsailing.com`` to the SSH ports on ``sap-p1-1`` (on port 18122) and ``sap-p1-2`` (on port 18222). |
|
| 153 | + |
|
| 154 | +Both laptops have a forward from ``localhost:22222`` to ``sapsailing.com:22`` through ``paris-ssh.sapsailing.com``, so that a git remote named ``ssh`` with the URL ``ssh://trac@localhost:22222/home/trac/git`` can be used. |
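
A hedged example of how such a remote could be added and used (remote name and URL as described above):

```
# Add the remote once, then fetch/push through the tunnel:
git remote add ssh ssh://trac@localhost:22222/home/trac/git
git fetch ssh
```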
|
| 155 | + |
|
| 156 | +The port forwards vary for exceptional situations, such as when the Internet connection is not available, or when ``sap-p1-1`` that regularly runs the master process fails and we need to make ``sap-p1-2`` the new master. See below for the details of the configurations for those scenarios. |
|
| 157 | + |
|
| 158 | +The tunnel configurations are established and configured using a set of scripts, each to be found under ``/usr/local/bin`` on each of the two laptops. |
|
| 159 | + |
|
| 160 | +#### ssh_config and sshd_config tweaks |
|
| 161 | + |
|
| 162 | +In order to recover quickly from failures we changed ``/etc/ssh/ssh_config`` on both of the P1s and added the following parameters: |
|
| 163 | +``` |
|
| 164 | +ExitOnForwardFailure yes |
|
| 165 | +ConnectTimeout 10 |
|
| 166 | +ServerAliveCountMax 3 |
|
| 167 | +ServerAliveInterval 10 |
|
| 168 | +``` |
|
| 169 | +For the server side, on paris-ssh and on both P1s the following parameters have been added to ``/etc/ssh/sshd_config``: |
|
| 170 | +``` |
|
| 171 | +ClientAliveInterval 3 |
|
| 172 | +ClientAliveCountMax 3 |
|
| 173 | +``` |
|
| 174 | + |
|
| 175 | +``ExitOnForwardFailure`` forces ssh to exit if one of the port forwards cannot be established. ``ConnectTimeout`` is the time in seconds after which an initial connection attempt fails. The alive-interval settings (client and server side) define the interval in seconds after which ssh/sshd send their keep-alive probes, and the count-max settings define how many unanswered probes are tolerated before the connection is considered dead. |
|
| 176 | + |
|
| 177 | +The settings have been verified by executing a network change on both laptops; the SSH tunnels come back after a couple of seconds. |
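
To inspect the effective values, OpenSSH can print its resolved configuration (a quick check, not part of the monitoring scripts):

```
# On a P1: show the effective client options for connections to the jump host
ssh -G paris-ssh.sapsailing.com | grep -iE 'exitonforwardfailure|connecttimeout|serveralive'
# On paris-ssh or a P1: dump the effective sshd configuration
sudo sshd -T | grep -iE 'clientalive'
```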
|
| 178 | + |
|
| 179 | +#### Regular Operations: master on sap-p1-1, replica on sap-p1-2, with Internet / Cloud connection |
|
| 180 | + |
|
| 181 | +On sap-p1-1 two SSH connections are maintained, with the following default port forwards, assuming sap-p1-1 is the local master: |
|
| 182 | + |
|
| 183 | +* paris-ssh.sapsailing.com: 10203-->10203; 5673-->rabbit-eu-west-3.sapsailing.com:5672; 15673-->rabbit-eu-west-3.sapsailing.com:15672; 5675:rabbit.internal.sapsailing.com:5672; 15675:rabbit.internal.sapsailing.com:15672; 10201<--10201; 18122<--22; 443:security-service.sapsailing.com:443; 8888<--8888; 9443<--9443 (for how this notation maps to ``ssh``/``autossh`` options, see the sketch after this list) |
|
| 184 | +* sap-p1-2: 10202-->10202; 10201<--10201 |
|
| 185 | + |
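
As an abbreviated, hedged sketch (not the actual ``tunnels`` script), the ``sap-p1-1`` forwards toward the jump host map to ``autossh``/``ssh`` options roughly like this, with ``-->`` becoming a local forward (``-L``) and ``<--`` a reverse forward (``-R``); only some of the forwards are shown:

```
# Sketch only; the real tunnels script may differ in options and completeness.
# Binding local port 443 requires root privileges.
autossh -M 0 -f -N \
  -L 10203:localhost:10203 \
  -L 5673:rabbit-eu-west-3.sapsailing.com:5672 \
  -L 5675:rabbit.internal.sapsailing.com:5672 \
  -L 443:security-service.sapsailing.com:443 \
  -R 10201:localhost:10201 \
  -R 18122:localhost:22 \
  -R 8888:localhost:8888 \
  ec2-user@paris-ssh.sapsailing.com
```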
|
| 186 | +On sap-p1-2, the following SSH connections are maintained, assuming sap-p1-2 is the local replica: |
|
| 187 | + |
|
| 188 | +- paris-ssh.sapsailing.com: 10203-->10203; 5673-->rabbit-eu-west-3.sapsailing.com:5672; 15673-->rabbit-eu-west-3.sapsailing.com:15672; 5675:rabbit.internal.sapsailing.com:5672; 15675:rabbit.internal.sapsailing.com:15672; 10202<--10202; 9444<--9443 |
|
| 189 | + |
|
| 190 | +A useful set of entries in your personal ``~/.ssh/config`` file for "off-site" use may look like this: |
|
| 191 | + |
|
| 192 | +``` |
|
| 193 | +Host paris |
|
| 194 | + Hostname paris-ssh.sapsailing.com |
|
| 195 | + User ec2-user |
|
| 196 | + ForwardAgent yes |
|
| 197 | + ForwardX11Trusted yes |
|
| 198 | + LocalForward 18122 localhost:18122 |
|
| 199 | + LocalForward 18222 localhost:18222 |
|
| 200 | + LocalForward 9443 localhost:9443 |
|
| 201 | + LocalForward 9444 localhost:9444 |
|
| 202 | + |
|
| 203 | +Host sap-p1-1 |
|
| 204 | + Hostname localhost |
|
| 205 | + Port 18122 |
|
| 206 | + User sailing |
|
| 207 | + ForwardAgent yes |
|
| 208 | + ForwardX11Trusted yes |
|
| 209 | + |
|
| 210 | +Host sap-p1-2 |
|
| 211 | + Hostname localhost |
|
| 212 | + Port 18222 |
|
| 213 | + User sailing |
|
| 214 | + ForwardAgent yes |
|
| 215 | + ForwardX11Trusted yes |
|
| 216 | +``` |
|
| 217 | + |
|
| 218 | +It allows you to log on to the "jump host" ``paris-ssh.sapsailing.com`` with the simple command ``ssh paris`` and establishes the port forwards that then allow you to connect to the two laptops using ``ssh sap-p1-1`` and ``ssh sap-p1-2``, respectively. Of course, when on site and with the two laptops in direct reach, you may adjust the host entries for ``sap-p1-1`` and ``sap-p1-2`` accordingly; you may then wish to establish only an SSH connection to ``sap-p1-1`` which also provides the port forwards for the HTTPS ports 9443/9444. This could look as follows: |
|
| 219 | + |
|
| 220 | +``` |
|
| 221 | +Host sap-p1-1 |
|
| 222 | + Hostname 10.1.3.195 |
|
| 223 | + Port 22 |
|
| 224 | + User sailing |
|
| 225 | + ForwardAgent yes |
|
| 226 | + ForwardX11Trusted yes |
|
| 227 | + LocalForward 9443 localhost:9443 |
|
| 228 | + LocalForward 9444 10.1.3.197:9443 |
|
| 229 | + |
|
| 230 | +Host sap-p1-2 |
|
| 231 | + Hostname 10.1.3.197 |
|
| 232 | + Port 22 |
|
| 233 | + User sailing |
|
| 234 | + ForwardAgent yes |
|
| 235 | + ForwardX11Trusted yes |
|
| 236 | +``` |
|
| 237 | + |
|
| 238 | +#### Operations with sap-p1-1 failing: master on sap-p1-2, with Internet / Cloud connection |
|
| 239 | + |
|
| 240 | +On sap-p1-1, if the operating system still runs and the failure affects only the Java process running the SAP Sailing Analytics, two SSH connections are maintained, with the following default port forwards, assuming sap-p1-1 is not running an SAP Sailing Analytics process currently: |
|
| 241 | + |
|
| 242 | +* paris-ssh.sapsailing.com: 10203-->10203; 5673-->rabbit-eu-west-3.sapsailing.com:5672; 15673-->rabbit-eu-west-3.sapsailing.com:15672; 5675:rabbit.internal.sapsailing.com:5672; 15675:rabbit.internal.sapsailing.com:15672; 10201<--10201; 18122<--22; 443:security-service.sapsailing.com:443 |
|
| 243 | +* sap-p1-2: 10202-->10202; 10201<--10201 |
|
| 244 | + |
|
| 245 | +On sap-p1-2 two SSH connections are maintained, with the following default port forwards, assuming sap-p1-2 is the local master: |
|
| 246 | + |
|
| 247 | +* paris-ssh.sapsailing.com: 10203-->10203; 5673-->rabbit-eu-west-3.sapsailing.com:5672; 15673-->rabbit-eu-west-3.sapsailing.com:15672; 5675:rabbit.internal.sapsailing.com:5672; 15675:rabbit.internal.sapsailing.com:15672; 10202<--10202; 18222<--22; 443:security-service.sapsailing.com:443; 8888<--8888 |
|
| 248 | +* sap-p1-1 (if the operating system on sap-p1-1 still runs): 10202-->10202; 10201<--10201 |
|
| 249 | + |
|
| 250 | +So the essential change is that the reverse forward from ``paris-ssh.sapsailing.com:8888`` now targets ``sap-p1-2:8888`` where we now assume the failover master to be running. |
|
| 251 | + |
|
| 252 | +#### Operations with Internet failing |
|
| 253 | + |
|
| 254 | +When the Internet connection fails, replicating the security service from ``security-service.sapsailing.com`` / ``rabbit.internal.sapsailing.com`` will no longer be possible. Neither will outbound replication to ``rabbit-eu-west-3.sapsailing.com`` be possible, and cloud replicas won't be able to reach the on-site master anymore through the ``paris-ssh.sapsailing.com:8888`` reverse port forward. This also affects the local on-site replica, which will no longer be able to reach ``rabbit-eu-west-3.sapsailing.com``, the instance that provides it with its operation stream under regular circumstances. |
|
| 255 | + |
|
| 256 | +There is little we can do about the lack of an Internet connection as far as providing data to the cloud replicas and maintaining replication with ``security-service.sapsailing.com`` are concerned (we could theoretically try to work with local WiFi hotspots; but the key problem is that TracTrac would then not have Internet connectivity for their on-site server either, and we would have to switch radically to a cloud-only set-up, which is probably beyond what we would do in this case). But we can ensure continued local operations with the replica on ``sap-p1-2`` by now using a local on-site RabbitMQ installation between the two instances. For this, we replace the port forwards that during regular operations point to ``rabbit-eu-west-3.sapsailing.com`` with port forwards pointing to the RabbitMQ process on ``sap-p1-2``. |
|
| 257 | + |
|
| 258 | +On ``sap-p1-1`` an SSH connection to ``sap-p1-2`` is maintained, with the following port forwards: |
|
| 259 | + |
|
| 260 | +* sap-p1-2: 10202-->10202; 10201<--10201; 5673-->localhost:5672 |
|
| 261 | + |
|
| 262 | +So the essential changes are that there are no more SSH connections into the cloud, and the port forward on each laptop's port 5673, which would point to ``rabbit-eu-west-3.sapsailing.com`` during regular operations, now points to ``sap-p1-2:5672`` where the RabbitMQ installation takes over from the cloud instance. |
|
| 263 | + |
|
| 264 | +### Letsencrypt Certificate for paris2024.sapsailing.com, security-service.sapsailing.com and paris2024-master.sapsailing.com |
|
| 265 | + |
|
| 266 | +In order to allow us to access ``paris2024.sapsailing.com`` and ``security-service.sapsailing.com`` with any HTTPS port forwarding locally so that all ``JSESSION_GLOBAL`` etc. cookies with their ``Secure`` attribute are delivered properly, we need an SSL certificate. I've created one by doing |
|
| 267 | + |
|
| 268 | +``` |
|
| 269 | +/usr/bin/sudo -u certbot docker run --rm -it --name certbot -v "/etc/letsencrypt:/etc/letsencrypt" -v "/var/lib/letsencrypt:/var/lib/letsencrypt" certbot/certbot certonly --manual -d paris2024.sapsailing.com |
|
| 270 | +/usr/bin/sudo -u certbot docker run --rm -it --name certbot -v "/etc/letsencrypt:/etc/letsencrypt" -v "/var/lib/letsencrypt:/var/lib/letsencrypt" certbot/certbot certonly --manual -d security-service.sapsailing.com |
|
| 271 | +``` |
|
| 272 | + |
|
| 273 | +as ``root`` on ``sapsailing.com``. The challenge displayed can be solved by creating an ALB rule for the hostname header ``paris2024.sapsailing.com`` and the path as issued in the output of the ``certbot`` command, and, as action, specifying a fixed response with response code 200 and the challenge data printed by the ``certbot`` command pasted as text/plain. Wait a few seconds, then confirm the Certbot prompt. The certificate will be issued and stored under ``/etc/letsencrypt/live/paris2024.sapsailing.com`` from where I copied it to ``/home/sailing/Downloads/letsencrypt`` on both laptops for later use with a local web server. The certificate will expire on 2021-08-19, i.e., after the Olympic Games, so we don't have to worry about renewing it. |
|
| 274 | + |
|
| 275 | +### Local NGINX Webserver Setup |
|
| 276 | + |
|
| 277 | +In order to be able to access the applications running on the local on-site laptops using HTTPS there is a web server on each of the two laptops, listening on port 9443 (HTTPS). The configuration for this is under ``/etc/nginx/sites-enabled/paris2024`` and looks like this: |
|
| 278 | + |
|
| 279 | +``` |
|
| 280 | +server { |
|
| 281 | + listen 9443 ssl; |
|
| 282 | + server_name paris2024.sapsailing.com; |
|
| 283 | + ssl_certificate /etc/ssl/certs/paris2024.sapsailing.com.crt; |
|
| 284 | + ssl_certificate_key /etc/ssl/private/paris2024.sapsailing.com.key; |
|
| 285 | + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; |
|
| 286 | + ssl_ciphers HIGH:!aNULL:!MD5; |
|
| 287 | + |
|
| 288 | + location / { |
|
| 289 | + proxy_pass http://127.0.0.1:8888; |
|
| 290 | + } |
|
| 291 | +} |
|
| 292 | +``` |
|
| 293 | + |
|
| 294 | +The "Let's Encrypt"-provided certificate is used for SSL termination. With paris2024.sapsailing.com aliased in ``/etc/hosts`` to the address of the current master server, this allows accessing ``https://paris2024.sapsailing.com:9443`` with all benefits of cookie / session authentication. |
|
| 295 | + |
|
| 296 | +Likewise, ``/etc/nginx/sites-enabled/security-service`` forwards to 127.0.0.1:8889 where a local copy of the security service may be deployed in case the Internet fails. In this case, the local port 443 must be forwarded to the NGINX port 9443 instead of security-service.sapsailing.com:443 through paris-ssh.sapsailing.com. |
|
| 297 | + |
|
| 298 | +On sap-p1-1 there is currently an nginx instance serving ``paris2024-master.sapsailing.com`` with the following configuration: |
|
| 299 | + |
|
| 300 | +``` |
|
| 301 | +server { |
|
| 302 | + listen 9443 ssl; |
|
| 303 | + server_name paris2024-master.sapsailing.com; |
|
| 304 | + ssl_certificate /etc/ssl/private/paris2024-master.sapsailing.com.fullchain.pem; |
|
| 305 | + ssl_certificate_key /etc/ssl/private/paris2024-master.sapsailing.com.privkey.pem; |
|
| 306 | + ssl_protocols TLSv1 TLSv1.1 TLSv1.2; |
|
| 307 | + ssl_ciphers HIGH:!aNULL:!MD5; |
|
| 308 | + |
|
| 309 | + location / { |
|
| 310 | + proxy_pass http://127.0.0.1:8888; |
|
| 311 | + } |
|
| 312 | +} |
|
| 313 | +``` |
|
| 314 | + |
|
| 315 | + |
|
| 316 | + |
|
| 317 | +### Backup |
|
| 318 | + |
|
| 319 | +borgbackup is used to back up the ``/`` directory of each laptop to the respective other machine. The borg repository is located in ``/backup``. |
|
| 320 | + |
|
| 321 | +The backup from sap-p1-1 to sap-p1-2 runs at 01:00 each day, and the backup from sap-p1-2 to sap-p1-1 runs at 02:00 each day. Details about the configuration can be found in ``/root/borg-backup.sh`` on either machine. Log files for the backup runs are in ``/var/log/backup.log``. The crontab file is in ``/root``. |
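
As a hedged sketch of what the daily run might roughly look like (the authoritative configuration is ``/root/borg-backup.sh``; the repository URL, user, and excludes here are assumptions):

```
# Sketch only: back up / from sap-p1-1 into the borg repository on sap-p1-2.
borg create --stats --one-file-system \
  --exclude /backup --exclude /proc --exclude /sys --exclude /dev \
  ssh://root@sap-p1-2/backup::sap-p1-1-{now:%Y-%m-%d} /
```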
|
| 322 | + |
|
| 323 | +Both ``/backup`` folders have been mirrored to an S3 bucket called ``backup-sap-p1`` on June 14th. |
|
| 324 | + |
|
| 325 | +### Monitoring and e-Mail Alerting |
|
| 326 | + |
|
| 327 | +To be able to use ``sendmail`` for sending notifications via e-mail, it needs to be installed and configured to use AWS SES as an SMTP relay: |
|
| 328 | +``` |
|
| 329 | +sudo apt install sendmail |
|
| 330 | +``` |
|
| 331 | + |
|
| 332 | +Follow the instructions on [https://docs.aws.amazon.com/ses/latest/DeveloperGuide/send-email-sendmail.html](https://docs.aws.amazon.com/ses/latest/DeveloperGuide/send-email-sendmail.html) with one exception: the content that needs to be added to ``sendmail.mc`` looks like this: |
|
| 333 | +``` |
|
| 334 | +define(`SMART_HOST', `email-smtp.eu-west-1.amazonaws.com')dnl |
|
| 335 | +define(`RELAY_MAILER_ARGS', `TCP $h 587')dnl |
|
| 336 | +define(`confAUTH_MECHANISMS', `LOGIN PLAIN')dnl |
|
| 337 | +FEATURE(`authinfo', `hash -o /etc/mail/authinfo.db')dnl |
|
| 338 | +MASQUERADE_AS(`sapsailing.com')dnl |
|
| 339 | +FEATURE(masquerade_envelope)dnl |
|
| 340 | +FEATURE(masquerade_entire_domain)dnl |
|
| 341 | +``` |
|
| 342 | +The authentication details can be fetched from the content of ``/root/mail.properties`` of any running sailing EC2 instance. |
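
For reference, the remaining steps from the AWS guide look roughly like the following (a sketch; the SMTP user name and password are placeholders to be taken from ``/root/mail.properties``):

```
# /etc/mail/authinfo holds the SES SMTP credentials, e.g.:
#   AuthInfo:email-smtp.eu-west-1.amazonaws.com "U:root" "I:SMTP-USERNAME" "P:SMTP-PASSWORD" "M:LOGIN"
sudo makemap hash /etc/mail/authinfo.db < /etc/mail/authinfo
# Regenerate sendmail.cf from sendmail.mc and restart sendmail:
sudo sh -c 'm4 /etc/mail/sendmail.mc > /etc/mail/sendmail.cf'
sudo systemctl restart sendmail
```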
|
| 343 | + |
|
| 344 | +Both laptops, ``sap-p1-1`` and ``sap-p1-2`` have monitoring scripts from the git folder ``configuration/on-site-scripts`` linked to ``/usr/local/bin``. These in particular include ``monitor-autossh-tunnels`` and ``monitor-mongo-replica-set-delay`` as well as a ``notify-operators`` script which contains the list of e-mail addresses to notify in case an alert occurs. |
|
| 345 | + |
|
| 346 | +The ``monitor-autossh-tunnels`` script checks all running ``autossh`` processes and looks for their corresponding ``ssh`` child processes. If any of them is missing, an alert is sent using ``notify-operators``. |
|
| 347 | + |
|
| 348 | +The ``monitor-mongo-replica-set-delay`` script looks at the result of calling ``rs.printSecondaryReplicationInfo()`` and logs it to ``/tmp/mongo-replica-set-delay``. The average of the last ten values is compared to a threshold (currently 3s), and an alert is sent using ``notify-operators`` if the threshold is exceeded. |
|
| 349 | + |
|
| 350 | +The ``monitor-disk-usage`` script checks the partition holding ``/var/lib/mongodb/``. Should it fill up to more than 90%, an alert will be sent using ``notify-operators``. |
|
| 351 | + |
|
| 352 | +### Time Synchronization |
|
| 353 | +We set up the ``chronyd`` service on a desktop machine that regularly connects via VPN and relays the time to the two P1s. Added |
|
| 354 | +``` |
|
| 355 | +# Paris2024 configuration |
|
| 356 | +server 10.1.3.221 iburst |
|
| 357 | +``` |
|
| 358 | +to ``/etc/chrony/chrony.conf`` on the clients. |
|
| 359 | +Added |
|
| 360 | +``` |
|
| 361 | +# FOR PARIS SERVER SETUP |
|
| 362 | +allow all |
|
| 363 | +local stratum 10 |
|
| 364 | +``` |
|
| 365 | +to the server's ``chrony.conf``, then started the ``chronyd`` service. |
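
To check that the synchronization works, ``chrony``'s client tool can be used on the P1s (a quick check, not part of the monitoring scripts):

```
# Show the configured sources (the on-site server 10.1.3.221 should be listed and selected)
chronyc sources -v
# Show offset and stratum details of the currently selected source
chronyc tracking
```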
|
| 366 | + |
|
| 367 | +## AWS Setup |
|
| 368 | + |
|
| 369 | +Our primary AWS region for the event will be Paris (eu-west-3). There, we have reserved the elastic IP ``13.39.66.118`` to which we've mapped the Route53 hostname ``paris-ssh.sapsailing.com`` with a simple A-record. The host assigned to the IP/hostname is to be used as a "jump host" for SSH tunnels. It runs Amazon Linux with a login user named ``ec2-user``. The ``ec2-user`` has ``sudo`` permission. In the root user's crontab we have hooked up the same set of scripts that in our eu-west-1 production landscape are responsible for obtaining and installing the landscape managers' SSH public keys into the login user's account, aligning the set of ``authorized_keys`` with those of the registered landscape managers (users with permission ``LANDSCAPE:MANAGE:AWS``). The ``authorized_keys.org`` file also contains the two public SSH keys of the ``sailing`` accounts on the two laptops, so each time the script produces a new ``authorized_keys`` file for the ``ec2-user``, the ``sailing`` keys for the laptop tunnels don't get lost. |
|
| 370 | + |
|
| 371 | +I added the EPEL repository like this: |
|
| 372 | + |
|
| 373 | +``` |
|
| 374 | + yum install https://dl.fedoraproject.org/pub/epel/epel-release-latest-7.noarch.rpm |
|
| 375 | +``` |
|
| 376 | + |
|
| 377 | +Our "favorite" Availability Zone (AZ) in eu-west-3 is "1a" / "eu-west-3a". |
|
| 378 | + |
|
| 379 | +The same host ``paris-ssh.sapsailing.com`` also runs a MongoDB 3.6 instance on port 10203. |
|
| 380 | + |
|
| 381 | +For RabbitMQ we run a separate host, based on AWS Ubuntu 20. It ships with the ``rabbitmq-server`` package (version 3.8.2 on Erlang 22.2.7), which we installed with default settings, except for the following change: in the new file ``/etc/rabbitmq/rabbitmq.conf`` we enter the line |
|
| 382 | + |
|
| 383 | +``` |
|
| 384 | + loopback_users = none |
|
| 385 | +``` |
|
| 386 | + |
|
| 387 | +which allows clients from other hosts to connect (note that this works differently on different versions of RabbitMQ; the local laptops have to use a different syntax in their ``rabbitmq.config`` file). The security groups for the RabbitMQ server are configured such that only ``172.0.0.0/8`` addresses from our VPCs can connect. |
|
| 388 | + |
|
| 389 | +The RabbitMQ management plugin is enabled using ``rabbitmq-plugins enable rabbitmq_management`` for access from localhost. This again requires an SSH tunnel to the host. The host's default user is ``ubuntu``. The RabbitMQ management plugin is active on port 15672 and accessible only from localhost or through an SSH tunnel with a port forward ending at this host. RabbitMQ itself listens on the default port 5672. With this set-up, RabbitMQ traffic for this event remains independent and undisturbed from any other RabbitMQ traffic from other servers in our default ``eu-west-1`` landscape, such as ``my.sapsailing.com``. The hostname pointing to the internal IP address of the RabbitMQ host is ``rabbit-eu-west-3.sapsailing.com``; its DNS record has a TTL of 60s. |
|
| 390 | + |
|
| 391 | +An autossh tunnel is established from ``paris-ssh.sapsailing.com`` to ``rabbit-eu-west-3.sapsailing.com`` which forwards port 15673 to port 15672, thus exposing the RabbitMQ web interface which otherwise only responds to localhost. This autossh tunnel is established by a systemd service described in ``/etc/systemd/system/autossh-port-forwards.service`` on ``paris-ssh.sapsailing.com``. |
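
A minimal sketch of what such a unit might look like, assuming key-based SSH access from ``paris-ssh`` to the RabbitMQ host as user ``ubuntu`` (the actual unit file on ``paris-ssh.sapsailing.com`` is authoritative):

```
# /etc/systemd/system/autossh-port-forwards.service (sketch only)
[Unit]
Description=autossh forward 15673 -> rabbit-eu-west-3:15672 (RabbitMQ management UI)
After=network-online.target

[Service]
# Retry even if the very first connection attempt fails quickly
Environment=AUTOSSH_GATETIME=0
# -M 0 relies on the ServerAlive settings from /etc/ssh/ssh_config for liveness detection
ExecStart=/usr/bin/autossh -M 0 -N -L 15673:localhost:15672 ubuntu@rabbit-eu-west-3.sapsailing.com
Restart=always
RestartSec=10

[Install]
WantedBy=multi-user.target
```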
|
| 392 | + |
|
| 393 | +### Local setup of rabbitmq |
|
| 394 | + |
|
| 395 | +The above configuration also needs to be applied to the RabbitMQ installations on the P1s. There, the ``rabbitmq-server`` package has version 3.6.10. In that version the config file is located at ``/etc/rabbitmq/rabbitmq.config``, and the corresponding entry is ``[{rabbit, [{loopback_users, []}]}].`` Further documentation for this version can be found here: [http://previous.rabbitmq.com/v3_6_x/configure.html](http://previous.rabbitmq.com/v3_6_x/configure.html) |
|
| 396 | + |
|
| 397 | +### Cross-Region VPC Peering |
|
| 398 | + |
|
| 399 | +The primary AWS region for the paris2024 replica set is eu-west-3 (Paris). In order to provide low latencies for the RHBs we'd like to add replicas also in other regions. Since we do not want to expose the RabbitMQ running in eu-west-3 to the outside world, we plan to peer the VPCs of the other regions with the one in eu-west-3. |
|
| 400 | + |
|
| 401 | +The prerequisite for VPCs to be peered is that their CIDRs (such as 172.31.0.0/16) don't overlap. The default VPC in each region always uses the same CIDR (172.31.0.0/16), hence in order to peer VPCs all but one must be a non-default VPC. To avoid confusion when launching instances or setting up security groups, it makes sense for the peering regions other than our default region ``eu-west-1`` to set up non-default VPCs with peering-capable CIDRs and remove the default VPC. This way users cannot accidentally launch instances or define security groups for any VPC other than the peered one. |
|
| 402 | + |
|
| 403 | +After the VPCs have been peered, each VPC's default route table must be extended by a route to the peered VPC's CIDR using the peering connection. |
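
With the AWS CLI this boils down to something like the following (route table ID, peering connection ID, and the destination CIDR are placeholders):

```
# Add a route in the eu-west-3 VPC's route table towards the peered VPC's CIDR
aws --region eu-west-3 ec2 create-route \
  --route-table-id rtb-0123456789abcdef0 \
  --destination-cidr-block 172.31.0.0/16 \
  --vpc-peering-connection-id pcx-0123456789abcdef0
```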
|
| 404 | + |
|
| 405 | +With peering in place it is possible to reach instances in peered VPCs by their internal IPs. In particular, it is possible to connect to a RabbitMQ instance with the internal IP and port 5672 even if that RabbitMQ runs in a different region whose VPC is peered. |
|
| 406 | + |
|
| 407 | +### Global Accelerator |
|
| 408 | + |
|
| 409 | +We have created a Global Accelerator [Paris2024](https://us-west-2.console.aws.amazon.com/ec2/v2/home?region=us-west-2#AcceleratorDetails:AcceleratorArn=arn:aws:globalaccelerator::017363970217:accelerator/TODO) which manages cross-region load balancing for us. There are two listeners: one for port 80 (HTTP) and one for port 443 (HTTPS). For each region an endpoint group must be created for both of the listeners, and the application load balancer (ALB) in that region has to be added as an endpoint. |
|
| 410 | + |
|
| 411 | +The Route53 entry ``paris2024.sapsailing.com`` now is an alias A record pointing to this global accelerator (``TODO.awsglobalaccelerator.com.``). |
|
| 412 | + |
|
| 413 | +### Geo-Blocking |
|
| 414 | + |
|
| 415 | +While for Tokyo 2020 this was not requested, for Paris 2024 we have heard rumors that it may be. If it is, the [AWS Web Application Firewall (WAF)](https://us-east-1.console.aws.amazon.com/wafv2/homev2/start) provides the solution. There, we can create so-called Web Access Control Lists (Web ACLs), one per region in which an ALB is used. |
|
| 416 | + |
|
| 417 | +A Web ACL consists of a number of rules and has a default action (typically "Allow" or "Block") for those requests not matched by any rule. An ACL can be associated with one or more resources, in particular with Application Load Balancers (ALBs) deployed in the region. |
|
| 418 | + |
|
| 419 | +Rules, in turn, consist of statements that can be combined using logical operators. The rule type of interest for geo-blocking is "Originates from a country in", where one or more countries can be selected. Combined with an "Allow" or "Block" action, this yields the desired geo-blocking behavior. |
|
| 420 | + |
|
| 421 | +For requests blocked by the rule, the response code, response headers and message body to return to the client can be configured. We can use this, e.g., to configure a 301 re-direct to a static page that informs the user about the geo-blocking. |
|
| 422 | + |
|
| 423 | +### Application Load Balancers (ALBs) and Target Groups |
|
| 424 | + |
|
| 425 | +In each region supported, a dedicated load balancer for the Global Accelerator-based event setup has been set up (``Paris2024ALB`` or simply ``ALB``). A single target group with the usual settings (port 8888, health check on ``/gwt/status``, etc.) must exist: ``S-paris2024`` (public). |
|
| 426 | + |
|
| 427 | +Note that no dedicated ``-m`` master target group is established. The reason is that the AWS Global Accelerator judges an ALB's health by looking at _all_ its target groups; should only a single target group not have a healthy target, the Global Accelerator considers the entire ALB unhealthy. With this, as soon as the on-site master server is unreachable, e.g., during an upgrade, all those ALBs would enter the "unhealthy" state from the Global Accelerator's perspective, and all public replicas which are still healthy would no longer receive traffic; the site would go "black." Therefore, we must ensure that the ALBs targeted by the Global Accelerator only have a single target group which only has the public replicas in that region as its targets. |
|
| 428 | + |
|
| 429 | +Each ALB has an HTTP and an HTTPS listener. The HTTP listener has only a single rule redirecting all traffic permanently (301) to the corresponding HTTPS request. The HTTPS listener has three rules: the ``/`` path for ``paris2024.sapsailing.com`` is re-directed to the Olympic event with ID ``TODO``. All other traffic for ``paris2024.sapsailing.com`` goes to the public target group holding the regional replica(s). A default rule returns a 404 status with a static ``Not found`` text. |
|
| 430 | + |
|
| 431 | +## Landscape Architecture |
|
| 432 | + |
|
| 433 | +We have applied for a single SSH tunnel to IP address ``13.39.66.118``, the elastic IP of our SSH jump host in eu-west-3 (see above). |
|
| 434 | + |
|
| 435 | +The default production set-up is defined as follows: |
|
| 436 | + |
|
| 437 | +### MongoDB |
|
| 438 | + |
|
| 439 | +Three MongoDB nodes are intended to run during regular operations: sap-p1-1:10201, sap-p1-2:10202, and paris-ssh.sapsailing.com:10203. Since we have to work with SSH tunnels to keep things connected, we map everything using ``localhost`` ports such that sap-p1-2 and paris-ssh both see sap-p1-1:10201 as their localhost:10201, sap-p1-1 and paris-ssh both see sap-p1-2:10202 as their respective localhost:10202, and sap-p1-1 and sap-p1-2 both see paris-ssh:10203 as their localhost:10203. This way, the MongoDB URI can be specified as |
|
| 440 | + |
|
| 441 | +``` |
|
| 442 | + mongodb://localhost:10201,localhost:10202,localhost:10203/paris2024?replicaSet=paris2024&retryWrites=true&readPreference=nearest |
|
| 443 | +``` |
|
| 444 | + |
|
| 445 | +The cloud replica is not supposed to become primary, except maybe in the unlikely event that operations move entirely to the cloud. To achieve this, the cloud replica has priority 0, which can be configured like this: |
|
| 446 | + |
|
| 447 | +``` |
|
| 448 | + paris2024:PRIMARY> cfg = rs.conf() |
|
| 449 | + # Then search for the member localhost:10203; let's assume, it's in cfg.members[0] : |
|
| 450 | + cfg.members[0].priority=0 |
|
| 451 | + rs.reconfig(cfg) |
|
| 452 | +``` |
|
| 453 | + |
|
| 454 | +All cloud replicas shall use a MongoDB database name ``paris2024-replica``. In those regions where we don't have dedicated MongoDB support established (basically all but eu-west-1 currently), an image should be used that has a MongoDB server configured to use ``/home/sailing/mongo`` as its data directory and ``replica`` as its replica set name. See AMI SAP Sailing Analytics App HVM with MongoDB 1.137 (ami-05b6c7b1244f49d54) in eu-west-3 (already copied to the other peered regions except eu-west-1). |
|
| 455 | + |
|
| 456 | +One way to monitor the health and replication status of the replica set is running the following command: |
|
| 457 | + |
|
| 458 | +``` |
|
| 459 | + watch 'echo "rs.printSecondaryReplicationInfo()" | \ |
|
| 460 | + mongo "mongodb://localhost:10201/?replicaSet=paris2024&retryWrites=true&readPreference=nearest" | \ |
|
| 461 | + grep "\(^source:\)\|\(syncedTo:\)\|\(behind the primary\)"' |
|
| 462 | +``` |
|
| 463 | + |
|
| 464 | +It shows the replication state and in particular the delay of the replicas. In addition, a cron job exists for ``sailing@sap-p1-1`` which triggers ``/usr/local/bin/monitor-mongo-replica-set-delay`` every minute and uses ``/usr/local/bin/notify-operators`` to send out alerts in case the average replication delay over the last ten read-outs exceeds a threshold (currently 3s). |
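
A sketch of the corresponding crontab entry for ``sailing@sap-p1-1`` (the installed crontab is authoritative):

```
# Run the replication-delay monitor once per minute
* * * * * /usr/local/bin/monitor-mongo-replica-set-delay
```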
|
| 465 | + |
|
| 466 | +In order to have a local copy of the ``security_service`` database, a CRON job exists for user ``sailing`` on ``sap-p1-1`` which executes the ``/usr/local/bin/clone-security-service-db-safe-exit`` script (versioned in git under ``configuration/on-site-scripts/clone-security-service-db-safe-exit``) once per hour; see ``/home/sailing/crontab``. The script dumps ``security_service`` from the ``live`` replica set in ``eu-west-1`` to the ``/tmp/dump`` directory on ``ec2-user@paris-ssh.sapsailing.com``, sends the directory content as a ``tar.gz`` stream through SSH, and restores it on the local ``mongodb://sap-p1-1:27017,sap-p1-2/security_service?replicaSet=security_service`` replica set, after copying an existing local ``security_service`` database to ``security_service_bak``. This way, even if the Internet connection dies during this cloning process, a valid copy still exists in the local ``security_service`` replica set and can be copied back to ``security_service`` using the MongoDB shell command |
|
| 467 | + |
|
| 468 | +``` |
|
| 469 | + db.copyDatabase("security_service_bak", "security_service") |
|
| 470 | +``` |
|
| 471 | + |
|
| 472 | +### Master |
|
| 473 | + |
|
| 474 | +The master configuration is described in ``/home/sailing/servers/master/master.conf`` and can be used to produce a clean set-up like this: |
|
| 475 | + |
|
| 476 | +``` |
|
| 477 | + rm env.sh; cat master.conf | ./refreshInstance.sh auto-install-from-stdin |
|
| 478 | +``` |
|
| 479 | + |
|
| 480 | +If the laptops cannot reach ``https://releases.sapsailing.com`` due to connectivity constraints, releases and environments can be downloaded through other channels to ``sap-p1-1:/home/trac/releases``, and the variable ``INSTALL_FROM_SCP_USER_AT_HOST_AND_PORT`` can be set to ``sailing@sap-p1-1`` to fetch the release file and environment file from there by SCP. Alternatively, ``sap-p1-2:/home/trac/releases`` may be used for the same. |
|
| 481 | + |
|
| 482 | +This way, a clean new ``env.sh`` file will be produced from the config file, including the download and installation of a release. The ``master.conf`` file looks approximately like this: |
|
| 483 | + |
|
| 484 | +``` |
|
| 485 | +INSTALL_FROM_RELEASE=build-202106012325 |
|
| 486 | +SERVER_NAME=paris2024 |
|
| 487 | +MONGODB_URI="mongodb://localhost:10201,localhost:10202,localhost:10203/${SERVER_NAME}?replicaSet=paris2024&retryWrites=true&readPreference=nearest" |
|
| 488 | +# RabbitMQ in eu-west-1 (rabbit.internal.sapsailing.com) is expected to be found through SSH tunnel on localhost:5675 |
|
| 489 | +# Replication of shared services from central security-service.sapsailing.com through SSH tunnel 443:security-service.sapsailing.com:443 |
|
| 490 | +# with a local /etc/hosts entry mapping security-service.sapsailing.com to 127.0.0.1 |
|
| 491 | +REPLICATE_MASTER_QUEUE_HOST=localhost |
|
| 492 | +REPLICATE_MASTER_QUEUE_PORT=5675 |
|
| 493 | +REPLICATE_MASTER_BEARER_TOKEN="***" |
|
| 494 | +# Outbound replication to RabbitMQ through SSH tunnel with port forward on port 5673, regularly to rabbit-eu-west-3.sapsailing.com |
|
| 495 | +# Can be re-mapped to the RabbitMQ running on sap-p1-2 |
|
| 496 | +REPLICATION_HOST=localhost |
|
| 497 | +REPLICATION_PORT=5673 |
|
| 498 | +USE_ENVIRONMENT=live-master-server |
|
| 499 | +ADDITIONAL_JAVA_ARGS="${ADDITIONAL_JAVA_ARGS} -Dcom.sap.sse.debranding=true" |
|
| 500 | +``` |
|
| 501 | + |
|
| 502 | +### Replicas |
|
| 503 | + |
|
| 504 | +The on-site replica on ``sap-p1-2`` can be configured with a ``replica.conf`` file in ``/home/sailing/servers/replica``, using |
|
| 505 | + |
|
| 506 | +``` |
|
| 507 | + rm env.sh; cat replica.conf | ./refreshInstance.sh auto-install-from-stdin |
|
| 508 | +``` |
|
| 509 | + |
|
| 510 | +The file looks like this: |
|
| 511 | + |
|
| 512 | +``` |
|
| 513 | +# Regular operations; sap-p1-2 replicates sap-p1-1 using the rabbit-eu-west-3.sapsailing.com RabbitMQ in the cloud through SSH tunnel. |
|
| 514 | +# Outbound replication, though not expected to become active, goes to a local RabbitMQ |
|
| 515 | +INSTALL_FROM_RELEASE=build-202106012325 |
|
| 516 | +SERVER_NAME=paris2024 |
|
| 517 | +MONGODB_URI="mongodb://localhost:10201,localhost:10202,localhost:10203/${SERVER_NAME}-replica?replicaSet=paris2024&retryWrites=true&readPreference=nearest" |
|
| 518 | +# RabbitMQ in eu-west-3 is expected to be found locally on port 5673 |
|
| 519 | +REPLICATE_MASTER_SERVLET_HOST=sap-p1-1 |
|
| 520 | +REPLICATE_MASTER_SERVLET_PORT=8888 |
|
| 521 | +REPLICATE_MASTER_QUEUE_HOST=localhost |
|
| 522 | +REPLICATE_MASTER_QUEUE_PORT=5673 |
|
| 523 | +REPLICATE_MASTER_BEARER_TOKEN="***" |
|
| 524 | +# Outbound replication to RabbitMQ running locally on sap-p1-2 |
|
| 525 | +REPLICATION_HOST=localhost |
|
| 526 | +REPLICATION_PORT=5672 |
|
| 527 | +REPLICATION_CHANNEL=${SERVER_NAME}-replica |
|
| 528 | +USE_ENVIRONMENT=live-replica-server |
|
| 529 | +ADDITIONAL_JAVA_ARGS="${ADDITIONAL_JAVA_ARGS} -Dcom.sap.sse.debranding=true" |
|
| 530 | +``` |
|
| 531 | + |
|
| 532 | +Replicas in region ``eu-west-1`` can be launched using the following user data, making use of the established MongoDB live replica set in the region: |
|
| 533 | + |
|
| 534 | +``` |
|
| 535 | +INSTALL_FROM_RELEASE=build-202106012325 |
|
| 536 | +SERVER_NAME=paris2024 |
|
| 537 | +MONGODB_URI="mongodb://mongo0.internal.sapsailing.com,mongo1.internal.sapsailing.com,dbserver.internal.sapsailing.com:10203/paris2024-replica?replicaSet=live&retryWrites=true&readPreference=nearest" |
|
| 538 | +USE_ENVIRONMENT=live-replica-server |
|
| 539 | +REPLICATION_CHANNEL=paris2024-replica |
|
| 540 | +REPLICATION_HOST=rabbit-eu-west-3.sapsailing.com |
|
| 541 | +REPLICATE_MASTER_SERVLET_HOST=paris-ssh.internal.sapsailing.com |
|
| 542 | +REPLICATE_MASTER_SERVLET_PORT=8888 |
|
| 543 | +REPLICATE_MASTER_EXCHANGE_NAME=paris2024 |
|
| 544 | +REPLICATE_MASTER_QUEUE_HOST=rabbit-eu-west-3.sapsailing.com |
|
| 545 | +REPLICATE_MASTER_BEARER_TOKEN="***" |
|
| 546 | +ADDITIONAL_JAVA_ARGS="${ADDITIONAL_JAVA_ARGS} -Dcom.sap.sse.debranding=true" |
|
| 547 | +``` |
|
| 548 | + |
|
| 549 | +(Adjust the release accordingly, of course.) (NOTE: During the first production days of the event we noticed that it was a really BAD IDEA to have all replicas use the same DB set-up, all writing to the MongoDB PRIMARY of the "live" replica set in eu-west-1. With tens of replicas running concurrently, this led to a massive backlog because MongoDB could not write fast enough. This gave rise to a new application server AMI which now includes a MongoDB set-up, using "replica" as the MongoDB replica set name. Each replica can hence write into its own MongoDB instance, isolated from all others and scaling linearly.) |
|
| 550 | + |
|
| 551 | +In other regions, instead an instance-local MongoDB shall be used for each replica, not interfering with each other or with other databases: |
|
| 552 | + |
|
| 553 | +``` |
|
| 554 | +INSTALL_FROM_RELEASE=build-202106012325 |
|
| 555 | +SERVER_NAME=paris2024 |
|
| 556 | +MONGODB_URI="mongodb://localhost/paris2024-replica?replicaSet=replica&retryWrites=true&readPreference=nearest" |
|
| 557 | +USE_ENVIRONMENT=live-replica-server |
|
| 558 | +REPLICATION_CHANNEL=paris2024-replica |
|
| 559 | +REPLICATION_HOST=rabbit-eu-west-3.sapsailing.com |
|
| 560 | +REPLICATE_MASTER_SERVLET_HOST=paris-ssh.internal.sapsailing.com |
|
| 561 | +REPLICATE_MASTER_SERVLET_PORT=8888 |
|
| 562 | +REPLICATE_MASTER_EXCHANGE_NAME=paris2024 |
|
| 563 | +REPLICATE_MASTER_QUEUE_HOST=rabbit-eu-west-3.sapsailing.com |
|
| 564 | +REPLICATE_MASTER_BEARER_TOKEN="***" |
|
| 565 | +ADDITIONAL_JAVA_ARGS="${ADDITIONAL_JAVA_ARGS} -Dcom.sap.sse.debranding=true" |
|
| 566 | +``` |
|
| 567 | + |
|
| 568 | +### Application Servers |
|
| 569 | + |
|
| 570 | +``sap-p1-1`` normally is the master for the ``paris2024`` replica set. The application server directory is found under ``/home/sailing/servers/master``, and the master's HTTP port is 8888. It shall replicate the shared services, in particular ``SecurityServiceImpl``, from ``security-service.sapsailing.com``, like any normal server in our landscape, only that here we have to make sure we can target the default RabbitMQ in eu-west-1 and can reach the ``security-service.sapsailing.com`` master directly or, even better, its load balancer. |
|
| 571 | + |
|
| 572 | +SSH local port forwards (configured with the ``-L`` option) that use hostnames instead of IP addresses for the remote host specification are resolved each time a new connection is established through this forward. If the DNS entry resolves to multiple IPs or if the DNS entry changes over time, later connection requests through the port forward will honor the new host name's DNS resolution. |
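
For example, in the following forward (taken from the regular-operations set-up above), the destination hostname is resolved on the jump host for each new connection going through the forward, so DNS changes are picked up without restarting the tunnel:

```
# Hedged example: rabbit-eu-west-3.sapsailing.com is re-resolved by paris-ssh for every
# new connection opened through the local forward on port 5673.
ssh -N -L 5673:rabbit-eu-west-3.sapsailing.com:5672 ec2-user@paris-ssh.sapsailing.com
```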
|
| 573 | + |
|
| 574 | +Furthermore, there is a configuration under ``/home/sailing/servers/security_service`` which can be fired up with port 8889, using the local ``security_service`` database that a script ``/usr/local/bin/clone-security-service-db`` on the jump host ``paris-ssh.sapsailing.com`` updates on an hourly basis as long as an Internet connection is available. This can be used as a replacement of the official ``security-service.sapsailing.com`` service. Both laptops have an ``/etc/hosts`` entry mapping ``security-service.sapsailing.com`` to ``127.0.0.1`` and work with flexible SSH port forwards to decide whether the official Internet-based or the local copy of the security service shall be used. |
|
| 575 | + |
|
| 576 | +``sap-p1-2`` normally is a replica for the ``paris2024`` replica set, using the local RabbitMQ running on ``sap-p1-1``. Its outbound ``REPLICATION_CHANNEL`` will be ``paris2024-replica`` and uses the RabbitMQ running in ``eu-west-3``, reached through an SSH port forward with local port 5673 (15673 for the web administration UI). A reverse port forward from ``eu-west-3`` to the application port 8888 on ``sap-p1-2`` has to be established, which replicas running in ``eu-west-3`` will use to reach their master through HTTP. This way, adding more replicas on the AWS side will not require any additional bandwidth between cloud and on-site network: the reverse HTTP channel, which carries only little traffic, sees additional traffic per replica, while all outbound replication goes to the single exchange in the RabbitMQ node running in ``eu-west-3``. |
|
| 577 | + |
|
| 578 | +## User Groups and Permissions |
|
| 579 | + |
|
| 580 | +The general public shall not be allowed during the live event to browse the event through ``paris2024.sapsailing.com``. Instead, they are required to go through any of the so-called "Rights-Holding Broadcaster" (RHB) web sites. There, a "widget" will be embedded into their web sites which works with our REST API to display links to the regattas and races, in particular the RaceBoard.html pages displaying the live and replay races. |
|
| 581 | + |
|
| 582 | +Moderators who need to comment on the races shall be given more elaborate permissions and shall be allowed to use the full-fledged functionality of ``paris2024.sapsailing.com``, in particular, browse through all aspects of the event, see flag statuses, postponements and so on. |
|
| 583 | + |
|
| 584 | +To achieve this effect, the ``paris2024-server`` group has the ``sailing_viewer`` role assigned for all users, and all objects except the top-level ``Event`` object are owned by that group. This way, everything but the event is publicly visible. |
|
| 585 | + |
|
| 586 | +The ``Event`` object is owned by ``paris2024-moderators``, and that group grants the ``sailing_viewer`` role only to its members, meaning only the members of that group are allowed to see the ``Event`` object. |
|
| 587 | + |
|
| 588 | +## Landscape Upgrade Procedure |
|
| 589 | + |
|
| 590 | +In the ``configuration/on-site-scripts`` directory we have prepared a number of scripts intended to be useful for local and cloud landscape management. TL;DR: |
|
| 591 | +``` |
|
| 592 | + configuration/on-site-scripts/upgrade-landscape.sh -R {release-name} -b {replication-bearer-token} |
|
| 593 | +``` |
|
| 594 | +will upgrade the entire landscape to the release ``{release-name}`` (e.g., build-202107210711). The ``{replication-bearer-token}`` must be provided such that the user authenticated by that token will have the permission to stop replication and to replicate the ``paris2024`` master. |
|
| 595 | + |
|
| 596 | +The script will proceed in the following steps: |
|
| 597 | + - patch ``*.conf`` files in ``sap-p1-1:servers/[master|security_service]`` and ``sap-p1-2:servers/[replica|master|security_service]`` so |
|
| 598 | + their ``INSTALL_FROM_RELEASE`` points to the new ``${RELEASE}`` |
|
| 599 | + - Install new releases to ``sap-p1-1:servers/[master|security_service]`` and ``sap-p1-2:servers/[replica|master|security_service]`` |
|
| 600 | + - Update all launch configurations and auto-scaling groups in the cloud (``update-launch-configuration.sh``) |
|
| 601 | + - Tell all replicas in the cloud to stop replicating (``stop-all-cloud-replicas.sh``) |
|
| 602 | + - Tell ``sap-p1-2`` to stop replicating |
|
| 603 | + - on ``sap-p1-1:servers/master`` run ``./stop; ./start`` to bring the master to the new release |
|
| 604 | + - wait until master is healthy |
|
| 605 | + - on ``sap-p1-2:servers/replica`` run ``./stop; ./start`` to bring up on-site replica again |
|
| 606 | + - launch upgraded cloud replicas and replace old replicas in target group (``launch-replicas-in-all-regions.sh``) |
|
| 607 | + - terminate all instances named "SL Paris2024 (auto-replica)"; this should cause the auto-scaling group to launch new instances as required |
|
| 608 | + - manually inspect the health of everything and terminate the "SL Paris2024 (Upgrade Replica)" instances when enough new instances |
|
| 609 | + named "SL Paris2024 (auto-replica)" are available |
|
| 610 | + |
|
| 611 | +The individual scripts will be described briefly in the following sub-sections. Many of them use as a common artifact the ``regions.txt`` file which contains the list of regions in which operations are executed. The ``eu-west-1`` region as our "legacy" or "primary" region requires special attention in some cases. In particular, it can use the ``live`` replica set for the replicas started in the region, also because the AMI used in this region is slightly different and in particular doesn't launch a MongoDB local replica set on each instance which the AMIs in all other regions supported do. |
|
| 612 | + |
|
| 613 | +### clone-security-service-db-safe-exit |
|
| 614 | + |
|
| 615 | +Creates a ``mongodump`` of "mongodb://mongo0.internal.sapsailing.com,mongo1.internal.sapsailing.com,dbserver.internal.sapsailing.com:10203/security_service?replicaSet=live&retryWrites=true&readPreference=nearest" on the ``paris-ssh.sapsailing.com`` host and packs it into a ``.tar.gz`` file. This archive is then transferred as the standard output of an SSH command to the host executing the script, where it is unpacked into ``/tmp/dump``. The local "mongodb://localhost/security_service_bak?replicaSet=security_service&retryWrites=true&readPreference=nearest" backup copy is then dropped, the local ``security_service`` DB is copied to ``security_service_bak``, and the dump from ``/tmp/dump`` is restored to ``security_service``. If this fails, the backup from ``security_service_bak`` is restored to ``security_service``, and there won't be a backup copy in ``security_service_bak`` anymore. |
|
| 616 | + |
|
| 617 | +The script is used as a CRON job for user ``sailing@sap-p1-1``. |
|
| 618 | + |
|
| 619 | +### get-replica-ips |
|
| 620 | + |
|
| 621 | +Lists the public IP addresses of all running replicas in the regions described in ``regions.txt`` on its standard output. Progress information will be sent to standard error. Example invocation: |
|
| 622 | +<pre> |
|
| 623 | + $ ./get-replica-ips |
|
| 624 | + Region: eu-west-3 |
|
| 625 | + Region: ap-northeast-1 |
|
| 626 | + Region: ap-southeast-2 |
|
| 627 | + Region: us-west-1 |
|
| 628 | + Region: us-east-1 |
|
| 629 | + 34.245.148.130 18.183.234.161 3.26.60.130 13.52.238.81 18.232.169.1 |
|
| 630 | +</pre> |
|
| 631 | + |
|
| 632 | +### launch-replicas-in-all-regions.sh |
|
| 633 | + |
|
| 634 | +Will launch, in each region listed in ``regions.txt``, as many new replicas with the release specified via ``-R`` as there are currently healthy auto-replicas registered with the ``S-paris2024`` target group in that region (at least one). The new replicas register at the master proxy ``paris-ssh.internal.sapsailing.com:8888`` and the RabbitMQ at ``rabbit-eu-west-3.sapsailing.com:5672``; once healthy, they are added to the target group ``S-paris2024`` in that region, and all auto-replicas registered before are removed from the target group. |
|
| 635 | + |
|
| 636 | +The script uses the ``launch-replicas-in-region.sh`` script for each region where replicas are to be launched. |
|
| 637 | + |
|
| 638 | +Example invocation: |
|
| 639 | +<pre> |
|
| 640 | + launch-replicas-in-all-regions.sh -R build-202107210711 -b 1234567890ABCDEFGH/+748397= |
|
| 641 | +</pre> |
|
| 642 | + |
|
| 643 | +Invoke without arguments to see a documentation of possible parameters. |
|
| 644 | + |
|
| 645 | +### launch-replicas-in-region.sh |
|
| 646 | + |
|
| 647 | +Will launch one or more (see ``-c``) new replicas in the AWS region specified with ``-g``, with the release specified via ``-R``. The new replicas register at the master proxy ``paris-ssh.internal.sapsailing.com:8888`` and the RabbitMQ at ``rabbit-eu-west-3.sapsailing.com:5672``; once healthy, they are added to the target group ``S-paris2024`` in that region, and all auto-replicas registered before are removed from the target group. Specify ``-r`` and ``-p`` if you are launching in ``eu-west-1`` because it has a special non-default MongoDB environment. |
|
| 648 | + |
|
| 649 | +Example invocation: |
|
| 650 | +<pre> |
|
| 651 | + launch-replicas-in-region.sh -g us-east-1 -R build-202107210711 -b 1234567890ABCDEFGH/+748397= |
|
| 652 | +</pre> |
|
| 653 | + |
|
| 654 | +Invoke without arguments to see a documentation of possible parameters. |
|
| 655 | + |
|
| 656 | +### stop-all-cloud-replicas.sh |
|
| 657 | + |
|
| 658 | +Will tell all replicas in the cloud, in those regions described by the ``regions.txt`` file, to stop replicating. This works by invoking the ``get-replica-ips`` script and then telling each of the returned replicas to stop replicating, using the ``stopReplicating.sh`` script in its ``/home/sailing/servers/paris2024`` directory and passing through the bearer token. Note: this will NOT stop replication on the local replica on ``sap-p1-2``! |
|
| 659 | + |
|
| 660 | +The script must be invoked with the bearer token needed to authenticate a user with replication permission for the ``paris2024`` application replica set. |
|
| 661 | + |
|
| 662 | +Example invocation: |
|
| 663 | +<pre> |
|
| 664 | + stop-all-cloud-replicas.sh -b 1234567890ABCDEFGH/+748397= |
|
| 665 | +</pre> |
|
| 666 | + |
|
| 667 | +Invoke without arguments to see a documentation of possible parameters. |
|
| 668 | + |
|
| 669 | +### update-launch-configuration.sh |
|
| 670 | + |
|
| 671 | +Will upgrade the auto-scaling group ``paris2024*`` (such as ``paris2024-auto-replicas``) in the regions from ``regions.txt`` with a new launch configuration. That launch configuration is derived from the existing one named ``paris2024-*`` by copying it to ``paris2024-{RELEASE_NAME}``, updating the ``INSTALL_FROM_RELEASE`` parameter in the user data to the ``{RELEASE_NAME}`` provided via the ``-R`` parameter, and optionally adjusting the AMI, key pair name, and instance type if specified by the respective parameters. Note: this will NOT terminate any instances in the target group! |
|
| 672 | + |
|
| 673 | +Example invocation: |
|
| 674 | +<pre> |
|
| 675 | + update-launch-configuration.sh -R build-202107210711 |
|
| 676 | +</pre> |
|
| 677 | + |
|
| 678 | +Invoke without arguments to see a documentation of possible parameters. |
|
| 679 | + |
|
| 680 | +### upgrade-landscape.sh |
|
| 681 | + |
|
| 682 | +See the introduction of this main section. Synopsis: |
|
| 683 | +<pre> |
|
| 684 | + ./upgrade-landscape.sh -R <release-name> -b <replication-bearer-token> [-t <instance-type>] [-i <ami-id>] [-k <key-pair-name>] [-s] |
|
| 685 | + -b replication bearer token; mandatory |
|
| 686 | + -i Amazon Machine Image (AMI) ID to use to launch the instance; defaults to latest image tagged with image-type:sailing-analytics-server |
|
| 687 | + -k Key pair name, mapping to the --key-name parameter |
|
| 688 | + -R release name; must be provided to select the release, e.g., build-202106040947 |
|
| 689 | + -t Instance type; defaults to |
|
| 690 | + -s Skip release download |
|
| 691 | +</pre> |
|
| 692 | + |
|
| 693 | +## Log File Analysis |
|
| 694 | + |
|
| 695 | +Athena table definitions and queries have been provided in region ``eu-west-3`` (Paris) where we hosted our EU part during the event after a difficult start in ``eu-west-1`` with the single MongoDB live replica set not scaling well for all the replicas that were required in the region. |
|
| 696 | + |
|
| 697 | +The key to the Athena set-up is to have a table definition per bucket, with a dedicated S3 bucket per region in which ALB logs were recorded. An example of a query based on the many tables then looks like this: |
|
| 698 | +<pre> |
|
| 699 | + with union_table AS |
|
| 700 | + (select * |
|
| 701 | + from alb_logs_ap_northeast_1 |
|
| 702 | + union all |
|
| 703 | + select * |
|
| 704 | + from alb_logs_ap_southeast_2 |
|
| 705 | + union all |
|
| 706 | + select * |
|
| 707 | + from alb_logs_eu_west_3 |
|
| 708 | + union all |
|
| 709 | + select * |
|
| 710 | + from alb_logs_us_east_1 |
|
| 711 | + union all |
|
| 712 | + select * |
|
| 713 | + from alb_logs_us_west_1) |
|
| 714 | + select date_trunc('day', parse_datetime(time,'yyyy-MM-dd''T''HH:mm:ss.SSSSSS''Z')), count(distinct concat(client_ip,user_agent)) |
|
| 715 | + from union_table |
|
| 716 | + where (parse_datetime(time,'yyyy-MM-dd''T''HH:mm:ss.SSSSSS''Z') |
|
| 717 | + between parse_datetime('2021-07-21-00:00:00','yyyy-MM-dd-HH:mm:ss') |
|
| 718 | + and parse_datetime('2021-08-08-02:00:00','yyyy-MM-dd-HH:mm:ss')) |
|
| 719 | + group by date_trunc('day', parse_datetime(time,'yyyy-MM-dd''T''HH:mm:ss.SSSSSS''Z')) |
|
| 720 | +</pre> |
|
| 721 | +It defines a ``union_table`` which unites all contents from all buckets scanned. |
|
| ... | ... | \ No newline at end of file |