a2ebff683eb94df69561c346122aaff4986ffb2a
configuration/httpd/conf.d/000-macros.conf
| ... | ... | @@ -50,20 +50,10 @@ |
| 50 | 50 | Define ARCHIVE_IP 172.31.41.241 |
| 51 | 51 | Define ARCHIVE_FAILOVER_IP 172.31.47.152 |
| 52 | 52 | Define PRODUCTION |
| 53 | -#move to main config file within if mod_proxy |
|
| 54 | -ProxyHCExpr ok2 {%{REQUEST_STATUS} =~ /^[2]/} |
|
| 55 | -<Proxy balancer://archive> |
|
| 56 | - BalancerMember http://${ARCHIVE_IP}:8888 hcmethod=GET hcexpr=ok2 hcuri=/gwt/status hcfails=2 hcpasses=3 timeout=86400 connectiontimeout=20 keepalive=on |
|
| 57 | - BalancerMember http://${ARCHIVE_FAILOVER_IP}:8888 status=+H |
|
| 58 | - ProxySet lbmethod=bytraffic |
|
| 59 | -</Proxy> |
|
| 53 | + |
|
| 60 | 54 | |
| 61 | 55 | <Macro ArchiveRewrite> |
| 62 | - ProxyPass "/" "balancer://archive/" nocanon |
|
| 63 | - #nocanon passes raw url to backend without normalising the url |
|
| 64 | - ProxyPassReverse "/" "balancer://archive/" |
|
| 65 | - ProxyPreserveHost On |
|
| 66 | - #ensures host is preserved on forwarding to balancer member |
|
| 56 | + Use Rewrite ${PRODUCTION} 8888 |
|
| 67 | 57 | </Macro> |
| 68 | 58 | |
| 69 | 59 | <Macro Headers> |
configuration/httpd/conf.d/switchoverArchive.sh
| ... | ... | @@ -1,57 +1,69 @@ |
| 1 | 1 | #!/bin/bash |
| 2 | 2 | MACROS_PATH=$1 |
| 3 | +EMAIL=$2 |
|
| 4 | +TIMEOUT1=2 |
|
| 5 | +TIMEOUT2=9 |
|
| 6 | +#Purpose: Script is used to switch to the failover archive if the primary is unhealthy by altering the macros |
|
| 7 | +#definitions and then reloading. |
|
| 8 | + |
|
| 9 | +#These next lines get the current ip values for the archive and failover, plus they store the value of production, |
|
| 10 | +#which is a variable pointing to either the primary or failover value. |
|
| 3 | 11 | archiveIp="$(sed -n -E 's/^Define ARCHIVE_IP (.*)/\1/p' ${MACROS_PATH} | tr -d '[:space:]')" |
| 4 | 12 | failoverIp="$(sed -n -E 's/^Define ARCHIVE_FAILOVER_IP (.*)/\1/p' ${MACROS_PATH} | tr -d '[:space:]')" |
| 5 | 13 | productionIp="$(sed -n -E 's/^Define PRODUCTION (.*)/\1/p' ${MACROS_PATH} | tr -d '[:space:]')" |
| 14 | +#Checks whether PRODUCTION in the macros file currently points to the primary archive (healthy) or not (unhealthy). |
|
| 6 | 15 | if [[ "${productionIp}" == "\${ARCHIVE_IP}" ]] |
| 7 | 16 | then |
| 8 | 17 | alreadyHealthy=1 |
| 9 | - echo "currently healthy" |
|
| 18 | + logger -t archive "currently healthy" |
|
| 10 | 19 | else |
| 11 | 20 | alreadyHealthy=0 |
| 12 | - echo "currently unhealthy" |
|
| 21 | + logger -t archive "currently unhealthy" |
|
| 13 | 22 | fi |
| 23 | +#Sets the production value to point to the variable defining the main archive IP. |
|
| 14 | 24 | setProductionMainIfNotSet() { |
| 15 | 25 | if [[ $alreadyHealthy -eq 0 ]] |
| 16 | 26 | then |
| 17 | 27 | #currently unhealthy |
| 18 | 28 | #set production to archive |
| 19 | - echo "setting production to main archive" |
|
| 29 | + logger -t archive "Healthy: setting production to main archive" |
|
| 20 | 30 | sed -i -E "s/Define PRODUCTION .*/Define PRODUCTION \${ARCHIVE_IP}/" ${MACROS_PATH} |
| 21 | 31 | systemctl reload httpd |
| 22 | 32 | { |
| 23 | - echo "To: thomasstokes@yahoo.co.uk" |
|
| 33 | + echo "To: ${EMAIL}" |
|
| 24 | 34 | echo Subject: Healthy |
| 25 | 35 | echo |
| 26 | 36 | echo Healthy: main archive online |
| 27 | 37 | } | /usr/sbin/sendmail -t |
| 28 | 38 | else |
| 29 | - echo "currently healthy; no change needed" |
|
| 39 | + #If already healthy then no reload or notification occurs. |
|
| 40 | + logger -t archive "Healthy: already set, no change needed" |
|
| 30 | 41 | fi |
| 31 | 42 | |
| 32 | 43 | } |
| 33 | -echo "begin" |
|
| 34 | -curl -s --connect-timeout 2 "http://${archiveIp}:8888/gwt/status" >> /dev/null |
|
| 44 | +logger -t archive "begin check" |
|
| 45 | +curl -s --connect-timeout ${TIMEOUT1} "http://${archiveIp}:8888/gwt/status" >> /dev/null |
|
| 35 | 46 | if [[ $? -ne 0 ]] |
| 36 | 47 | then |
| 37 | - echo "first check failed" |
|
| 38 | - curl -s --connect-timeout 5 "http://${archiveIp}:8888/gwt/status" >> /dev/null |
|
| 48 | + logger -t archive "first check failed" |
|
| 49 | + curl -s --connect-timeout ${TIMEOUT2} "http://${archiveIp}:8888/gwt/status" >> /dev/null |
|
| 39 | 50 | if [[ $? -ne 0 ]] |
| 40 | 51 | then |
| 41 | 52 | if [[ $alreadyHealthy -eq 1 ]] |
| 42 | 53 | then |
| 43 | - #set production to failover if not already. Separate if statement in case the curl statement fails but the production is already set to point to the backup |
|
| 54 | + #set production to failover if not already. Separate if statement in case the curl statement |
|
| 55 | + #fails but the production is already set to point to the backup |
|
| 44 | 56 | sed -i -E "s/Define PRODUCTION .*/Define PRODUCTION \${ARCHIVE_FAILOVER_IP}/" ${MACROS_PATH} |
| 45 | - echo "switching to failover" |
|
| 57 | + logger -t archive "Unhealthy: second check failed, switching to failover" |
|
| 46 | 58 | systemctl reload httpd |
| 47 | 59 | { |
| 48 | - echo "To: thomasstokes@yahoo.co.uk" |
|
| 60 | + echo "To: ${EMAIL}" |
|
| 49 | 61 | echo Subject: Unhealthy |
| 50 | 62 | echo |
| 51 | 63 | echo Unhealthy: main archive offline |
| 52 | 64 | } | /usr/sbin/sendmail -t |
| 53 | 65 | else |
| 54 | - echo "unhealthy, failover already in use" |
|
| 66 | + logger -t archive "Unhealthy: second check still fails, failover already in use" |
|
| 55 | 67 | fi |
| 56 | 68 | else |
| 57 | 69 | setProductionMainIfNotSet |
wiki/info/landscape/archive-server-upgrade.md
| ... | ... | @@ -100,27 +100,45 @@ Following the mandatory automated content comparison you should do a few spot ch |
| 100 | 100 | |
| 101 | 101 | ### Switching in Reverse Proxy |
| 102 | 102 | |
| 103 | -Once you are content with the quality of the new archive server candidate's contents it's time to switch. Technically, switching archive servers is done by adjusting the corresponding configuration in the central Apache reverse proxy server. You find this in ``root@sapsailing.com:/etc/httpd/conf.d/000-macros.conf`` in the definition of the macro ``ArchiveRewrite`` defined usually at the top of the file. You'll find a macro definition that looks like this: |
|
| 104 | -```<Macro ArchiveRewrite> |
|
| 105 | -# ARCHIVE, based on i3.2xlarge, 64GB RAM and 1.9TB swap |
|
| 106 | -# Use Rewrite 172.31.4.106 8888 |
|
| 107 | - Use Rewrite 172.31.9.176 8888 |
|
| 108 | -</Macro> |
|
| 103 | +Once you are content with the quality of the new archive server candidate's contents it's time to switch. Technically, switching archive servers is done by adjusting the corresponding configuration in the central Apache reverse proxy server. You find this at the top of ``root@sapsailing.com:/etc/httpd/conf.d/000-macros.conf``, where there should be a couple of statements: |
|
| 104 | + |
|
| 105 | +``` |
|
| 106 | +Define ARCHIVE_IP xxx.xx.xx.xxx |
|
| 107 | +Define ARCHIVE_FAILOVER_IP xxx.xx.xx.xxx |
|
| 108 | + |
|
| 109 | +<Macro ArchiveRewrite> |
|
| 110 | + Use Rewrite ${ARCHIVE_IP} 8888 |
|
| 111 | +</Macro> |
|
| 112 | +``` |
|
| 113 | +In the past we changed this directly within the ArchiveRewrite macro, but this was slow if we needed to switch over. As an improvement, which happened to also be neater, we added variables -- defined at the top -- including a variable for an up-and-running failover. In the case of an outage, we could comment out the current archive and rename the failover (and then reload). This way we could also switch back if the primary returned to healthy. We have since written an automation script, which changes the PRODUCTION value to point to either the variable ARCHIVE_IP or ARCHIVE_FAILOVER_IP, so the definitions now look like this: |
|
| 109 | 114 | ``` |
| 110 | -Copy the uncommented ``Use Rewrite`` line and in the new copy adjust the internal IP to match the internal IP of your new archive server candidate. Then comment the line that pointed to the currently active primary archive server. Your macro definition then would look something like this, assuming that ``172.31.8.7`` were the IP address of your new archive server candidate: |
|
| 111 | -```<Macro ArchiveRewrite> |
|
| 112 | -# ARCHIVE, based on i3.2xlarge, 64GB RAM and 1.9TB swap |
|
| 113 | -# Use Rewrite 172.31.4.106 8888 |
|
| 114 | -# Use Rewrite 172.31.9.176 8888 |
|
| 115 | - Use Rewrite 172.31.8.7 8888 |
|
| 116 | -</Macro> |
|
| 115 | +Define ARCHIVE_IP xxx.xx.xx.xxx |
|
| 116 | +Define ARCHIVE_FAILOVER_IP xxx.xx.xx.xxx |
|
| 117 | +Define PRODUCTION ${ARCHIVE_IP} |
|
| 118 | + |
|
| 119 | +<Macro ArchiveRewrite> |
|
| 120 | + Use Rewrite ${PRODUCTION} 8888 |
|
| 121 | +</Macro> |
|
| 117 | 122 | ``` |
| 118 | -Exit your editor and reload the reverse proxy configuration by issuing the following command: |
|
| 123 | +The script can be found in git as **switchoverArchive.sh**. It takes two parameters: the path to the macros file containing the above definitions (currently /etc/httpd/conf.d/000-macros.conf) and the e-mail address to notify whenever a switch happens. Run ```crontab -e``` to edit the cronjobs and add the following line, replacing the example address with the real recipient: |
|
| 119 | 124 | ``` |
| 120 | - # service httpd reload |
|
| 125 | +* * * * * switchoverArchive "/etc/httpd/conf.d/000-macros.conf" "admin@example.com" |
|
| 121 | 126 | ``` |
| 122 | -Check that the new archive service is now active, e.g., by looking at [sapsailing.com/gwt/status](https://sapsailing.com/gwt/status). It should reflect the new release in its ``release`` field. |
|
| 127 | +Then exit the editor. |
|
| 128 | +To set up switchoverArchive as a command for easy usage, run ``` ln -s /home/wiki/gitwiki/configuration/switchoverArchive.sh switchoverArchive``` in a directory that is on the ``PATH``. This creates a symbolic link and makes the command accessible from anywhere. |
|
| 129 | + |
|
| 130 | +Check that the new archive service is now active, e.g., by looking at [sapsailing.com/gwt/status](https://sapsailing.com/gwt/status). It should reflect the new release in its ``release`` field. |
|
| 131 | +#### Tests |
|
| 132 | + |
|
| 133 | +1. Healthy -> Stay healthy |
|
| 134 | +2. Healthy -> Unhealthy |
|
| 135 | +3. Unhealthy -> Stay unhealthy |
|
| 136 | +4. Unhealthy -> Become healthy |
|
| 137 | +5. Multiple cycles |
|
| 138 | +6. Different order combinations, e.g. 1,2,3,4; 2,4,1,2,3 |
|
| 123 | 139 | |
| 124 | 140 | ### Clean up EC2 Names and Instances |
| 125 | 141 | |
| 126 | -Next, you should terminate the previous fail-over archive server instance, and you need to adjust the ``Name`` tags in the EC2 console of the old primary to show that it's now the fail-over, and for the candidate to show that it's now the primary. Select the old fail-over instance and terminate it. Then change the name tag of "SL Archive" to "SL Archive (Failover)", then change that of "SL Archive (New Candidate)" to "SL Archive", and you're done. |
|
| 142 | +Next, you should terminate the previous fail-over archive server instance, and you need to adjust the ``Name`` tags in the EC2 console of the old primary to show that it's now the fail-over, and for the candidate to show that it's now the primary. Select the old fail-over instance and terminate it. Then change the name tag of "SL Archive" to "SL Archive (Failover)", then change that of "SL Archive (New Candidate)" to "SL Archive", and you're done for now. |
|
| 143 | + |
|
| 144 | +If you establish that the old primary will not recover, you must set up a new failover, reconfigure httpd, and then run ```systemctl reload httpd```, which won't drop any connections. |
|
| ... | ... | \ No newline at end of file |