1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150
|
#!/usr/bin/python -u
# Copyright (c) 2010-2012 OpenStack Foundation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
from swiftclient import client
from unittest import main
from swift.common.exceptions import LockTimeout
from swift.common.manager import Manager
from swift.common.utils import hash_path, readconf, Timestamp
from swift.container.backend import ContainerBroker
from test.probe.common import (
kill_nonprimary_server, kill_server, start_server, ReplProbeTest)
# Why is this not called test_container_orphan? Because the crash
# happens in the account server, so both account and container
# services are involved.
#
# The common way users hit this is by using TripleO to deploy an overcloud
# and add Gnocchi. Gnocchi hammers Swift, so its container has updates
# outstanding all the time. Then, users crash the overcloud and re-deploy
# it, using a new hash suffix in swift.conf. Thereafter, the container
# service inherits the old container with its outstanding updates, and the
# container updater tries to send those updates to the account server,
# but the account cannot be found anymore. In this situation, in Swift
# 2.25.0, the account server raises a traceback, and the cycle continues
# without end.
class TestOrphanContainer(ReplProbeTest):
    """Probe test: container updates aimed at an account DB that is gone.

    The test leaves a container with an unreported update, removes the
    account DB files from disk, runs the container updater once, and then
    checks that no container DB still present has an outstanding update.
    """

    def get_account_db_files(self, account):
        """Build the account DB path for each node the ring assigns.

        The paths are computed only; nothing here checks that the files
        exist.

        :param account: account name, passed to the account ring and to
                        ``hash_path``
        :returns: list of DB file paths, one per node returned by
                  ``self.account_ring.get_nodes(account)``
        :raises KeyError: if a node's port has no matching account-server
                          config
        """
        # Index the account-server configs by bind_port.
        # This is "more correct" than (port_num%100)//10, but is it worth it?
        # We have the assumption about port_num vs node_id embedded all over.
        account_configs = {}
        for cname in self.configs['account-server'].values():
            conf = readconf(cname)
            # config parser cannot know if it's a number or not, so int()
            port = int(conf['app:account-server']['bind_port'])
            account_configs[port] = conf

        part, nodes = self.account_ring.get_nodes(account)
        hash_str = hash_path(account)

        db_files = []
        for node in nodes:
            conf = account_configs[node['port']]
            devices = conf['app:account-server']['devices']
            # Layout: <devices>/<device>/accounts/<part>/<suffix>/<hash>/
            # <hash>.db, where the suffix is the last 3 chars of the hash.
            # part is not a string, hence str().
            db_files.append(os.path.join(
                devices, node['device'], 'accounts', str(part),
                hash_str[-3:], hash_str, hash_str + '.db'))
        return db_files

    def _find_outstanding_container_dbs(self, db_files, skip_missing=False):
        """Return the container DBs whose latest put is still unreported.

        A DB is outstanding when its ``put_timestamp`` is greater than its
        ``reported_put_timestamp``. The test is failed outright if reading
        a DB hits a ``LockTimeout`` or if its ``put_timestamp`` is not
        positive. Any other exception from the broker propagates.

        :param db_files: iterable of container DB file paths
        :param skip_missing: if True, silently ignore paths that no longer
                             exist instead of trying to open them
        :returns: list of the paths from ``db_files`` that are outstanding
        """
        outstanding = []
        for cfile in db_files:
            # We aren't catching DatabaseConnectionError, because
            # we only want to approve of DBs that were quarantined,
            # and not otherwise damaged. So if the code below throws
            # an exception for other reason, we want the test to fail.
            if skip_missing and not os.path.exists(cfile):
                continue

            broker = ContainerBroker(cfile)
            try:
                info = broker.get_info()
            except LockTimeout:
                self.fail('LockTimeout at %s' % (cfile,))
            if Timestamp(info['put_timestamp']) <= 0:
                self.fail('No put_timestamp at %s' % (cfile,))
            # Correct even if reported_put_timestamp is zero.
            if info['put_timestamp'] > info['reported_put_timestamp']:
                outstanding.append(cfile)
        return outstanding

    def test_update_pending(self):
        """Run the updater against containers whose account DB was removed.

        Passes when, after one container-updater pass, none of the
        container DBs still on disk has an outstanding update.
        """
        # Create container
        container = 'contx'
        client.put_container(self.url, self.token, container)

        part, nodes = self.account_ring.get_nodes(self.account)
        anode = nodes[0]

        # Stop a quorum of account servers
        # This allows the put to continue later.
        kill_nonprimary_server(nodes, self.ipport2server)
        kill_server((anode['ip'], anode['port']), self.ipport2server)

        # Put object
        # This creates an outstanding update.
        client.put_object(self.url, self.token, container, 'object1', b'123')

        cont_db_files = self.get_container_db_files(container)
        self.assertEqual(len(cont_db_files), 3)

        # Collect the observable state from containers
        outstanding_files = self._find_outstanding_container_dbs(
            cont_db_files)
        self.assertGreater(len(outstanding_files), 0)

        # At this point the users shut everything down and screw up the
        # hash in swift.conf. But we destroy the account DB instead.
        for afile in self.get_account_db_files(self.account):
            os.unlink(afile)

        # Restart the stopped primary server
        start_server((anode['ip'], anode['port']), self.ipport2server)

        # Make sure updaters run
        Manager(['container-updater']).once()

        # Collect the observable state from containers again and examine it.
        # Container DBs that have disappeared by now are skipped.
        outstanding_files_new = self._find_outstanding_container_dbs(
            cont_db_files, skip_missing=True)
        self.assertLengthEqual(outstanding_files_new, 0)

        self.get_to_final_state()
if __name__ == '__main__':
    # Executed as a script: hand control to the unittest runner, which
    # discovers and runs the test cases defined in this module.
    main()
|