File: test_pool.py

# Copyright (c) 2011 Brian Beach
# All rights reserved.
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the
# "Software"), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish, dis-
# tribute, sublicense, and/or sell copies of the Software, and to permit
# persons to whom the Software is furnished to do so, subject to the fol-
# lowing conditions:
#
# The above copyright notice and this permission notice shall be included
# in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABIL-
# ITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT
# SHALL THE AUTHOR BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 
# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
# IN THE SOFTWARE.

"""
Some multi-threading tests of boto in a greenlet environment.
"""
from __future__ import print_function

import boto
import time
import uuid

from threading import Thread

def spawn(function, *args, **kwargs):
    """
    Spawns a new thread.  API is the same as
    gevent.greenlet.Greenlet.spawn.
    """
    t = Thread(target=function, args=args, kwargs=kwargs)
    t.start()
    return t
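
# A sketch (not part of the original tests) of how to get the greenlet
# environment the module docstring mentions: gevent's monkey patching
# replaces threading.Thread with a greenlet-backed implementation, so
# spawn() above behaves like gevent.spawn.  Assumes gevent is installed,
# and should be called before any connections are opened.
def patch_for_greenlets():
    from gevent import monkey
    monkey.patch_all()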

def put_object(bucket, name):
    bucket.new_key(name).set_contents_from_string(name)

def get_object(bucket, name):
    assert bucket.get_key(name).get_contents_as_string().decode('utf-8') == name

def test_close_connections():
    """
    A test that exposes the problem where connections are returned to the
    connection pool (and closed) before the caller reads the response.
    
    I couldn't think of a way to test it without greenlets, so this test
    doesn't run as part of the standard test suite.  That way, no more
    dependencies are added to the test suite.
    """
    
    print("Running test_close_connections")

    # Connect to S3
    s3 = boto.connect_s3()

    # Clean previous tests.
    for b in s3.get_all_buckets():
        if b.name.startswith('test-'):
            for key in b.get_all_keys():
                key.delete()
            b.delete()

    # Make a test bucket
    bucket = s3.create_bucket('test-%d' % int(time.time()))

    # Create 30 threads that each create an object in S3.  The number
    # 30 is chosen because it is larger than the connection pool size
    # (20). 
    names = [str(uuid.uuid4()) for _ in range(30)]
    threads = [
        spawn(put_object, bucket, name)
        for name in names
        ]
    for t in threads:
        t.join()

    # Create 30 threads to read the contents of the new objects.  This
    # is where closing the connection early is a problem, because
    # there is a response that needs to be read, and it can't be read
    # if the connection has already been closed.
    threads = [
        spawn(get_object, bucket, name)
        for name in names
        ]
    for t in threads:
        t.join()

# test_reuse_connections needs to read a file that is big enough that
# one read() call on the socket won't read the whole thing.  
BIG_SIZE = 10000

class WriteAndCount(object):

    """
    A file-like object that counts the number of characters written.
    """

    def __init__(self):
        self.size = 0

    def write(self, data):
        self.size += len(data)
        time.sleep(0) # yield to other threads

def read_big_object(s3, bucket, name, count):
    for _ in range(count):
        key = bucket.get_key(name)
        out = WriteAndCount()
        key.get_contents_to_file(out)
        if out.size != BIG_SIZE:
            print(out.size, BIG_SIZE)
        assert out.size == BIG_SIZE
        print("    pool size:", s3._pool.size())

class LittleQuerier(object):

    """
    An object that manages a thread that keeps pulling down small
    objects from S3 and checking the answers until told to stop.
    """

    def __init__(self, bucket, small_names):
        self.running = True
        self.bucket = bucket
        self.small_names = small_names
        self.thread = spawn(self.run)

    def stop(self):
        self.running = False
        self.thread.join()

    def run(self):
        count = 0
        while self.running:
            i = count % 4
            key = self.bucket.get_key(self.small_names[i])
            expected = str(i)
            rh = {'response-content-type': 'small/' + str(i)}
            actual = key.get_contents_as_string(response_headers=rh).decode('utf-8')
            if expected != actual:
                print("AHA:", repr(expected), repr(actual))
            assert expected == actual
            count += 1

def test_reuse_connections():
    """
    This test is an attempt to expose problems caused by boto returning
    connections to the connection pool before the response has been
    read.  The strategy is to start a couple of big reads from S3,
    where it will take time to read the response, and then start other
    requests that will reuse the same connections from the pool while
    the big responses are still being read.

    The test passes because of an interesting combination of factors.
    I was expecting that it would fail because two threads would be
    reading the same connection at the same time.  That doesn't happen
    because httplib catches the problem before it happens and raises
    an exception.

    Here's the sequence of events:

       - Thread 1: Send a request to read a big S3 object.
       - Thread 1: Returns connection to pool.
       - Thread 1: Start reading the body of the response.

       - Thread 2: Get the same connection from the pool.
       - Thread 2: Send another request on the same connection.
       - Thread 2: Try to read the response, but
                   HTTPConnection.getresponse notices that the
                   previous response isn't done reading yet, and
                   raises a ResponseNotReady exception.
       - Thread 2: _mexe catches the exception, does not return the
                   connection to the pool, gets a new connection, and
                   retries.

       - Thread 1: Finish reading the body of its response.
       
       - Server:   Gets the second request on the connection, and
                   sends a response.  This response is ignored because
                   the connection has been dropped on the client end.

    If you add a print statement in HTTPConnection.getresponse at the
    point where it raises ResponseNotReady, and then run this test, you
    can see that it's happening (the trace_response_not_ready sketch
    below wraps getresponse to do the same without editing httplib).
    """

    print("Running test_reuse_connections")

    # Connect to S3
    s3 = boto.connect_s3()

    # Make a test bucket
    bucket = s3.create_bucket('test-%d' % int(time.time()))

    # Create some small objects in S3.
    small_names = [str(uuid.uuid4()) for _ in range(4)]
    for (i, name) in enumerate(small_names):
        bucket.new_key(name).set_contents_from_string(str(i))

    # Wait, clean the connection pool, and make sure it's empty.
    print("    waiting for all connections to become stale")
    time.sleep(s3._pool.STALE_DURATION + 1)
    s3._pool.clean()
    assert s3._pool.size() == 0
    print("    pool is empty")
    
    # Create a big object in S3.
    big_name = str(uuid.uuid4())
    contents = "-" * BIG_SIZE
    bucket.new_key(big_name).set_contents_from_string(contents)

    # Start some threads to read it and check that they are reading
    # the correct thing.  Each thread will read the object 20 times.
    threads = [
        spawn(read_big_object, s3, bucket, big_name, 20)
        for _ in range(5)
        ]

    # Do some other things that may (incorrectly) re-use the same
    # connections while the big objects are being read.
    queriers = [
        LittleQuerier(bucket, small_names)
        for _ in range(5)
        ]

    # Clean up.
    for t in threads:
        t.join()
    for q in queriers:
        q.stop()
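
# The docstring of test_reuse_connections suggests adding a print statement
# inside httplib where ResponseNotReady is raised.  A less invasive sketch
# (an illustration, not part of the original tests) wraps
# HTTPConnection.getresponse instead, so the retries become visible without
# editing the standard library:
def trace_response_not_ready():
    try:
        import httplib                  # Python 2
    except ImportError:
        import http.client as httplib  # Python 3
    original = httplib.HTTPConnection.getresponse
    def traced(self, *args, **kwargs):
        try:
            return original(self, *args, **kwargs)
        except httplib.ResponseNotReady:
            print("    ResponseNotReady; boto will retry on a new connection")
            raise
    httplib.HTTPConnection.getresponse = traced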

def main():
    test_close_connections()
    test_reuse_connections()

if __name__ == '__main__':
    main()