File: SR12885Test.java

package info (click to toggle)
libdb-je-java 3.3.98-3
  • links: PTS, VCS
  • area: main
  • in suites: forky, sid, trixie
  • size: 13,052 kB
  • sloc: java: 153,077; xml: 2,034; makefile: 3
file content (274 lines) | stat: -rw-r--r-- 9,849 bytes parent folder | download | duplicates (3)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
/*-
 * See the file LICENSE for redistribution information.
 *
 * Copyright (c) 2002,2010 Oracle.  All rights reserved.
 *
 * $Id: SR12885Test.java,v 1.8.2.2 2010/01/04 15:30:42 cwl Exp $
 */

package com.sleepycat.je.cleaner;

import java.io.File;
import java.io.IOException;

import junit.framework.TestCase;

import com.sleepycat.je.CheckpointConfig;
import com.sleepycat.je.Database;
import com.sleepycat.je.DatabaseConfig;
import com.sleepycat.je.DatabaseEntry;
import com.sleepycat.je.DatabaseException;
import com.sleepycat.je.DbInternal;
import com.sleepycat.je.Environment;
import com.sleepycat.je.EnvironmentConfig;
import com.sleepycat.je.OperationStatus;
import com.sleepycat.je.Transaction;
import com.sleepycat.je.config.EnvironmentParams;
import com.sleepycat.je.log.FileManager;
import com.sleepycat.je.util.TestUtils;

/**
 * Reproduces a problem found in SR12885 where we failed to migrate a pending
 * LN if the slot was reused by an active transaction and that transaction was
 * later aborted.
 *
 * This bug can manifest as a LogFileNotFoundException.  However, there was
 * another bug that caused this bug to manifest sometimes as a NOTFOUND return
 * value.  This secondary problem -- more sloppiness than a real bug -- was
 * that the PendingDeleted flag was not cleared during an abort.  If the
 * PendingDeleted flag is set, the low level fetch method will return null
 * rather than throwing a LogFileNotFoundException.  This caused a NOTFOUND in
 * some cases.
 *
 * The sequence that causes the bug is:
 *
 * 1) The cleaner processes a file containing LN-A (node A) for key X.  Key X
 * is a non-deleted LN.
 *
 * 2) The cleaner sets the migrate flag on the BIN entry for LN-A.
 *
 * 3) In transaction T-1, LN-A is deleted and replaced by LN-B with key X,
 * reusing the same slot but assigning a new node ID.  At this point both node
 * IDs (LN-A and LN-B) are locked.
 *
 * 4) The cleaner (via a checkpoint or eviction that logs the BIN) tries to
 * migrate LN-B, the current LN in the BIN, but finds it locked.  It adds LN-B
 * to the pending LN list.
 *
 * 5) T-1 aborts, putting the LSN of LN-A back into the BIN slot.
 *
 * 6) In transaction T-2, LN-A is deleted and replaced by LN-C with key X,
 * reusing the same slot but assigning a new node ID.  At this point both node
 * IDs (LN-A and LN-C) are locked.
 *
 * 7) The cleaner (via a checkpoint or wakeup) processes the pending LN-B.  It
 * first gets a lock on node B, then does the tree lookup.  It finds LN-C in
 * the tree, but it doesn't notice that it has a different node ID than the
 * node it locked.
 *
 * 8) The cleaner sees that LN-C is deleted, and therefore no migration is
 * necessary -- this is incorrect.  It removes LN-B from the pending list,
 * allowing the cleaned file to be deleted.
 *
 * 9) T-2 aborts, putting the LSN of LN-A back into the BIN slot.
 *
 * 10) A fetch of key X will fail, since the file containing the LSN for LN-A
 * has been deleted.  If we didn't clear the PendingDeleted flag, this will
 * cause a NOTFOUND error instead of a LogFileNotFoundException.
 */
public class SR12885Test extends TestCase {

    private static final String DB_NAME = "foo";

    /* Checkpoint configuration with the force flag set. */
    private static final CheckpointConfig forceConfig = new CheckpointConfig();
    static {
        forceConfig.setForce(true);
    }

    private File envHome;
    private Environment env;
    private Database db;

    /**
     * Resolves the environment home directory from the system property named
     * by TestUtils.DEST_DIR.
     */
    public SR12885Test() {
        envHome = new File(System.getProperty(TestUtils.DEST_DIR));
    }

    /**
     * Removes log files and files with the FileManager.DEL_SUFFIX suffix from
     * the environment home before the test runs.
     */
    public void setUp()
        throws IOException, DatabaseException {

        TestUtils.removeLogFiles("Setup", envHome, false);
        TestUtils.removeFiles("Setup", envHome, FileManager.DEL_SUFFIX);
    }

    /**
     * Releases any handles left open by a failed test, removes the files in
     * the environment home, and clears the fields.  Every step is best
     * effort: a failure is printed and the remaining steps still run.
     */
    public void tearDown()
        throws IOException, DatabaseException {

        /*
         * Close the database handle before the environment.  After a
         * successful test both fields are already null (see closeEnv); after
         * a failed test they are still set and must be released here.
         */
        try {
            if (db != null) {
                db.close();
            }
        } catch (Throwable e) {
            System.out.println("tearDown: " + e);
        }

        try {
            if (env != null) {
                env.close();
            }
        } catch (Throwable e) {
            System.out.println("tearDown: " + e);
        }

        try {
            TestUtils.removeLogFiles("tearDown", envHome, true);
            TestUtils.removeFiles("tearDown", envHome, FileManager.DEL_SUFFIX);
        } catch (Throwable e) {
            System.out.println("tearDown: " + e);
        }

        db = null;
        env = null;
        envHome = null;
    }

    /**
     * Opens the environment and database.
     */
    private void openEnv()
        throws DatabaseException {

        EnvironmentConfig config = TestUtils.initEnvConfig();
        DbInternal.disableParameterValidation(config);
        config.setTransactional(true);
        config.setAllowCreate(true);
        /* Do not run the daemons. */
        config.setConfigParam
            (EnvironmentParams.ENV_RUN_CLEANER.getName(), "false");
        config.setConfigParam
            (EnvironmentParams.ENV_RUN_EVICTOR.getName(), "false");
        config.setConfigParam
            (EnvironmentParams.ENV_RUN_CHECKPOINTER.getName(), "false");
        config.setConfigParam
            (EnvironmentParams.ENV_RUN_INCOMPRESSOR.getName(), "false");
        /* Use a small log file size to make cleaning more frequent. */
        config.setConfigParam(EnvironmentParams.LOG_FILE_MAX.getName(),
                              Integer.toString(1024));
        env = new Environment(envHome, config);

        openDb();
    }

    /**
     * Opens the database.
     */
    private void openDb()
        throws DatabaseException {

        DatabaseConfig dbConfig = new DatabaseConfig();
        dbConfig.setTransactional(true);
        dbConfig.setAllowCreate(true);
        db = env.openDatabase(null, DB_NAME, dbConfig);
    }

    /**
     * Closes the environment and database, database first, and nulls the
     * fields so that tearDown does not attempt to close them again.
     */
    private void closeEnv()
        throws DatabaseException {

        if (db != null) {
            db.close();
            db = null;
        }
        if (env != null) {
            env.close();
            env = null;
        }
    }

    /**
     * Drives the sequence described in the class comment: clean a file whose
     * only live record is key 0, reuse the slot for key 0 in a transaction
     * that is later aborted, delete key 0 in a second transaction, checkpoint,
     * abort, and finally verify that key 0 can still be fetched.
     */
    public void testSR12885()
        throws DatabaseException {

        openEnv();

        final int COUNT = 10;
        DatabaseEntry key = new DatabaseEntry();
        DatabaseEntry data = new DatabaseEntry(TestUtils.getTestArray(0));
        OperationStatus status;

        /* Add some records, enough to fill a log file. */
        for (int i = 0; i < COUNT; i += 1) {
            key.setData(TestUtils.getTestArray(i));
            status = db.putNoOverwrite(null, key, data);
            assertEquals(OperationStatus.SUCCESS, status);
        }

        /*
         * Delete all but key 0, so the first file can be cleaned but key 0
         * will need to be migrated.
         */
        for (int i = 1; i < COUNT; i += 1) {
            key.setData(TestUtils.getTestArray(i));
            status = db.delete(null, key);
            assertEquals(OperationStatus.SUCCESS, status);
        }

        /*
         * Checkpoint and clean to set the migrate flag for key 0.  This must
         * be done when key 0 is not locked, so that it will not be put onto
         * the pending list yet.  Below we cause it to be put onto the pending
         * list with a different node ID.
         */
        env.checkpoint(forceConfig);
        int cleaned = env.cleanLog();
        assertTrue("cleaned=" + cleaned, cleaned > 0);

        /*
         * Using a transaction, delete then insert key 0, reusing the slot.
         * The insertion assigns a new node ID.  Don't abort the transaction
         * until after the cleaner migration is finished.
         */
        Transaction txn = env.beginTransaction(null, null);
        key.setData(TestUtils.getTestArray(0));
        status = db.delete(txn, key);
        assertEquals(OperationStatus.SUCCESS, status);
        status = db.putNoOverwrite(txn, key, data);
        assertEquals(OperationStatus.SUCCESS, status);

        /*
         * Checkpoint again to perform LN migration.  LN migration will not
         * migrate key 0 because it is locked -- it will be put onto the
         * pending list.  But the LN put on the pending list will be the newly
         * inserted node, which has a different node ID than the LN that needs
         * to be migrated -- this is the first condition for the bug.
         */
        env.checkpoint(forceConfig);

        /*
         * Abort the transaction to revert to the original node ID for key 0.
         * Then perform a delete with a new transaction.  This makes the
         * current LN for key 0 deleted.
         */
        txn.abort();
        txn = env.beginTransaction(null, null);
        key.setData(TestUtils.getTestArray(0));
        status = db.delete(txn, key);
        assertEquals(OperationStatus.SUCCESS, status);

        /*
         * The current state of key 0 is that the BIN contains a deleted LN,
         * and that LN has a node ID that is different than the one in the
         * pending LN list.  This node is the one that needs to be migrated.
         *
         * Perform a checkpoint to cause pending LNs to be processed and then
         * delete the cleaned file.  When we process the pending LN, we'll lock
         * the pending LN's node ID (the one we inserted and aborted), which is
         * the wrong node ID.  We'll then examine the current LN, find it
         * deleted, and neglect to migrate the LN that needs to be migrated.
         * The error is that we don't lock the node ID of the current LN.
         *
         * Then abort the delete transaction.  That will revert the BIN entry
         * to the node we failed to migrate.  If we then try to fetch key 0,
         * we'll get LogFileNotFoundException.
         */
        env.checkpoint(forceConfig);
        txn.abort();
        status = db.get(null, key, data, null);
        assertEquals(OperationStatus.SUCCESS, status);

        /* If we get this far without LogFileNotFoundException, it's fixed. */

        closeEnv();
    }
}