File: LuceneSupportTest.java

package info (click to toggle)
derby 10.14.2.0-2
links: PTS, VCS
area: main
in suites: bookworm, bullseye
size: 78,896 kB
sloc: java: 691,930; sql: 42,686; xml: 20,511; sh: 3,373; sed: 96; makefile: 60
file content (530 lines) | stat: -rw-r--r-- 18,563 bytes
parent folder | download | duplicates (4)
/**
 *  Derby - Class org.apache.derbyTesting.functionTests.tests.lang.LuceneSupportTest
 *  
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.derbyTesting.functionTests.tests.lang;

import java.io.IOException;
import java.io.Reader;
import java.sql.CallableStatement;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.sql.Statement;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import javax.xml.parsers.DocumentBuilderFactory;
import junit.framework.Test;
import org.apache.derby.optional.api.LuceneIndexDescriptor;
import org.apache.derby.optional.api.LuceneUtils;
import org.apache.derbyTesting.junit.BaseJDBCTestCase;
import org.apache.derbyTesting.junit.BaseTestSuite;
import org.apache.derbyTesting.junit.JDBC;
import org.apache.derbyTesting.junit.LocaleTestSetup;
import org.apache.derbyTesting.junit.SecurityManagerSetup;
import org.apache.derbyTesting.junit.TestConfiguration;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.queryparser.classic.MultiFieldQueryParser;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.util.Version;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;

/**
 * <p>
 * Basic test of the optional tool which provides Lucene indexing of
 * columns in Derby tables.
 * </p>
 */
public class LuceneSupportTest extends BaseJDBCTestCase {

    /**
     * SQLState which testDropIndexBadCharacters() expects from
     * LuceneSupport.dropIndex() when one of its name arguments is "../".
     */
    private static  final   String  ILLEGAL_CHARACTER = "42XBD";
    
	/**
	 * Create a test case with the given name.
	 *
	 * @param name handed unchanged to the superclass constructor
	 */
	public LuceneSupportTest(String name) {
		super(name);
	}
	
	/**
	 * Assemble the suite for this class. The embedded suite of all fixtures
	 * is wrapped, from the inside out, in a single-use database decorator,
	 * an en_US locale setup and a no-security-manager decorator.
	 *
	 * @return a suite named "LuceneSupportTest" holding the decorated test
	 */
	public static Test suite() {
		Test embedded = TestConfiguration.embeddedSuite(LuceneSupportTest.class);
		Test ownDatabase = TestConfiguration.singleUseDatabaseDecorator(embedded);
		Test enUs = new LocaleTestSetup(ownDatabase, new Locale("en", "US"));

		BaseTestSuite all = new BaseTestSuite("LuceneSupportTest");
		all.addTest(SecurityManagerSetup.noSecurityManager(enUs));
		return all;
	}
	
	/**
	 * Create a Lucene index on TITLES.TITLE, query it through the generated
	 * table function lucenetest.titles__title, join the hits back to the base
	 * table, and drop the index again.
	 *
	 * The third argument of the table function is exercised with null, .75
	 * and 0.5; the assertions expect only hits whose score is below that
	 * value (or all hits when it is null).
	 *
	 * @throws Exception if any statement fails unexpectedly
	 */
	public void testCreateAndQueryIndex() throws Exception {
		Statement s = createStatement();

		// verify that we are in an en Locale. The helper function is created
		// and dropped through the Statement we already hold, so that no
		// additional, never-closed PreparedStatement is opened for it.
		s.executeUpdate
			(
			 "create function getDatabaseLocale() returns varchar( 20 )\n" +
			 "language java parameter style java reads sql data\n" +
			 "external name 'org.apache.derbyTesting.functionTests.tests.lang.LuceneSupportPermsTest.getDatabaseLocale()'\n"
			 );
		JDBC.assertFullResultSet
			(
			 s.executeQuery
			 (
			  "values ( substr( getDatabaseLocale(), 1, 2 ) )"
			  ),
			 new String[][]
			 {
				 { "en" }
			 }
			 );
		s.executeUpdate( "drop function getDatabaseLocale" );

		CallableStatement cSt = prepareCall
			( "call LuceneSupport.createIndex('lucenetest','titles','title', null )" );
		assertUpdateCount(cSt, 0);

		// No limit on the score: both titles containing "grapes" are returned.
		String[][] expectedRows = new String[][]
			{
				{ "1","0","0.8048013" },
				{ "3","2","0.643841" }
			};
		JDBC.assertFullResultSet
			(
			 s.executeQuery
			 (
			  "select * from table ( lucenetest.titles__title( 'grapes', 1000, null ) ) luceneResults"
			  ),
			 expectedRows
			 );

		// With .75 only the lower-scoring hit is expected.
		expectedRows = new String[][]
			{
				{ "3","2","0.643841" }
			};
		JDBC.assertFullResultSet
			(
			 s.executeQuery
			 (
			  "select * from table ( lucenetest.titles__title( 'grapes', 1000, .75 ) ) luceneResults"
			  ),
			 expectedRows
			 );

		// With 0.5 neither hit is expected.
		JDBC.assertEmpty
			(
			 s.executeQuery
			 (
			  "select * from table ( lucenetest.titles__title( 'grapes',  1000, 0.5) ) luceneResults"
			  )
			 );

		// Join the Lucene hits back to the indexed table on its key.
		expectedRows = new String[][]
			{
				{ "The Grapes Of Wrath", "John Steinbeck", "The Viking Press", "0"},
				{"Vines, Grapes, and Wines", "Jancis Robinson", "Alfred A. Knopf", "2"}
			};
		JDBC.assertFullResultSet
			(
			 s.executeQuery
			 (
			  "select title, author, publisher, documentID\n" +
			  "from lucenetest.titles t, table ( lucenetest.titles__title( 'grapes', 1000, null ) ) l\n" +
			  "where t.id = l.id\n"
			  ),
			 expectedRows
			 );

		cSt = prepareCall
			( "call LuceneSupport.dropIndex('lucenetest','titles','title')" );
		assertUpdateCount(cSt, 0);
	}
	
	/**
	 * Check that a change to the indexed column does not show up in query
	 * results until LuceneSupport.updateIndex() has been called.
	 *
	 * @throws Exception if any statement fails unexpectedly
	 */
	public void testUpdateIndex() throws Exception {
		// The same query is issued before the update, after the update,
		// and after the index has been refreshed.
		final String miceQuery =
			"select *\n" +
			"from table ( lucenetest.titles__title( 'mice', 1000, null ) ) luceneResults\n";

		Statement query = createStatement();

		CallableStatement createIndex = prepareCall
			( "call LuceneSupport.createIndex('lucenetest','titles','title', null)" );
		assertUpdateCount(createIndex, 0);

		// Nothing mentions mice yet.
		JDBC.assertEmpty( query.executeQuery( miceQuery ) );

		CallableStatement changeTitle =
			prepareCall( "update TITLES SET TITLE='Of Mice and Men' WHERE ID=1" );
		assertUpdateCount(changeTitle, 1);

		// The table changed, but the index has not been refreshed.
		JDBC.assertEmpty( query.executeQuery( miceQuery ) );

		CallableStatement refreshIndex = prepareCall
			( "call LuceneSupport.updateIndex('lucenetest','titles','title', null)" );
		assertUpdateCount(refreshIndex, 0);

		// Now the new title is found.
		String[][] afterRefresh = { { "1","0","1.058217" } };
		JDBC.assertFullResultSet( query.executeQuery( miceQuery ), afterRefresh );

		CallableStatement dropIndex = prepareCall
			( "call LuceneSupport.dropIndex('lucenetest','titles','title')" );
		assertUpdateCount(dropIndex, 0);
	}
	
	/**
	 * Check that LuceneSupport.listIndexes() reflects the indexes that
	 * currently exist as they are created and dropped one by one.
	 *
	 * @throws Exception if any statement fails unexpectedly
	 */
	public void testListIndex() throws Exception {
		// leave out lastmodified as the date will change
		final String orderedListing =
			"select schemaname, tablename, columnname from table ( LuceneSupport.listIndexes() ) listindexes order by schemaname, tablename, columnname";

		Statement query = createStatement();

		assertUpdateCount
			( prepareCall( "call LuceneSupport.createIndex('lucenetest','titles','title', null)" ), 0 );
		assertUpdateCount
			( prepareCall( "call LuceneSupport.createIndex('lucenetest','titles','author', null)" ), 0 );

		// Both indexes are listed.
		String[][] titleAndAuthor =
			{
				{ "LUCENETEST", "TITLES", "AUTHOR" },
				{ "LUCENETEST", "TITLES", "TITLE" }
			};
		JDBC.assertFullResultSet( query.executeQuery( orderedListing ), titleAndAuthor );

		assertUpdateCount
			( prepareCall( "call LuceneSupport.dropIndex('lucenetest','titles','title')" ), 0 );

		// Only the author index is left.
		String[][] authorOnly =
			{
				{ "LUCENETEST", "TITLES", "AUTHOR" },
			};
		JDBC.assertFullResultSet( query.executeQuery( orderedListing ), authorOnly );

		assertUpdateCount
			( prepareCall( "call LuceneSupport.dropIndex('lucenetest','titles','author')" ), 0 );

		// Nothing is left to list.
		JDBC.assertEmpty
			(
			 query.executeQuery
			 (
			  "select schemaname, tablename, columnname from table ( LuceneSupport.listIndexes() ) listindexes"
			  )
			 );
	}
	
	/**
	 * Check that LuceneSupport.dropIndex() fails with ILLEGAL_CHARACTER when
	 * "../" is passed as the schema, the table or the column name.
	 *
	 * @throws Exception if a call fails in an unexpected way
	 */
	public void testDropIndexBadCharacters() throws Exception {
		// One call per argument position holding the offending value.
		String[] badCalls =
			{
				"call LuceneSupport.dropIndex('../','','')",
				"call LuceneSupport.dropIndex('','../','')",
				"call LuceneSupport.dropIndex('','','../')"
			};

		for (int i = 0; i < badCalls.length; i++) {
			assertCallError( ILLEGAL_CHARACTER, badCalls[i] );
		}
	}

    //////////////////////////////////////////////////////////////
    //
    //  BEGIN TEST FOR MULTIPLE FIELDS
    //
    //////////////////////////////////////////////////////////////
	
    /**
     * Index a CLOB column holding small XML documents with the custom
     * descriptor returned by makeMultiFieldIndexDescriptor(), then query
     * the "text" field, the "tags" field, and both fields at once.
     *
     * @throws SQLException if any statement fails unexpectedly
     */
    public void testMultipleFields() throws SQLException
    {
        println( "Running multi-field test." );

        Statement ddl = createStatement();
        ddl.execute("create table multifield(id int primary key, c clob)");
        ddl.execute("insert into multifield values "
                + "(1, '<document><secret/>No one must know!</document>'), "
                + "(2, '<document>No secret here!</document>')");

        // Name of the static factory method which supplies the descriptor.
        String descriptorMaker =
                getClass().getName() + ".makeMultiFieldIndexDescriptor";
        ddl.execute("call lucenesupport.createindex('lucenetest', 'multifield', "
                + "'c', '" + descriptorMaker + "')");

        PreparedStatement search = prepareStatement(
                "select id from table(multifield__c(?, 100, null)) t");

        // "secret" is text in document 2 ...
        search.setString(1, "text:secret");
        JDBC.assertSingleValueResultSet(search.executeQuery(), "2");

        // ... and a tag in document 1.
        search.setString(1, "tags:secret");
        JDBC.assertSingleValueResultSet(search.executeQuery(), "1");

        // Without a field qualifier both documents are expected.
        String[][] bothRows = { {"1"}, {"2"} };
        search.setString(1, "secret");
        JDBC.assertUnorderedResultSet(search.executeQuery(), bothRows);
    }

    /**
     * Factory for the index descriptor used by the multi-field test. It is
     * referenced by name in the createindex call of testMultipleFields().
     *
     * @return a new MultiFieldIndexDescriptor
     */
    public  static  LuceneIndexDescriptor   makeMultiFieldIndexDescriptor()
    {
        LuceneIndexDescriptor descriptor = new MultiFieldIndexDescriptor();
        return descriptor;
    }
    /**
     * Build a query parser which searches several fields. The parser
     * tokenizes query text with a StandardAnalyzer; the analyzer argument
     * is deliberately not used, so the XMLAnalyzer that built the index is
     * not applied to queries.
     *
     * @param version  Lucene version handed to the parser and its analyzer
     * @param fields   names of the fields to search
     * @param analyzer ignored
     * @return a MultiFieldQueryParser over the given fields
     */
    public static QueryParser createXMLQueryParser(
            Version version, String[] fields, Analyzer analyzer) {
        Analyzer queryAnalyzer = new StandardAnalyzer(version);
        QueryParser parser =
                new MultiFieldQueryParser(version, fields, queryAnalyzer);
        return parser;
    }

    /**
     * Analyzer for XML documents which feeds the "text" field from the
     * character data and the "tags" field from the element names.
     */
    public static class XMLAnalyzer extends Analyzer {

        public XMLAnalyzer() {
            // Each field needs its own tokenizer, so components must be
            // reused per field rather than globally.
            super(PER_FIELD_REUSE_STRATEGY);
        }

        /**
         * Choose the tokenizer that matches the field being analyzed.
         * Any field name other than "text" or "tags" fails the test.
         */
        @Override
        protected TokenStreamComponents createComponents(
                String fieldName, Reader reader) {

            Tokenizer tokenizer;

            if (fieldName.equals("text")) {
                tokenizer = new XMLTextTokenizer(reader);
            } else if (fieldName.equals("tags")) {
                tokenizer = new XMLTagsTokenizer(reader);
            } else {
                fail("unknown field name: " + fieldName);
                // Only reached if fail() returns normally.
                return null;
            }

            return new TokenStreamComponents(tokenizer);
        }
    }

    /**
     * Shared token-emitting logic of XMLTextTokenizer and XMLTagsTokenizer.
     * Subclasses only say which strings make up the token sequence; this
     * class hands them out one at a time.
     */
    private abstract static class AbstractTokenizer extends Tokenizer {
        /** Pending tokens; null until the first token is requested after a reset. */
        Iterator<String> tokens;
        final CharTermAttribute charTermAttr =
                addAttribute(CharTermAttribute.class);
        final PositionIncrementAttribute posIncrAttr
                = addAttribute(PositionIncrementAttribute.class);

        AbstractTokenizer(Reader reader) {
            super(reader);
        }

        /**
         * Publish the next token through the term attribute, with a
         * position increment of one.
         *
         * @return false once all tokens have been handed out
         */
        @Override
        public boolean incrementToken() throws IOException {
            // Ask the subclass for its tokens lazily, on first use.
            if (tokens == null) {
                tokens = getTokens().iterator();
            }

            if (!tokens.hasNext()) {
                return false;
            }

            charTermAttr.setEmpty();
            charTermAttr.append(tokens.next());
            posIncrAttr.setPositionIncrement(1);
            return true;
        }

        /** Forget the current token sequence so the next call recomputes it. */
        @Override
        public void reset() throws IOException {
            tokens = null;
            super.reset();
        }

        /** @return the strings to emit as tokens, in order */
        abstract Iterable<String> getTokens();
    }

    /**
     * Tokenizer which emits the character data of an XML document, split
     * at every space, carriage return, newline and tab.
     */
    private static class XMLTextTokenizer extends AbstractTokenizer {

        XMLTextTokenizer(Reader in) {
            super(in);
        }

        @Override
        Iterable<String> getTokens() {
            StringBuilder allText = new StringBuilder();
            Document doc = parseXMLDocument(input);
            getAllText(doc, allText);

            String[] words = allText.toString().split("[ \r\n\t]");
            return Arrays.asList(words);
        }

    }

    /** Tokenizer which emits the element names found in an XML document. */
    private static class XMLTagsTokenizer extends AbstractTokenizer {

        XMLTagsTokenizer(Reader in) {
            super(in);
        }

        @Override
        Iterable<String> getTokens() {
            Document doc = parseXMLDocument(input);
            List<String> tagNames = getAllXMLTags(doc);
            return tagNames;
        }

    }

    /**
     * Parse an XML document from a Reader.
     *
     * The reader is closed whether or not parsing succeeds. Any exception
     * raised while building the parser, parsing, or closing the reader is
     * reported through fail().
     *
     * @param reader source of the XML text; closed by this method
     * @return the parsed document (null only if fail() returns normally
     *         after an error)
     */
    private static Document parseXMLDocument(Reader reader) {
        Document doc = null;

        try {
            try {
                doc = DocumentBuilderFactory.newInstance()
                        .newDocumentBuilder().parse(new InputSource(reader));
            } finally {
                // Release the reader on the failure path too, not only
                // after a successful parse.
                reader.close();
            }
        } catch (Exception e) {
            fail("Failed to parse XML document", e);
        }

        return doc;
    }

    /**
     * Collect the names of all element nodes below a node. Each element is
     * listed before the elements nested inside it, and siblings appear in
     * document order.
     *
     * @param node the node whose descendants are examined
     * @return the element names found; empty if there are none
     */
    private static List<String> getAllXMLTags(Node node) {
        ArrayList<String> tagNames = new ArrayList<String>();
        for (Node child = node.getFirstChild();
                child != null;
                child = child.getNextSibling()) {
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            tagNames.add(child.getNodeName());
            tagNames.addAll(getAllXMLTags(child));
        }
        return tagNames;
    }

    /**
     * Append the value of every text node at or below a node to a buffer,
     * in document order, leaving the markup behind.
     *
     * @param node the node to extract text from
     * @param sb   the buffer receiving the text
     */
    private static void getAllText(Node node, StringBuilder sb) {
        if (node.getNodeType() == Node.TEXT_NODE) {
            sb.append(node.getNodeValue());
            return;
        }

        // Not a text node: descend into its children, if any.
        for (Node child = node.getFirstChild();
                child != null;
                child = child.getNextSibling()) {
            getAllText(child, sb);
        }
    }

    /**
     * Index descriptor for the multi-field test: two fields, "tags" and
     * "text", indexed with XMLAnalyzer and queried with a
     * MultiFieldQueryParser backed by a StandardAnalyzer.
     */
    public static class MultiFieldIndexDescriptor implements LuceneIndexDescriptor
    {
        /** @return a fresh array holding the two field names */
        public  String[]    getFieldNames()
        {
            return new String[] { "tags", "text" };
        }

        /** @return a new XMLAnalyzer */
        public Analyzer getAnalyzer()
        {
            return new XMLAnalyzer();
        }

        /** @return a parser which searches all fields of this descriptor */
        public  QueryParser getQueryParser()
        {
            Version version = LuceneUtils.currentVersion();
            String[] fields = getFieldNames();
            Analyzer queryAnalyzer = new StandardAnalyzer( version );

            return new MultiFieldQueryParser( version, fields, queryAnalyzer );
        }

    }

    //////////////////////////////////////////////////////////////
    //
    //  END TEST FOR MULTIPLE FIELDS
    //
    //////////////////////////////////////////////////////////////
	
	/**
	 * Prepare the fixture: make sure the LUCENETEST schema exists and is
	 * the current schema, create and populate the TITLES table with four
	 * rows, and load the luceneSupport optional tool.
	 *
	 * @throws SQLException if any step fails, including a failure to create
	 *         the schema for a reason other than it already existing
	 */
	protected void setUp() throws SQLException {
		Statement st = createStatement();

		try {
			st.executeUpdate("create schema lucenetest");
		} catch (SQLException e) {
			// tearDown() drops the table but not the schema, so a fixture
			// that runs against a database used by an earlier fixture
			// finds the schema already there. Only that case (SQLState
			// X0Y68, "already exists") is ignored; anything else is a
			// genuine setup failure and must not be hidden.
			if (!"X0Y68".equals(e.getSQLState())) {
				throw e;
			}
		}
		st.executeUpdate("set schema lucenetest");
		st.executeUpdate("create table titles (ID int generated always as identity primary key, ISBN varchar(16), PRINTISBN varchar(16), title varchar(1024), subtitle varchar(1024), author varchar(1024), series varchar(1024), publisher varchar(1024), collections varchar(128), collections2 varchar(128))");
		st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('9765087650324','9765087650324','The Grapes Of Wrath','The Great Depression in Oklahoma','John Steinbeck','Noble Winners','The Viking Press','National Book Award','Pulitzer Prize')");
		st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('6754278542987','6754278542987','Identical: Portraits of Twins','Best Photo Book 2012 by American Photo Magazine','Martin Schoeller','Portraits','teNeues','Photography','')");
		st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('2747583475882','2747583475882','Vines, Grapes, and Wines','The wine drinker''s guide to grape varieties','Jancis Robinson','Reference','Alfred A. Knopf','Wine','')");
		st.executeUpdate("insert into titles (ISBN, PRINTISBN, TITLE, SUBTITLE, AUTHOR, SERIES, PUBLISHER, COLLECTIONS, COLLECTIONS2) values ('4356123483483','4356123483483','A Tale of Two Cities','A fictional account of events leading up to the French revolution','Charles Dickens','Classics','Chapman & Hall','Fiction','Social Criticism')");

		// Load the optional tool which the fixtures exercise.
		CallableStatement cSt = prepareCall
			( "call syscs_util.syscs_register_tool('luceneSupport',true)" );
		assertUpdateCount(cSt, 0);
	}
	
	/**
	 * Undo setUp(): drop the TITLES table and unload the luceneSupport
	 * tool. The superclass tearDown() runs even when one of those steps
	 * throws, so its cleanup is never skipped.
	 *
	 * @throws Exception if dropping the table, unloading the tool or the
	 *         superclass cleanup fails
	 */
	protected void tearDown() throws Exception {
		try {
			Statement st = createStatement();
			st.executeUpdate("drop table titles");

			CallableStatement cSt = prepareCall
				( "call syscs_util.syscs_register_tool('luceneSupport',false)" );
			assertUpdateCount(cSt, 0);
		} finally {
			super.tearDown();
		}
	}
}