Copying an FSDirectory to an InfinispanDirectory

FSDirectory vs InfinispanDirectory vs RAMDirectory

The RAMDirectory provided by Lucene is not really meant for high performance. The filesystem based implementations using NIO and memory map are likely more efficient, unless you’re dealing with indexes meant for proof of concepts and unit tests.

The Infinispan Directory is – like the filesystem one – tuned for good performance; it is in fact a bit faster than the filesystem ones to perform write operations (for obvious reasons); the speed race on read performance is a subtle battle, strongly depending on your actual use case.

The main reason to use the Infinispan Directory is not raw performance but:

  1. Its capability to replicate and distribute the index across multiple nodes: using shared filesystems for FSDirectory is usually problematic and not fast at all.
  2. It’s able to work as a caching write-through store to slower persistence services. FS is one, but it might be a relational database, Cassandra, cloud storage services such as S3, …[write your plugin]
  3. It’s (optionally) Transactional. You could have it participate in XA transactions if needed.

Migration steps:

  • Configure the Infinispan Lucene directory using lucene-infinispan-config.xml (sample configurations can be found online).
    1. Pay attention to the <transport> tag, which in my setup uses JGroups.
    2. For the DB-backed store: in a web environment, set the connectionFactoryClass property to org.infinispan.loaders.jdbc.connectionfactory.ManagedConnectionFactory and then add the JNDI settings. In a standalone environment, use org.infinispan.loaders.jdbc.connectionfactory.PooledConnectionFactory and then add the JDBC settings.
  • Create the InfinispanDirectory using the cache defined in lucene-infinispan-config.xml. In the constructor, specify the chunk size if the index file is huge; otherwise it will use the default buffer size of 16K, which may kill the DB if we have a 2GB or larger index file. I use 30MB.
  • get the files from FSDirectory ready.
  • Use the Directory API (srcDir.copy(destDir, src, dest)) to do the migration.
  • The last step is to test the migrated index.
// Copies every file of a filesystem-based Lucene index (path given as the single
// command-line argument) into an Infinispan-backed directory.

// Validate the arguments first so that a bad invocation fails before the
// cache manager (and whatever transport it configures) is started.
if (args == null || args.length != 1)
{
    throw new IllegalArgumentException("please specify index file path as the 1st argument.");
}

Directory dir = FSDirectory.open(new File(args[0]));
try
{
    DefaultCacheManager cacheManager = new DefaultCacheManager("lucene-infinispan-config.xml");
    try
    {
        Cache<Object, Object> cache = cacheManager.getCache("INDX_MD");
        // 31457280 bytes = 30 MB chunk size, used instead of the default for very large index files.
        Directory infinispanDirectory =
            new InfinispanDirectory(cache, "tempTestIndex", 31457280, new NoopSegmentReadLocker());
        try
        {
            for (String file : dir.listAll())
            {
                // Report progress before the copy starts: a single file can take a long time.
                System.out.println("--> " + file + "  copying ... ");
                dir.copy(infinispanDirectory, file, file);
            }
        }
        finally
        {
            infinispanDirectory.close();
        }
    }
    finally
    {
        // Stop the manager so its threads and transport resources are released.
        cacheManager.stop();
    }
}
finally
{
    dir.close();
}

Test code:

import java.io.File;
import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.infinispan.Cache;
import org.infinispan.lucene.InfinispanDirectory;
import org.infinispan.manager.DefaultCacheManager;
import org.junit.Assert;
import org.junit.Test;

/**
 * Created with IntelliJ IDEA. User: LiHa Date: 11/5/12
 */
public class InfinispanSearchTest
{
    private static File fsDirectoryBase = new File("C:\\Users\\LiHa\\Desktop\\indexFiles\\");
    private static File fsDirectoryTest1 = new File(fsDirectoryBase, "test1");

    /**
     * write doc into FSDirectory in the file system
     *
     * @throws IOException
     */
    @Test

    public void beforeMigrationTest() throws IOException
    {
        Directory dir = FSDirectory.open(fsDirectoryBase);
        StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_35);
        IndexWriter iw = new IndexWriter(dir, sa, IndexWriter.MaxFieldLength.UNLIMITED);
        for (int i = 0; i &lt; 10; i++)
        {
            Document d = new Document();
            d.add(new Field("ID", Integer.toString(i), Field.Store.YES, Field.Index.NO));
            d.add(new Field("X", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
            d.add(new Field("Y", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
            d.add(new Field("VALID", Boolean.TRUE.toString(), Field.Store.NO, Field.Index.ANALYZED));

            iw.addDocument(d);
        }
        iw.commit();
        iw.waitForMerges();
        iw.close(true);

        IndexReader ir = IndexReader.open(dir, true);
        IndexSearcher is = new IndexSearcher(ir);
        long t = System.currentTimeMillis();
        TopDocs td = is.search(new TermQuery(new Term("VALID", "true")), 1);
        System.out.println(System.currentTimeMillis() - t);

        Assert.assertEquals(td.totalHits, 10);
        is.close();
        ir.close();

    }

    /**
     * after migration. test index existence.
     *
     * @throws IOException
     */
    @Test
    public void afterMigrationTest() throws IOException, ParseException
    {

        Cache cache = new DefaultCacheManager("lucene-infinispan-config.xml").getCache("INDX_MD");

        Directory dir = new InfinispanDirectory(cache, "tempTestIndex");

        IndexReader ir = IndexReader.open(dir, true);

        IndexSearcher is = new IndexSearcher(ir);
        long t = System.currentTimeMillis();
                TopDocs td = is.search(new TermQuery(new Term("VALID", "true")), 1);
                System.out.println("5 : "+ (t-System.currentTimeMillis()));
                Assert.assertEquals(td.totalHits, 10);

        //        TopDocs td = is.search(new TermQuery(new Term("formType", "RPR")), 1);
        //        System.out.println("total hits : "+td.totalHits);

        for (int i = 0; i &lt; 10; i++)
        {
        StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_35);
        QueryParser parser = new QueryParser(Version.LUCENE_35, "key", sa);
        Query query = parser.parse("formType:RPR +key:2*");

            t = System.currentTimeMillis();
            TopDocs topDocs = is.search(query, 1);
            System.out.println("search "+i+" time: " + (t - System.currentTimeMillis()));

            System.out.println("search "+i+" total hits : " + topDocs.totalHits);
        }
        is.close();
        ir.close();

    }

    /**
     * add doc to a infinispan directory.
     *
     * @throws IOException
     */
    @Test
    public void addDocToInfinispanTest() throws IOException
    {
        Cache cache = new DefaultCacheManager("lucene-infinispan-config.xml").getCache("INDX_MD");
        Directory dir = new InfinispanDirectory(cache, "tempTestIndex");

        StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_35);
        IndexWriter iw = new IndexWriter(dir, sa, IndexWriter.MaxFieldLength.UNLIMITED);
        for (int i = 0; i &lt; 10; i++)
        {
            Document d = new Document();
            d.add(new Field("ID", Integer.toString(i), Field.Store.YES, Field.Index.NO));
            d.add(new Field("X", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
            d.add(new Field("Y", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
            d.add(new Field("VALID", Boolean.TRUE.toString(), Field.Store.NO, Field.Index.ANALYZED));

            iw.addDocument(d);
        }
        iw.commit();
        iw.waitForMerges();
        iw.close(true);

        IndexReader ir = IndexReader.open(dir, true);
        IndexSearcher is = new IndexSearcher(ir);
        TopDocs td = is.search(new TermQuery(new Term("VALID", "true")), 1);
        Assert.assertEquals(td.totalHits, 10);
        is.close();
        ir.close();

    }

    /**
     * add doc into a test directory.
     *
     * @throws IOException
     */
    @Test
    public void addDocTest() throws IOException
    {
        Directory dir = FSDirectory.open(fsDirectoryTest1);
        StandardAnalyzer sa = new StandardAnalyzer(Version.LUCENE_35);
        IndexWriter iw = new IndexWriter(dir, sa, IndexWriter.MaxFieldLength.UNLIMITED);
        for (int i = 0; i &lt; 10; i++)
        {
            Document d = new Document();
            d.add(new Field("ID", Integer.toString(i), Field.Store.YES, Field.Index.NO));
            d.add(new Field("X", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
            d.add(new Field("Y", Integer.toString(i), Field.Store.NO, Field.Index.ANALYZED));
            d.add(new Field("VALID", Boolean.TRUE.toString(), Field.Store.NO, Field.Index.ANALYZED));

            iw.addDocument(d);
        }
        iw.commit();
        iw.waitForMerges();
        iw.close(true);

        IndexReader ir = IndexReader.open(dir, true);
        IndexSearcher is = new IndexSearcher(ir);
        TopDocs td = is.search(new TermQuery(new Term("VALID", "true")), 1);
        Assert.assertEquals(td.totalHits, 10);
        is.close();
        ir.close();

    }

}
Advertisements

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s