Sunday, March 25, 2012

Lucene: Sort the Search Result

Introduction

This post shows how to sort the search results in Lucene 3.5.0.

The Program

SortTest.java

package test.lucene.testtwo;

import java.io.IOException;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SortField;

import test.lucene.utils.DateUtils;
import test.lucene.wrapper.WrappedDocument;
import test.lucene.wrapper.WrappedIndexWriter;
import test.lucene.wrapper.WrappedQuery;
import test.lucene.wrapper.WrappedSearcher;
import test.lucene.wrapper.WrappedSort;

public class SortTest {
    /**
     * Index four documents whose "time" field is 30, 20, 10 and 0 days
     * in the past, then run two range queries on that field and print
     * the hits sorted by time (first descending, then ascending).
     * Any exception is caught and its stack trace printed.
     * @param args Not used
     */
    public static void main(String[] args) {
        try{
            // Create WrappedIndexWriter
            WrappedIndexWriter wiw = new WrappedIndexWriter();
            long oneDay = 1000L*60*60*24;
            long currentTime = System.currentTimeMillis();

            // create dates
            Date dateOne = new Date(currentTime - (oneDay*30)); // 30 days ago
            Date dateTwo = new Date(currentTime - (oneDay*20)); // 20 days ago
            Date dateThree = new Date(currentTime - (oneDay*10)); // 10 days ago
            Date dateFour = new Date(currentTime); // just now

            // convert dates to strings
            String sdOne = DateUtils.getStringMillis(dateOne);
            String sdTwo = DateUtils.getStringMillis(dateTwo);
            String sdThree = DateUtils.getStringMillis(dateThree);
            String sdFour = DateUtils.getStringMillis(dateFour);

            Locale.setDefault(Locale.US);
            TimeZone.setDefault(TimeZone.getTimeZone("EST"));

            // Create IndexWriter
            wiw.createIndexWriter();
            // add Documents and close
            // based on the Javadoc, the sort field (time)
            // should be NOT_ANALYZED
            wiw.addDocument(new WrappedDocument("title", "Test Title One")
                                .addField("content", "Test Content One")
                                .addField("time", sdOne, true, false)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Two")
                                .addField("content", "Test Content Two")
                                .addField("time", sdTwo, true, false)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Three")
                                .addField("content", "Test Content Three")
                                .addField("time", sdThree, true, false)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Four")
                                .addField("content", "Test Content Four")
                                .addField("time", sdFour, true, false)
                                .getDocument())
                .close();

            // create WrappedSearcher, initiate searcher
            WrappedSearcher ws = new WrappedSearcher().initSearcher(wiw);

            // do search by range query of date
            // not include upper bound, include lower bound,
            // result: dateOne and dateTwo
            // Sort by time descending
            searchAndDisplay(wiw, ws,
                new WrappedQuery()
                    .createTermRangeQuery("time", sdOne, sdThree, true, false),
                "time", true);

            // do search by range query of date
            // include upper bound and lower bound
            // results: from dateTwo to dateFour
            // Sort by time ascending
            searchAndDisplay(wiw, ws,
                new WrappedQuery()
                    .createTermRangeQuery("time", sdTwo, sdFour, true, true),
                "time", false);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
    /**
     * Run the query sorted by the given field and print up to 10 hits
     * (title, content and time of each document).
     * @param wiw Not used by this method; kept so existing callers still compile
     * @param ws The searcher to run the query with
     * @param wq The query to run
     * @param sortField Name of the field to sort by (sorted as STRING)
     * @param reverse Passed to SortField as its reverse flag
     * @throws IOException
     * @throws ParseException
     * @throws java.text.ParseException If the stored time string can not be parsed
     */
    public static void searchAndDisplay (WrappedIndexWriter wiw, WrappedSearcher ws, WrappedQuery wq,
                                            String sortField, boolean reverse)
        throws IOException, ParseException, java.text.ParseException {
        ScoreDoc[] results = ws.doSearch( wq, 10,
                        new WrappedSort(new SortField(sortField, SortField.STRING, reverse)));

        // display results
        System.out.println(results.length + " results.");
        for(int i=0; i < results.length; i++) {
            int docId = results[i].doc;
            Document doc = ws.getIndexSearcher().doc(docId);
            // the last line shows the time field, so label it "time"
            // (it used to be a second "content" label)
            System.out.println((i + 1) + "\ttitle: " + doc.get("title")
                                + "\n\tcontent: " + doc.get("content")
                                + "\n\ttime: " + DateUtils.stringToDate(doc.get("time"))
                                + "\n");
        }
    }
}

This is the main program. It is very similar to RangeQueryTest.java from this post:
http://ben-bai.blogspot.com/2012/03/lucene-searching-range-of-dates-by.html

The only changes are that the time field is now NOT_ANALYZED and a Sort condition is added.

WrappedSort.java

package test.lucene.wrapper;

import java.util.*;

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

/**
 * This class wraps the Sort class of Lucene so that we can
 * keep appending SortField(s) easily and only build the
 * real Sort when we finally need it.
 *
 */
public class WrappedSort {
    private Sort _sort; // Sort
    private List<SortField> _sortFields = new LinkedList<SortField>(); // SortField(s)
    /**
     * Constructor that only creates the instance of Sort
     */
    public WrappedSort () {
        createSort();
    }
    /**
     * Constructor that creates the instance of Sort and adds one SortField
     * @param sortField The SortField to add
     */
    public WrappedSort (SortField sortField) {
        createSort().addSortField(sortField);
    }
    /**
     * Constructor that creates the instance of Sort and adds a SortField List
     * @param sortFields The List of SortField to add
     */
    public WrappedSort (List<SortField> sortFields) {
        createSort().addSortFields(sortFields);
    }
    /**
     * Create/renew the instance of Sort
     * @return Self instance so we can do something else directly.
     */
    public WrappedSort createSort () {
        _sort = new Sort();
        return this;
    }
    /**
     * Renew the instance of the List of SortField
     * @return Self instance so we can do something else directly.
     */
    public WrappedSort createSortField () {
        _sortFields = new LinkedList<SortField>();
        return this;
    }
    /**
     * Add one SortField
     * @param sortField The SortField to add
     * @return Self instance so we can do something else directly.
     */
    public WrappedSort addSortField (SortField sortField) {
        _sortFields.add(sortField);
        return this;
    }
    /**
     * Add a List of SortField
     * @param sortFields The List of SortField to add
     * @return Self instance so we can do something else directly.
     */
    public WrappedSort addSortFields (List<SortField> sortFields) {
        _sortFields.addAll(sortFields);
        return this;
    }
    /**
     * Get the List of SortField
     * @return The List of SortField
     */
    public List<SortField> getSortFields () {
        return _sortFields;
    }
    /**
     * Set the collected SortField(s) to the Sort and return the instance of Sort.
     * If no SortField was added, the Sort is set to SortField.FIELD_SCORE.
     * @return The instance of Sort
     */
    public Sort getNativeSort () {
        if (_sortFields.isEmpty())
            // nothing to sort by: fall back to relevance instead of
            // handing Lucene an empty field array
            _sort.setSort(SortField.FIELD_SCORE);
        else
            // toArray() without an argument returns Object[], which can not
            // be cast to SortField[]; ask for a typed array instead
            _sort.setSort(_sortFields.toArray(new SortField[_sortFields.size()]));
        return _sort;
    }
}

This class wraps the Lucene Sort class: it stores
the Sort instance together with the SortFields to apply to it.

The fragment added to WrappedSearcher.java

    /**
     * Run the wrapped query with the wrapped sort and hand back the hits.
     * @param wq WrappedQuery that holds the query to run
     * @param resultsPerPage Passed to the searcher as the number of hits to fetch
     * @param ws WrappedSort that supplies the Sort to apply
     * @return ScoreDoc[] The hits returned by the searcher
     * @throws IOException
     * @throws ParseException
     */
    public ScoreDoc[] doSearch (WrappedQuery wq, int resultsPerPage, WrappedSort ws)
            throws IOException, ParseException{
        ScoreDoc[] sortedHits =
            _searcher.search(wq.getQuery(), resultsPerPage, ws.getNativeSort()).scoreDocs;

        return sortedHits;
    }

This fragment does the search with a Sort.

The Result


The first search sorts dateOne and dateTwo in descending order,
the second search sorts dateTwo to dateFour in ascending order.

Download:

The full project is at github:
https://github.com/benbai123/JSP_Servlet_Practice/tree/master/Practice/JAVA/Search/LuceneTest

Reference:

http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/api/all/org/apache/lucene/search/Sort.html

http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/api/all/org/apache/lucene/search/SortField.html

Saturday, March 24, 2012

Lucene: Searching a range of dates by TermRangeQuery

Introduction

This post is about range searches in Lucene.

The Program

RangeQueryTest.java


package test.lucene.testtwo;

import java.io.IOException;
import java.util.*;

import org.apache.lucene.document.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;

import test.lucene.utils.*;
import test.lucene.wrapper.*;

public class RangeQueryTest {
    /**
     * Index four documents whose "time" field is 30, 20, 10 and 0 days
     * in the past, then run two range queries on that field and print
     * the hits. Any exception is caught and its stack trace printed.
     * @param args Not used
     */
    public static void main(String[] args) {
        try{
            // Create WrappedIndexWriter
            WrappedIndexWriter wiw = new WrappedIndexWriter();
            long oneDay = 1000L*60*60*24;
            long currentTime = System.currentTimeMillis();

            // create dates
            Date dateOne = new Date(currentTime - (oneDay*30)); // 30 days ago
            Date dateTwo = new Date(currentTime - (oneDay*20)); // 20 days ago
            Date dateThree = new Date(currentTime - (oneDay*10)); // 10 days ago
            Date dateFour = new Date(currentTime); // just now

            // convert dates to strings
            String sdOne = DateUtils.getStringMillis(dateOne);
            String sdTwo = DateUtils.getStringMillis(dateTwo);
            String sdThree = DateUtils.getStringMillis(dateThree);
            String sdFour = DateUtils.getStringMillis(dateFour);

            Locale.setDefault(Locale.US);
            TimeZone.setDefault(TimeZone.getTimeZone("EST"));

            // Create IndexWriter
            wiw.createIndexWriter();
            // add Documents and close
            wiw.addDocument(new WrappedDocument("title", "Test Title One")
                                .addField("content", "Test Content One")
                                .addField("time", sdOne)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Two")
                                .addField("content", "Test Content Two")
                                .addField("time", sdTwo)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Three")
                                .addField("content", "Test Content Three")
                                .addField("time", sdThree)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Four")
                                .addField("content", "Test Content Four")
                                .addField("time", sdFour)
                                .getDocument())
                .close();

            // create WrappedSearcher, initiate searcher
            WrappedSearcher ws = new WrappedSearcher().initSearcher(wiw);

            // do search by range query of date
            // not include upper bound, not include lower bound,
            // result: only dateTwo
            searchAndDisplay(wiw, ws, new WrappedQuery()
                    .createTermRangeQuery("time", sdOne,
                            sdThree, false, false));

            // do search by range query of date
            // include upper bound and lower bound
            // results: from dateTwo to dateFour
            searchAndDisplay(wiw, ws, new WrappedQuery()
                    .createTermRangeQuery("time", sdTwo,
                        sdFour, true, true));
        } catch (Exception e) {
            e.printStackTrace();
        }

    }
    /**
     * Run the query and print up to 10 hits
     * (title, content and time of each document).
     * @param wiw Not used by this method; kept so existing callers still compile
     * @param ws The searcher to run the query with
     * @param wq The query to run
     * @throws IOException
     * @throws ParseException
     * @throws java.text.ParseException If the stored time string can not be parsed
     */
    public static void searchAndDisplay (WrappedIndexWriter wiw, WrappedSearcher ws, WrappedQuery wq)
        throws IOException, ParseException, java.text.ParseException {
        ScoreDoc[] results =
            ws.doSearch( wq, 10);

        // display results
        System.out.println(results.length + " results.");
        for(int i=0; i < results.length; i++) {
            int docId = results[i].doc;
            Document doc = ws.getIndexSearcher().doc(docId);
            // the last line shows the time field, so label it "time"
            // (it used to be a second "content" label)
            System.out.println((i + 1) + "\ttitle: " + doc.get("title")
                                + "\n\tcontent: " + doc.get("content")
                                + "\n\ttime: " + DateUtils.stringToDate(doc.get("time"))
                                + "\n");
        }
    }
}

This is the main program: it creates four documents with four dates,
then runs range queries on them.

DateUtils.java

package test.lucene.utils;

import java.text.ParseException;
import java.util.*;

import org.apache.lucene.document.*;

public class DateUtils {
    /**
     * Turn a Date into its DateTools string form, using MILLISECOND resolution
     * @param date The date to convert
     * @return String The string produced by DateTools
     */
    public static String getStringMillis (Date date) {
        DateTools.Resolution millis = DateTools.Resolution.MILLISECOND;
        return DateTools.dateToString(date, millis);
    }
    /**
     * Turn a DateTools string back into a Date
     * @param str The String to convert
     * @return Date The date produced by DateTools
     * @throws ParseException
     */
    public static Date stringToDate (String str)
        throws ParseException {
        Date parsed = DateTools.stringToDate(str);
        return parsed;
    }
}

The utility class that converts dates to strings and vice versa.

The fragment added to WrappedDocument.java

    public WrappedDocument (String name, String value)
            throws IllegalStateException, IOException {
        createDoc(name, value, true, true);
    }
    /**
     * Shortcut that adds a field which is both stored and analyzed
     * @param name Field name
     * @param value Field value
     * @return WrappedDocument Self instance
     * @throws IOException
     */
    public WrappedDocument addField(String name, String value) throws IOException {
        final boolean store = true;
        final boolean analyzed = true;
        return addField(name, value, store, analyzed);
    }

These make it easy to add a stored and analyzed field without
typing lots of true, true, true, ...

The Result



Download

The full project is at github:
https://github.com/benbai123/JSP_Servlet_Practice/tree/master/Practice/JAVA/Search/LuceneTest


Reference
http://www.java-community.de/archives/119-Date-ranges-in-Lucene.html

Sunday, March 18, 2012

Wrap Lucene classes to do search easily.

This post is about building some wrappers around the Lucene classes
to make searching easier.

The sample is the same as the first one at
http://ben-bai.blogspot.com/2012/03/basic-command-line-lucene-test.html

but it uses the wrappers instead of calling the Lucene classes directly.

The Test and Wrappers

LuceneTestTwo.java

package test.lucene.testtwo;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.ScoreDoc;

import test.lucene.wrapper.*;

public class LuceneTestTwo {
    /**
     * Index four small documents through the wrappers, search the
     * "content" field for "Test Content Three" and print the hits.
     * @param args Not used
     * @throws IOException
     * @throws ParseException
     */
    public static void main(String[] args)
    throws IOException, ParseException {
        // set up the wrapped writer
        WrappedIndexWriter wiw = new WrappedIndexWriter();
        wiw.createIndexWriter();

        // index the four documents, then close the writer;
        // the flags after each value are (store, analyzed)
        wiw.addDocument(
                new WrappedDocument("title", "Test Title One", true, true)
                    .addField("content", "Test Content One", true, true)
                    .getDocument())
            .addDocument(
                new WrappedDocument("title", "Test Title Two", true, true)
                    .addField("content", "Test Content Two", true, false)
                    .getDocument())
            .addDocument(
                new WrappedDocument("title", "Test Title Three", true, true)
                    .addField("content", "Test Content Three", true, true)
                    .getDocument())
            .addDocument(
                new WrappedDocument("title", "Test Title Four", true, true)
                    .addField("content", "Test Content Four", false, true)
                    .getDocument())
            .close();

        // open the searcher first, then build the query and run it
        WrappedSearcher ws = new WrappedSearcher();
        ws.initSearcher(wiw);
        WrappedQuery contentQuery =
            new WrappedQuery().createQuery(wiw, "content", "Test Content Three");
        ScoreDoc[] results = ws.doSearch(contentQuery, 10);

        // print every hit with a 1-based rank
        System.out.println(results.length + " results.");
        int rank = 0;
        for (ScoreDoc hit : results) {
            rank++;
            Document doc = ws.getIndexSearcher().doc(hit.doc);
            System.out.println(rank + "\ttitle: " + doc.get("title")
                                + "\n\tcontent: " + doc.get("content"));
        }
    }
}

WrappedIndexWriter.java

package test.lucene.wrapper;

import java.io.IOException;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.store.*;
import org.apache.lucene.util.Version;

/**
 * Keeps an IndexWriter together with the Analyzer and Directory
 * it was created from, so the matching ones can be fetched later.
 *
 */
public class WrappedIndexWriter {
    // Analyzer handed to the IndexWriterConfig
    private Analyzer _analyzer;
    // Directory the index is written to
    private Directory _dir;
    // Config the IndexWriter is built with
    private IndexWriterConfig _config;
    // The wrapped IndexWriter
    private IndexWriter _iw;

    /**
     * Build a StandardAnalyzer, a RAMDirectory and an IndexWriterConfig,
     * then the IndexWriter on top of them
     * @throws CorruptIndexException
     * @throws LockObtainFailedException
     * @throws IOException
     */
    public void createIndexWriter ()
        throws CorruptIndexException, LockObtainFailedException, IOException {
        this._analyzer = new StandardAnalyzer(Version.LUCENE_35);
        this._dir = new RAMDirectory();
        this._config = new IndexWriterConfig(Version.LUCENE_35, this._analyzer);
        this._iw = new IndexWriter(this._dir, this._config);
    }
    /**
     * @return Analyzer The Analyzer stored by createIndexWriter
     */
    public Analyzer getAnalyzer () {
        return this._analyzer;
    }
    /**
     * @return Directory The Directory stored by createIndexWriter
     */
    public Directory getDirectory () {
        return this._dir;
    }
    /**
     * @return IndexWriter The wrapped IndexWriter
     */
    public IndexWriter getIndexWriter() {
        return this._iw;
    }
    /**
     * Hand a document to the wrapped IndexWriter
     * @param doc The document to add
     * @return WrappedIndexWriter Self instance, for chaining
     * @throws CorruptIndexException
     * @throws IOException
     */
    public WrappedIndexWriter addDocument (Document doc)
        throws CorruptIndexException, IOException {
        this._iw.addDocument(doc);
        return this;
    }
    /**
     * Forward an update to the wrapped IndexWriter
     * @param term The term that identifies the Document to be updated
     * @param doc The new Document
     * @return WrappedIndexWriter Self instance, for chaining
     * @throws CorruptIndexException
     * @throws IOException
     */
    public WrappedIndexWriter updateDocument (Term term, Document doc)
        throws CorruptIndexException, IOException {
        this._iw.updateDocument(term, doc);
        return this;
    }
    /**
     * Close the wrapped IndexWriter
     * @throws IOException
     * @throws CorruptIndexException
     */
    public void close() throws CorruptIndexException, IOException {
        this._iw.close();
    }
}

The Analyzer and Directory are stored together with the IndexWriter in this wrapper,
so we can be sure to get the matching instances from it.

WrappedDocument.java

package test.lucene.wrapper;

import java.io.IOException;

import org.apache.lucene.document.*;

/**
 * Wraps a Document so it can be built with chained calls:
 *
 * new WrappedDocument().createDoc(...).addField(...)
 * .addField.......getDocument()
 *
 */
public class WrappedDocument {
    // The wrapped Document; stays null until createDoc is called
    private Document _doc;

    /**
     * Create an empty wrapper; no Document is created yet
     */
    public WrappedDocument () {

    }
    /**
     * Create the wrapper and its Document with a first field
     * @param name Field name
     * @param value Field value
     * @param store Store value or not
     * @param analyzed Analyze value or not
     * @throws IllegalStateException
     * @throws IOException
     */
    public WrappedDocument (String name, String value,
            boolean store, boolean analyzed)
            throws IllegalStateException, IOException {
        createDoc(name, value, store, analyzed);
    }
    /**
     * Create the Document and put the first field into it.
     * @param name Field name
     * @param value Field value
     * @param store Store value or not
     * @param analyzed Analyze value or not
     * @return WrappedDocument Self instance
     * @throws IOException
     * @throws IllegalStateException If this wrapper already has a Document
     */
    public WrappedDocument createDoc(String name, String value,
        boolean store, boolean analyzed)
            throws IOException, IllegalStateException {
        boolean alreadyCreated = (_doc != null);
        if (alreadyCreated) {
            throw new IllegalStateException ("Document already created!");
        }
        _doc = new Document();
        return addField(name, value, store, analyzed);
    }
    /**
     * Append one field to the Document
     * @param name Field name
     * @param value Field value
     * @param store Store value or not
     * @param analyzed Analyze value or not
     * @return WrappedDocument Self instance
     * @throws IOException
     */
    public WrappedDocument addField(String name, String value,
            boolean store, boolean analyzed) throws IOException {
        // translate the two flags into the Lucene enums
        Field.Store storeMode;
        if (store) {
            storeMode = Field.Store.YES;
        } else {
            storeMode = Field.Store.NO;
        }
        Field.Index indexMode;
        if (analyzed) {
            indexMode = Field.Index.ANALYZED;
        } else {
            indexMode = Field.Index.NOT_ANALYZED;
        }
        _doc.add(new Field(name, value, storeMode, indexMode));
        return this;
    }
    /**
     * @return The Document held by this wrapper.
     */
    public Document getDocument () {
        return _doc;
    }
}

This is almost the same as the Document wrapper in the first test.

WrappedQuery.java

package test.lucene.wrapper;

import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import org.apache.lucene.util.Version;
/**
 * Wraps a Query so that different kinds of Query can be created
 * through one object and chained, e.g.
 * createBooleanQuery.addQuery.addQuery...
 *
 */
public class WrappedQuery {
    // the wrapped Query
    private Query _query;

    /**
     * Create a Query by parsing the content with a QueryParser
     * @param wiw Supplies the Analyzer given to the QueryParser
     * @param field The default field given to the QueryParser
     * @param content The query string to parse
     * @return WrappedQuery Self instance
     * @throws ParseException
     */
    public WrappedQuery createQuery (WrappedIndexWriter wiw, String field, String content)
        throws ParseException {
        QueryParser parser =
            new QueryParser(Version.LUCENE_35, field, wiw.getAnalyzer());
        _query = parser.parse(content);
        return this;
    }

    /**
     * Replace the wrapped query with a new, empty BooleanQuery
     * @return WrappedQuery Self instance
     */
    public WrappedQuery createBooleanQuery () {
        _query = new BooleanQuery();
        return this;
    }

    /**
     * Replace the wrapped query with a TermQuery
     * @param term The term to query
     * @return WrappedQuery Self instance
     */
    public WrappedQuery createTermQuery (Term term) {
        _query = new TermQuery(term);
        return this;
    }

    /**
     * Replace the wrapped query with a TermRangeQuery
     * @param field The field to query
     * @param lower The lower bound
     * @param upper The upper bound
     * @param includeLower Greater than or equal (true) or greater than (false)
     * @param includeUpper Less than or equal (true) or less than (false)
     * @return WrappedQuery Self instance
     */
    public WrappedQuery createTermRangeQuery (String field, String lower,
            String upper, boolean includeLower, boolean includeUpper) {
        _query = new TermRangeQuery(
                field, lower, upper, includeLower, includeUpper);
        return this;
    }

    /**
     * Replace the wrapped query with a WildcardQuery
     * @param term The term to query
     * @return WrappedQuery Self instance
     */
    public WrappedQuery createWildcardQuery (Term term) {
        _query = new WildcardQuery(term);
        return this;
    }

    /**
     * Add a TermQuery clause to the wrapped BooleanQuery
     * @param term The term to add
     * @param occur MUST, MUST_NOT or SHOULD
     * @return WrappedQuery Self instance
     * @throws UnsupportedOperationException If the wrapped query is not a BooleanQuery
     */
    public WrappedQuery addQuery (Term term, BooleanClause.Occur occur) {
        if (!(_query instanceof BooleanQuery)) {
            throw new UnsupportedOperationException("addQuery only works with BooleanQuery");
        }
        BooleanQuery booleanQuery = (BooleanQuery) _query;
        booleanQuery.add(new TermQuery(term), occur);
        return this;
    }

    /**
     * @return Query The wrapped query
     */
    public Query getQuery () {
        return _query;
    }
}

Wrap the Query so we can create various types of Query as needed.

WrappedSearcher.java

package test.lucene.wrapper;

import java.io.IOException;

import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
/**
 * Wraps the IndexSearcher so the set-up and the search
 * call are hidden behind two simple methods
 *
 */
public class WrappedSearcher {
    /**
     * The searcher; the field is static, so it is shared by
     * every WrappedSearcher instance
     */
    private static IndexSearcher _searcher;

    /**
     * Open a reader on the Directory of the given writer and
     * replace _searcher with a searcher on that reader
     * @param wiw The wrapped index writer
     * @return WrappedSearcher Self instance
     * @throws CorruptIndexException
     * @throws IOException
     */
    public WrappedSearcher initSearcher (WrappedIndexWriter wiw)
        throws CorruptIndexException, IOException {
        _searcher = new IndexSearcher(IndexReader.open(wiw.getDirectory()));
        return this;
    }
    /**
     * @return IndexSearcher The current searcher
     */
    public IndexSearcher getIndexSearcher () {
        return _searcher;
    }

    /**
     * Run the wrapped query and hand back the collected hits
     * @param wq WrappedQuery that holds the query to run
     * @param resultsPerPage Passed to the collector as the number of hits to keep
     * @return ScoreDoc[] The collected hits
     * @throws IOException
     * @throws ParseException
     */
    public ScoreDoc[] doSearch (WrappedQuery wq, int resultsPerPage)
        throws IOException, ParseException{
        TopScoreDocCollector topHits =
            TopScoreDocCollector.create(resultsPerPage, true);
        _searcher.search(wq.getQuery(), topHits);
        return topHits.topDocs().scoreDocs;
    }
}

Wrap the Searcher so we do not need to worry about
the initialization and search process.

Download:

The test project is at github
https://github.com/benbai123/JSP_Servlet_Practice/tree/master/Practice/JAVA/Search/LuceneTest

Reference:
Official Javadoc
http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/api/all/
Using Apache Lucene to search text (IBM)
http://www.ibm.com/developerworks/java/library/os-apache-lucenesearch/index.html

Saturday, March 17, 2012

Basic Command Line Lucene Test

This post is about a basic command-line Lucene sample,
including creating a Document, adding Fields, analyzing, storing, indexing and searching.

Getting Started:

Download lucene here:
http://www.apache.org/dyn/closer.cgi/lucene/java/3.5.0

LuceneTest.java

package test.lucene;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.*;
import org.apache.lucene.queryParser.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.*;
import org.apache.lucene.util.Version;

import java.io.IOException;

public class LuceneTest {
    // The analyzer for tokenizing text, indexing and searching
    public static StandardAnalyzer analyzer =
        new StandardAnalyzer(Version.LUCENE_35);

    /**
     * Build an index of four documents in a RAMDirectory, then search
     * the content field for "Test Content Three" and print the hits.
     * @param args Not used
     * @throws IOException
     * @throws ParseException
     */
    public static void main(String[] args)
        throws IOException, ParseException {
        // create the index
        Directory index = new RAMDirectory();
        // create the index writer config
        IndexWriterConfig config =
            new IndexWriterConfig(Version.LUCENE_35, analyzer);
        // create index writer by index and config
        IndexWriter iw = new IndexWriter(index, config);

        // add documents into index writer;
        // close the writer even if adding a document fails
        try {
            addDocuments(iw);
        } finally {
            iw.close();
        }

        // Search for 'Test Content Three' in the documents' content
        doSearch(index, "content", "Test Content Three");

    }
    /**
     * Add the four test documents to the index writer
     * @param iw The index writer to add to
     * @throws IOException
     * @throws ParseException
     */
    private static void addDocuments(IndexWriter iw)
        throws IOException, ParseException {

        iw.addDocument(new DocumentWrapper()
                .createDoc("title", "Test Title One", true, true)
                .addField("content", "Test Content One", true, true)
                .getDocument());
        // content of this one not analyzed,
        // can not be searched
        iw.addDocument(new DocumentWrapper()
                .createDoc("title", "Test Title Two", true, true)
                .addField("content", "Test Content Two", true, false)
                .getDocument());
        iw.addDocument(new DocumentWrapper()
                .createDoc("title", "Test Title Three", true, true)
                .addField("content", "Test Content Three", true, true)
                .getDocument());
        // content of this one is not stored,
        // can be searched but the content is null
        iw.addDocument(new DocumentWrapper()
                .createDoc("title", "Test Title Four", true, true)
                .addField("content", "Test Content Four", false, true)
                .getDocument());
    }
    /**
     * Parse the content as a query on the given default field, collect
     * up to 10 hits from the index and print their title and content.
     * @param index The Directory that holds the index
     * @param field The default field given to the QueryParser
     * @param content The query string
     * @throws IOException
     * @throws ParseException
     */
    private static void doSearch (Directory index, String field, String content)
        throws IOException, ParseException{
        // query, with default field
        Query q = new QueryParser(Version.LUCENE_35, field, analyzer)
                        .parse(content);

        // search
        int hitsPerPage = 10;
        IndexReader reader = IndexReader.open(index);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                TopScoreDocCollector collector =
                    TopScoreDocCollector.create(hitsPerPage, true);
                searcher.search(q, collector);
                ScoreDoc[] hits = collector.topDocs().scoreDocs;

                // display results
                System.out.println(hits.length + " results.");
                for(int i=0; i < hits.length; i++) {
                    int docId = hits[i].doc;
                    Document doc = searcher.doc(docId);
                    System.out.println((i + 1) + "\ttitle: " + doc.get("title")
                                        + "\n\tcontent: " + doc.get("content"));
                }
            } finally {
                // close searcher
                searcher.close();
            }
        } finally {
            // a searcher built on a reader we opened does not close
            // that reader, so close it here
            reader.close();
        }
    }
}

Create the analyzer, index and index writer, add some documents to the index writer,
then search for 'Test Content Three' in the content field.

DocumentWrapper.java

package test.lucene;

import java.io.IOException;

import org.apache.lucene.document.*;

/**
 * Wraps a Document so it can be built with chained calls:
 *
 * new DocumentWrapper().createDoc(...).addField(...)
 * .addField.......getDocument()
 *
 */
public class DocumentWrapper {
    // The wrapped Document; stays null until createDoc is called
    private Document _doc;

    /**
     * Create the Document and put the first field into it.
     * @param name Field name
     * @param value Field value
     * @param store Store value or not
     * @param analyzed Analyze value or not
     * @return DocumentWrapper instance
     * @throws IOException
     * @throws IllegalStateException If this wrapper already has a Document
     */
    public DocumentWrapper createDoc(String name, String value,
        boolean store, boolean analyzed)
            throws IOException, IllegalStateException {
        boolean alreadyCreated = (_doc != null);
        if (alreadyCreated) {
            throw new IllegalStateException ("Document already created!");
        }
        _doc = new Document();
        return addField(name, value, store, analyzed);
    }
    /**
     * Append one field to the Document
     * @param name Field name
     * @param value Field value
     * @param store Store value or not
     * @param analyzed Analyze value or not
     * @return DocumentWrapper instance
     * @throws IOException
     */
    public DocumentWrapper addField(String name, String value,
            boolean store, boolean analyzed) throws IOException {
        // translate the two flags into the Lucene enums
        Field.Store storeMode;
        if (store) {
            storeMode = Field.Store.YES;
        } else {
            storeMode = Field.Store.NO;
        }
        Field.Index indexMode;
        if (analyzed) {
            indexMode = Field.Index.ANALYZED;
        } else {
            indexMode = Field.Index.NOT_ANALYZED;
        }
        _doc.add(new Field(name, value, storeMode, indexMode));
        return this;
    }
    /**
     * @return The Document held by this wrapper.
     */
    public Document getDocument () {
        return _doc;
    }
}

DocumentWrapper wraps a document and has two functions, 'createDoc' and 'addField',
that can be called as a chain.

Execute result:



Test Content Three is the first result as expected,
Test Content Two is not in the result list because it is not analyzed,
and Test Content Four is displayed as 'null' because it is not stored.

Download:

The full project is at github:
https://github.com/benbai123/JSP_Servlet_Practice/tree/master/Practice/JAVA/Search/LuceneTest

Reference:
http://www.lucenetutorial.com/lucene-in-5-minutes.html

Sunday, March 11, 2012

JSP/Servlet: Use Jawr to reduce page load times.

Introduction

From official site:
Jawr is a tunable packaging solution for Javascript and CSS which allows for rapid development of resources in separate module files. Developers can work with a large set of split javascript files in development mode, then Jawr bundles all together into one or several files in a configurable way.
By using a tag library, Jawr allows you to use the same, unchanged pages for development and production. Jawr also minifies and compresses the files, resulting in reduced page load times.

Getting started

Get required jars:
download Jawr 3.3.3:
http://java.net/projects/jawr/downloads/directory/release
get log4j:
http://mvnrepository.com/artifact/log4j/log4j/1.2.16

Then put them into WEB-INF/lib

Create Javascript/CSS files:
The project architecture and Javascript/CSS files are as below



Config Javascript/CSS Servlet in WEB-INF/web.xml:

<?xml version="1.0" encoding="UTF-8"?>
<web-app xmlns="http://java.sun.com/xml/ns/javaee"
    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
    xsi:schemaLocation="http://java.sun.com/xml/ns/javaee http://java.sun.com/xml/ns/javaee/web-app_3_0.xsd"
    version="3.0">
    <servlet>
        <servlet-name>JavascriptServlet</servlet-name>
        <servlet-class>net.jawr.web.servlet.JawrServlet</servlet-class>

        <!-- Location in classpath of the config file
            it should be a path under classpath,
            the value is /test/jawr.properties here because
            we put the file "jawr.properties" under /src/test and
            it will be moved to classpath/test/ after compiled -->
        <init-param>
            <param-name>configLocation</param-name>
            <param-value>/test/jawr.properties</param-value>
        </init-param>
        <load-on-startup>1</load-on-startup>
    </servlet>

    <servlet>
        <servlet-name>CSSServlet</servlet-name>
        <servlet-class>net.jawr.web.servlet.JawrServlet</servlet-class>

        <!-- Location in classpath of the config file
            it should be a path under classpath,
            the value is /test/jawr.properties here because
            we put the file "jawr.properties" under /src/test and
            it will be moved to classpath/test/ after compiled -->
        <init-param>
            <param-name>configLocation</param-name>
            <param-value>/test/jawr.properties</param-value>
        </init-param>
        <init-param>
            <param-name>type</param-name>
            <param-value>css</param-value>
        </init-param>
        <load-on-startup>1</load-on-startup>
    </servlet>

    <!-- the url-pattern can be any value, we use *.jsbundle / *.cssbundle here -->
    <servlet-mapping>
        <servlet-name>JavascriptServlet</servlet-name>
        <url-pattern>*.jsbundle</url-pattern>
    </servlet-mapping> 

    <servlet-mapping>
        <servlet-name>CSSServlet</servlet-name>
        <url-pattern>*.cssbundle</url-pattern>
    </servlet-mapping>
</web-app>

Config jawr in src/test/jawr.properties:

# Common properties
jawr.debug.on=false
jawr.gzip.on=true
jawr.gzip.ie6.on=false
jawr.charset.name=UTF-8

# Javascript properties and mappings
jawr.js.bundle.basedir=/js

# All files within /js/bundleOne are bundled together and
# mapped to /bundles/jsbundle_one.jsbundle.
# All files within /js/bundleTwo are bundled together and
# mapped to /bundles/jsbundle_two.jsbundle.
# The jsbundleOne / jsbundleTwo part of the property key
# can be any value
jawr.js.bundle.jsbundleOne.id=/bundles/jsbundle_one.jsbundle
jawr.js.bundle.jsbundleOne.mappings=/js/bundleOne/**
jawr.js.bundle.jsbundleTwo.id=/bundles/jsbundle_two.jsbundle
jawr.js.bundle.jsbundleTwo.mappings=/js/bundleTwo/**

# CSS properties and mappings
jawr.css.bundle.basedir=/css

jawr.css.bundle.cssbundle.id=/bundles/cssbundle.cssbundle
jawr.css.bundle.cssbundle.mappings=/css/**

########## another way ##########
## note: singlebundle currently only works with a
##       url-pattern that ends with .css, so
##       web.xml and test.jsp have to be modified
##       before using this way
## CSS properties and mappings
# jawr.css.bundle.basedir=/css
#
## CSS files will be all bundled together automatically
# jawr.css.factory.use.singlebundle=true
# jawr.css.factory.singlebundle.bundlename=/bundles/cssbundle.css

Note: for css files, singlebundle currently only works with a url-pattern that ends with .css,
please refer to the Jawr tracker:
http://java.net/jira/browse/JAWR-228

test file index.jsp (without Jawr)

<%-- Baseline page without Jawr: every CSS file under css/ and every JS file
     under js/bundleOne and js/bundleTwo is referenced individually, then
     funcA() .. funcE() are called once the page has loaded. --%>
<%@ page isErrorPage="true" language="java"
    contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page isELIgnored ="false" %>
<html>
    <head>
        <meta http-equiv="Content-Type" 
            content="text/html; charset=UTF-8"/>
        <title>Normal - without JAWR</title>
        <link href="css/aaa.css" rel="stylesheet" type="text/css">
        <link href="css/bbb.css" rel="stylesheet" type="text/css">
        <link href="css/ccc.css" rel="stylesheet" type="text/css">
        <link href="css/ddd.css" rel="stylesheet" type="text/css">
        <link href="css/eee.css" rel="stylesheet" type="text/css">
        <script type="text/javascript" src="js/bundleOne/aaa.js"></script>
        <script type="text/javascript" src="js/bundleOne/bbb.js"></script>
        <script type="text/javascript" src="js/bundleOne/ccc.js"></script>
        <script type="text/javascript" src="js/bundleTwo/ddd.js"></script>
        <script type="text/javascript" src="js/bundleTwo/eee.js"></script>
        <script type="text/javascript">
            onload = function () {
                funcA();
                funcB();
                funcC();
                funcD();
                funcE();
            }
        </script>
    </head>
    <body>
        <div class="class_a">
            js file <span id="funcA"></span> loaded
        </div>
        <div class="class_b">
            js file <span id="funcB"></span> loaded
        </div>
        <div class="class_c">
            js file <span id="funcC"></span> loaded
        </div>
        <div class="class_d">
            js file <span id="funcD"></span> loaded
        </div>
        <div class="class_e">
            js file <span id="funcE"></span> loaded
        </div>
    </body>
</html>

test file test.jsp (use Jawr)

<%-- Jawr version of the test page: the individual CSS/JS references are
     replaced by the jwr:style / jwr:script tags, which point at the bundle
     ids configured in jawr.properties. --%>
<%@ page isErrorPage="true" language="java"
    contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@ page isELIgnored ="false" %>
<%@ taglib uri="http://jawr.net/tags" prefix="jwr" %>
<html>
    <head>
        <meta http-equiv="Content-Type" 
            content="text/html; charset=UTF-8"/>
        <title>Bundled - with JAWR</title>
        <jwr:style src="/bundles/cssbundle.cssbundle" />
        <jwr:script src="/bundles/jsbundle_one.jsbundle"/>
        <jwr:script src="/bundles/jsbundle_two.jsbundle"/>
        <script type="text/javascript">
            onload = function () {
                funcA();
                funcB();
                funcC();
                funcD();
                funcE();
            }
        </script>
    </head>
    <body>
        <div class="class_a">
            js file <span id="funcA"></span> loaded
        </div>
        <div class="class_b">
            js file <span id="funcB"></span> loaded
        </div>
        <div class="class_c">
            js file <span id="funcC"></span> loaded
        </div>
        <div class="class_d">
            js file <span id="funcD"></span> loaded
        </div>
        <div class="class_e">
            js file <span id="funcE"></span> loaded
        </div>
    </body>
</html>

Test Result:

In short, it is about 2 times faster.

The result of loading index.jsp (without Jawr) 3 times is as below:







The result of loading test.jsp (with Jawr) 3 times is as below:






Download:
The full project at github
https://github.com/benbai123/JSP_Servlet_Practice/tree/master/Practice/Tools/JAWR

References:
http://jawr.java.net/
http://jawr.java.net/tutorials/quickstart.html

Sunday, March 4, 2012

C/C++ Practice: Struct Practice Two, Copy Struct.

Introduction:

This post practices two types of struct copy, Shallow Copy and Deep Copy, in C.

Shallow copy:
copy all member field values, the copied pointer and
the original pointer will point to the same address.

Deep copy:
copy the field values that are not pointer,
create new pointer for pointer value,
and copy the real content from old address to new address.

A simple sample:

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/**
  * This sample practice the shallow copy and
  * deep copy of struct in c.
  *
  * Shallow copy:
  *          copy all member field values, the copied pointer and
  *          the original pointer will point to the same address
  * Deep copy:
  *           copy the field values that are not pointer,
  *           create new pointer for pointer value,
  *           and copy the real content from old address to new address.
  */
// Sample struct with one plain value member and one pointer member,
// used to show the difference between a shallow and a deep copy.
typedef struct DataStruct {
        int data_one;   // plain int member
        int* data_two;  // pointer member; the int it points to lives elsewhere
} DataStruct;
void deepCopy(DataStruct* to, DataStruct* from);
int main () {
    // the int that dsOne.data_two points to
    int data_two = 2;

    // assign the data to dsOne
    DataStruct dsOne;
    dsOne.data_one = 1;
    dsOne.data_two = &data_two;

    // shallow copy dsOne to dsTwo: struct assignment copies every member
    // as-is, so the pointer itself is copied, not the int it points to
    DataStruct dsTwo = dsOne;

    // deep copy dsOne to dsThree
    DataStruct dsThree;
    deepCopy(&dsThree, &dsOne);

    // show the value of original data
    printf("original data_one is %d,\noriginal data_two is %d\n\n",
           dsOne.data_one, *dsOne.data_two);

    // change the data of dsOne
    dsOne.data_one = 3;
    *dsOne.data_two = 4;

    // dsOne.data_two and dsTwo.data_two point to the same address,
    // so *dsTwo.data_two shows the changed value
    printf("data_one in dsTwo is %d,\ndata_two in dsTwo is %d\n\n",
           dsTwo.data_one, *dsTwo.data_two);

    // dsThree.data_two is meant to point to a different address than
    // dsOne.data_two, so *dsThree.data_two keeps the original value
    printf("data_one in dsThree is %d,\ndata_two in dsThree is %d\n\n",
           dsThree.data_one, *dsThree.data_two);

    system("PAUSE");
    return 0;
}

/**
  * Deep copy *from into *to.
  *
  * data_one is copied by value. For data_two a new int is allocated
  * with malloc and the value that from->data_two points to is copied
  * into it, so the two structs no longer share the pointed-to int.
  *
  * to->data_two is set to NULL when from->data_two is NULL or when the
  * allocation fails. Otherwise the caller owns the allocated int and
  * has to release it with free().
  *
  * Any previous value of to->data_two is overwritten without being
  * freed, since *to may be uninitialized on entry.
  */
void deepCopy(DataStruct* to, DataStruct* from) {
    to->data_one = from->data_one;

    // nothing to duplicate when the source points to nothing
    if (from->data_two == NULL) {
        to->data_two = NULL;
        return;
    }

    // create new storage for the pointer member first;
    // copying through the old value of to->data_two would write
    // to an arbitrary address
    to->data_two = (int*) malloc(sizeof(int));
    if (to->data_two != NULL) {
        // copy the real value to the address
        // that to->data_two points to
        memcpy(to->data_two, from->data_two, sizeof(int));
    }
}

Result:



Download:
struct_practice_002__copy_struct.c at github:
https://github.com/benbai123/C_Cplusplus_Practice/tree/master/C_Struct

Reference:
http://www.learncpp.com/cpp-tutorial/912-shallow-vs-deep-copying/

Saturday, March 3, 2012

Basic Servlet Practice: Dynamic add servlet programmatically.

Sometimes we may want to add servlets dynamically and/or change their features,
for example, to enable the Servlet 3 asynchronous processing support.

To achieve this, we can dynamically add servlet in
ServletContextListener as below:


DynamicAddListener.java

package test.servlet.dynamicaddservlet;

import javax.servlet.*;
import javax.servlet.ServletContextListener;
import javax.servlet.ServletRegistration;
import javax.servlet.annotation.WebListener;

/**
 * This listener adds the DynamicAddedServlet, with async support enabled and
 * mapped to /added.jsp, if the servlet version is 3.0 or later.
 *
 */
@WebListener()
public class DynamicAddListener implements ServletContextListener {

    /**
     * Register DynamicAddedServlet when the context reports servlet 3.x or later.
     * Does nothing on older versions, or when a servlet named
     * "DynamicAddedServlet" is already registered.
     */
    @Override
    public void contextInitialized(ServletContextEvent event) {
        ServletContext sc = event.getServletContext();
        // Dynamic add servlet only for servlet 3.x or later
        if (sc.getMajorVersion() < 3) {
            return;
        }
        final ServletRegistration.Dynamic dn =
                sc.addServlet("DynamicAddedServlet", DynamicAddedServlet.class);
        // addServlet returns null when the name is already registered,
        // in that case there is nothing left to configure
        if (dn == null) {
            sc.log("DynamicAddedServlet is already registered, skip dynamic add");
            return;
        }
        dn.setAsyncSupported(true);
        dn.addMapping("/added.jsp");
    }

    /**
     * Nothing to clean up.
     */
    @Override
    public void contextDestroyed(ServletContextEvent sce) {
        
    }
}

DynamicAddedServlet.java

package test.servlet.dynamicaddservlet;

import java.io.IOException;
import java.io.PrintWriter;

import javax.servlet.ServletContext;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;

/**
 * This servlet will be added by DynamicAddListener#contextInitialized.
 * It answers GET requests with a small HTML page that shows
 * the servlet version reported by the servlet context.
 *
 */
public class DynamicAddedServlet extends HttpServlet {

    private static final long serialVersionUID = -8873939883201271898L;

    /**
     * Write the HTML page that reports the servlet version.
     *
     * @param req the request (not read by this method)
     * @param resp the response the page is written to
     * @throws ServletException declared by the overridden method
     * @throws IOException if the response writer cannot be obtained
     */
    @Override
    protected void doGet(HttpServletRequest req, HttpServletResponse resp)
            throws ServletException, IOException {
        ServletContext sc = getServletContext();
        // declare the content type and charset before the writer is obtained,
        // otherwise the client has to guess how to treat the body
        resp.setContentType("text/html; charset=UTF-8");
        PrintWriter out = resp.getWriter();
        try {
            out.println("<html>");
            out.println("<head>");
            out.println("<title> Servlet Added </title>");
            out.println("</head>");
            out.println("<body>");
            out.println("<h2 style=\"margin: 50px;\">The Servlet Version is 3.0 or later so<br />"
                        + "&nbsp;the Servlet is added.<br /><br />"
                        + "&nbsp;&nbsp;Servlet Version: "+sc.getMajorVersion()+"."+sc.getMinorVersion()+"</h2>");
            out.println("</body>");
            out.println("</html>");
        } finally {
            out.close();
        }
    }
}


If we run this Web-App with Tomcat7 then connect to /added.jsp,
the result is:



Download:

Files at github:
https://github.com/benbai123/JSP_Servlet_Practice/tree/master/Practice/BasicServletPractice/src/test/servlet/dynamicaddservlet

Reference:

http://docs.oracle.com/javaee/6/api/javax/servlet/ServletContext.html#addServlet(java.lang.String, java.lang.Class)