Sunday, March 25, 2012

Lucene: Sort the Search Result

Introduction

This post shows how to sort search results in Lucene 3.5.0.

The Program

SortTest.java

package test.lucene.testtwo;

import java.io.IOException;
import java.util.Date;
import java.util.Locale;
import java.util.TimeZone;

import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SortField;

import test.lucene.utils.DateUtils;
import test.lucene.wrapper.WrappedDocument;
import test.lucene.wrapper.WrappedIndexWriter;
import test.lucene.wrapper.WrappedQuery;
import test.lucene.wrapper.WrappedSearcher;
import test.lucene.wrapper.WrappedSort;

/**
 * Demo program: indexes four documents whose "time" field holds dates
 * 30, 20 and 10 days in the past plus the current time, then runs two
 * term-range queries on that field and prints the hits sorted by "time"
 * (first descending, then ascending).
 */
public class SortTest {
    /**
     * Builds the index, runs the two sorted range searches and prints the results.
     * Any exception is caught and its stack trace printed.
     *
     * @param args not used
     */
    public static void main(String[] args) {
        try{
            // Create WrappedIndexWriter
            WrappedIndexWriter wiw = new WrappedIndexWriter();
            long oneDay = 1000L*60*60*24;
            long currentTime = System.currentTimeMillis();
    
            // create dates
            Date dateOne = new Date(currentTime - (oneDay*30)); // 30 days ago
            Date dateTwo = new Date(currentTime - (oneDay*20)); // 20 days ago
            Date dateThree = new Date(currentTime - (oneDay*10)); // 10 days ago
            Date dateFour = new Date(currentTime); // just now
    
            // convert dates to strings
            String sdOne = DateUtils.getStringMillis(dateOne);
            String sdTwo = DateUtils.getStringMillis(dateTwo);
            String sdThree = DateUtils.getStringMillis(dateThree);
            String sdFour = DateUtils.getStringMillis(dateFour);

            Locale.setDefault(Locale.US);
            TimeZone.setDefault(TimeZone.getTimeZone("EST"));

            // Create IndexWriter
            wiw.createIndexWriter();
            // add Documents and close
            // based on the Javadoc, the sort field (time)
            // should be NOT_ANALYZED
            // NOTE(review): the two boolean flags are presumably (store, analyze)
            // -- confirm against WrappedDocument.addField
            wiw.addDocument(new WrappedDocument("title", "Test Title One")
                                .addField("content", "Test Content One")
                                .addField("time", sdOne, true, false)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Two")
                                .addField("content", "Test Content Two")
                                .addField("time", sdTwo, true, false)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Three")
                                .addField("content", "Test Content Three")
                                .addField("time", sdThree, true, false)
                                .getDocument())
                .addDocument(new WrappedDocument("title", "Test Title Four")
                                .addField("content", "Test Content Four")
                                .addField("time", sdFour, true, false)
                                .getDocument())
                .close();
        
            // create WrappedSearcher, initiate searcher
            WrappedSearcher ws = new WrappedSearcher().initSearcher(wiw);
    
            // do search by range query of date
            // not include upper bound, include lower bound,
            // result: dateOne and dateTwo
            // Sort by time descending
            searchAndDisplay(wiw, ws,
                new WrappedQuery()
                    .createTermRangeQuery("time", sdOne, sdThree, true, false),
                "time", true);

            // do search by range query of date
            // include upper bound and lower bound
            // results: from dateTwo to dateFour
            // Sort by time ascending
            searchAndDisplay(wiw, ws,
                new WrappedQuery()
                    .createTermRangeQuery("time", sdTwo, sdFour, true, true),
                "time", false);
        } catch (Exception e) {
            e.printStackTrace();
        }

    }

    /**
     * Runs the query with a string sort on the given field (at most 10 hits)
     * and prints the title, content and time of every hit to standard output.
     *
     * @param wiw       not used by this method; kept so existing callers still compile
     * @param ws        searcher used to execute the query and load the hit documents
     * @param wq        the query to execute
     * @param sortField name of the field to sort on (sorted as {@code SortField.STRING})
     * @param reverse   passed to {@code SortField} as its reverse flag
     * @throws IOException declared by the search / document lookup
     * @throws ParseException declared by the search
     * @throws java.text.ParseException declared for the conversion of the stored time string
     */
    public static void searchAndDisplay (WrappedIndexWriter wiw, WrappedSearcher ws, WrappedQuery wq,
                                            String sortField, boolean reverse)
        throws IOException, ParseException, java.text.ParseException {
        ScoreDoc[] results = ws.doSearch( wq, 10,
                        new WrappedSort(new SortField(sortField, SortField.STRING, reverse)));
    
        // display results
        System.out.println(results.length + " results.");
        for(int i=0; i < results.length; i++) {
            int docId = results[i].doc;
            Document doc = ws.getIndexSearcher().doc(docId);
            // the third line shows the time field, so it is labelled "time"
            // (it used to be printed under a second "content" label)
            System.out.println((i + 1) + "\ttitle: " + doc.get("title")
                                + "\n\tcontent: " + doc.get("content")
                                + "\n\ttime: " + DateUtils.stringToDate(doc.get("time"))
                                + "\n");
        }
    }
}

This is the main program. It is
very similar to RangeQueryTest.java in this post:
http://ben-bai.blogspot.com/2012/03/lucene-searching-range-of-dates-by.html

The only changes are that the time field is now NOT_ANALYZED and a Sort condition is added to the search.

WrappedSort.java

package test.lucene.wrapper;

import java.util.*;

import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;

/**
 * This class wrap the Sort class of Lucene so that we can
 * add SortField(s) and keep them append new SortField easily
 * until we really need the Sort
 *
 */
public class WrappedSort {
    private Sort _sort; // Sort
    private List<SortField> _sortFields = new ArrayList<SortField>(); // SortField(s)
    /**
     * Constructor that only creates the instance of Sort
     */
    public WrappedSort () {
        createSort();
    }
    /**
     * Constructor that creates the instance of Sort and adds one SortField
     * @param sortField The SortField to add
     */
    public WrappedSort (SortField sortField) {
        createSort().addSortField(sortField);
    }
    /**
     * Constructor that creates the instance of Sort and adds a SortField List
     * @param sortFields The List of SortField to add
     */
    public WrappedSort (List<SortField> sortFields) {
        createSort().addSortFields(sortFields);
    }
    /**
     * Create/renew the instance of Sort
     * @return Self instance so calls can be chained.
     */
    public WrappedSort createSort () {
        _sort = new Sort();
        return this;
    }
    /**
     * Renew the instance of the List of SortField (drops all collected SortFields)
     * @return Self instance so calls can be chained.
     */
    public WrappedSort createSortField () {
        _sortFields = new ArrayList<SortField>();
        return this;
    }
    /**
     * Add one SortField
     * @param sortField The SortField to add
     * @return Self instance so calls can be chained.
     */
    public WrappedSort addSortField (SortField sortField) {
        _sortFields.add(sortField);
        return this;
    }
    /**
     * Add a List of SortField
     * @param sortFields The List of SortField to add
     * @return Self instance so calls can be chained.
     */
    public WrappedSort addSortFields (List<SortField> sortFields) {
        _sortFields.addAll(sortFields);
        return this;
    }
    /**
     * Get the List of SortField
     * @return The List of SortField (the internal list itself, not a copy)
     */
    public List<SortField> getSortFields () {
        return _sortFields;
    }
    /**
     * Set the collected SortField(s) on the Sort and return the instance of Sort.
     * If no SortField has been added, the Sort is set to
     * {@code SortField.FIELD_SCORE} (sort by relevance).
     * @return The instance of Sort
     */
    public Sort getNativeSort () {
        if (_sortFields.isEmpty()) {
            // nothing collected: fall back to relevance, and clear anything
            // an earlier call may have set on the same Sort instance
            _sort.setSort(SortField.FIELD_SCORE);
        } else {
            // the no-arg toArray() returns Object[], which cannot be cast to
            // SortField[]; the typed overload produces a real SortField[]
            _sort.setSort(_sortFields.toArray(new SortField[_sortFields.size()]));
        }
        return _sort;
    }
}

This class wraps the Lucene Sort class: it
stores the Sort instance together with the SortFields that will be applied to it.

The fragment added to WrappedSearcher.java

    /**
     * Execute the wrapped query with the wrapped sort order and
     * hand back the matching hits.
     * @param wq WrappedQuery supplying the query to run
     * @param resultsPerPage passed to the searcher as the number of hits to collect
     * @param ws WrappedSort supplying the native Sort
     * @return ScoreDoc[] The hits returned by the searcher
     * @throws IOException
     * @throws ParseException
     */
    public ScoreDoc[] doSearch (WrappedQuery wq, int resultsPerPage, WrappedSort ws)
            throws IOException, ParseException{
        final ScoreDoc[] hits = _searcher.search(
                wq.getQuery(),
                resultsPerPage,
                ws.getNativeSort()
        ).scoreDocs;
        return hits;
    }

This fragment performs the search with the given Sort.

The Result


The first search returns dateOne and dateTwo sorted in descending order;
the second search returns dateTwo through dateFour sorted in ascending order.

Download:

The full project is at github:
https://github.com/benbai123/JSP_Servlet_Practice/tree/master/Practice/JAVA/Search/LuceneTest

Reference:

http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/api/all/org/apache/lucene/search/Sort.html

http://lucene.apache.org/core/old_versioned_docs/versions/3_5_0/api/all/org/apache/lucene/search/SortField.html

No comments:

Post a Comment