Java Lucene 4.5如何按不区分大小写进行搜索
我们已经使用 Java Lucene 4.5 实现了搜索引擎,我希望搜索内容时不区分字段值的大小写(例如,当我搜索名称为 "Banglore" 的城市时会得到结果,但是当我搜索名称为 "banglore" 的城市时得到 0 个结果).
We have implemented a search engine with Java Lucene 4.5, and I am trying to search the content regardless of the case of the field value (e.g., if I search for a city named "Banglore" I get a result, but when I search for a city named "banglore" I get 0 results).
我使用 StandardAnalyzer
分析数据并使用 WildcardQuery
来匹配 Like
条件(我尝试了这里提到的方法,但没有成功).
I have used StandardAnalyzer
for analyzing the data and WildcardQuery
to match a Like
condition (I tried as mentioned here without success).
我不确定我哪里出错了.感谢任何有关解决此区分大小写问题的指导.
I am not sure where I have gone wrong. I appreciate any guidance on fixing this case sensitivity problem.
public SearchHelper
{
Analyzer analyzer;
Directory index;
public IndexSearcher searcher = null;
public IndexWriter indexWriter = null;
public QueryParser parser = null;
private static int hitsPerPage = 100;
/**
* @param indexFileLocation
* @throws IOException
*/
public SearchHelper(String indexFileLocation) throws IOException
{
// this.analyzer =new StandardAnalyzer();
this.analyzer = new CaseStandardAnalyzer();
// analyzer = new ThaiAnalyzer();
this.index = FSDirectory.open(java.nio.file.Paths.get(indexFileLocation));
}
/**
 * Returns the cached index writer, opening one on first use.
 *
 * <p>The writer is built over this helper's directory with the analyzer that
 * is current at the time of the first call.
 *
 * @param create accepted for API compatibility; this method does not consult it
 * @return the cached {@link IndexWriter}
 * @throws IOException if a new writer cannot be opened
 */
public IndexWriter getIndexWriter(boolean create) throws IOException
{
    if (this.indexWriter != null)
    {
        return this.indexWriter;
    }
    this.indexWriter = new IndexWriter(this.index, new IndexWriterConfig(this.analyzer));
    return this.indexWriter;
}
/**
 * Commits pending changes and closes the cached index writer, if any.
 *
 * <p>The cached reference is cleared afterwards so that the next call to
 * {@code getIndexWriter} opens a fresh writer instead of handing back one
 * that has already been closed. The writer is closed even when the commit
 * fails.
 *
 * @throws IOException if committing or closing the writer fails
 */
public void closeIndexWriter() throws IOException
{
    if (this.indexWriter == null)
    {
        return;
    }
    // try-with-resources guarantees close() runs even if commit() throws.
    try (IndexWriter writer = this.indexWriter)
    {
        writer.commit();
    }
    finally
    {
        // Drop the reference: a closed writer must never be reused.
        this.indexWriter = null;
    }
}
/**
 * Opens a reader on the index at the given location and installs a new
 * searcher over it.
 *
 * <p>The directory is opened from {@code indexFileLocation}; the directory
 * held by this helper is not used here.
 *
 * @param indexFileLocation file-system path of the index directory
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if the index cannot be read
 */
public void startSearch(String indexFileLocation) throws CorruptIndexException, IOException
{
    java.nio.file.Path indexPath = java.nio.file.Paths.get(indexFileLocation);
    Directory searchDirectory = FSDirectory.open(indexPath);
    this.searcher = new IndexSearcher(DirectoryReader.open(searchDirectory));
}
/**
 * Runs a "contains" search over several fields, OR-ing the per-field matches.
 *
 * <p>One wildcard query is built per field name, paired by position with
 * {@code fieldValues}, and added to a boolean query as a SHOULD clause.
 * A {@code null} {@code fieldNames} array yields a query with no clauses.
 * As a side effect the helper's analyzer is replaced with a new
 * {@code StandardAnalyzer}; the analyzer is not otherwise used by this method.
 *
 * @param fieldNames names of the fields to search; may be {@code null}
 * @param fieldValues search text for each field, indexed like {@code fieldNames}
 * @param limitSize maximum number of hits; values of 0 or less fall back to
 *     the class default page size
 * @return the top scoring documents
 * @throws IOException if the index cannot be read
 * @throws ParseException declared by the query-building helper
 * @see <a href="https://stackoverflow.com/questions/2005084/how-to-specify-two-fields-in-lucene-queryparser">
 *     How to specify two fields in Lucene QueryParser</a>
 */
public ScoreDoc[] searchSEO(String[] fieldNames, String[] fieldValues, int limitSize) throws IOException, ParseException
{
    this.analyzer = new StandardAnalyzer();
    BooleanQuery combined = new BooleanQuery();
    if (fieldNames != null)
    {
        int position = 0;
        for (String fieldName : fieldNames)
        {
            // 2 selects Occur.SHOULD in addQueries.
            addQueries(combined, searchIndexWithWildcardQuery(fieldName, fieldValues[position]), 2);
            position++;
        }
    }
    int maxHits = (limitSize > 0) ? limitSize : hitsPerPage;
    TopScoreDocCollector collector = TopScoreDocCollector.create(maxHits);
    this.searcher.search(combined, collector);
    return collector.topDocs().scoreDocs;
}
/**
 * Builds a "contains" wildcard query for one field.
 *
 * <p>The search text is wrapped in {@code *} on both sides and used as the
 * term as-is. A {@code WildcardQuery} does not analyze its term, so matching
 * is case-sensitive against whatever was indexed.
 *
 * @param whichField name of the field to search
 * @param searchString text that must occur somewhere in the field's term
 * @return the wildcard query
 * @throws IOException declared for callers; not thrown by the visible code
 * @throws ParseException declared for callers; not thrown by the visible code
 */
public Query searchIndexWithWildcardQuery(String whichField, String searchString) throws IOException, ParseException
{
    String pattern = "*" + searchString + "*";
    return new WildcardQuery(addTerm(whichField, pattern));
}
/**
 * Creates a term for the given field and text.
 *
 * @param whichField name of the field
 * @param searchString text of the term
 * @return a new {@link Term}
 */
public Term addTerm(String whichField, String searchString)
{
    return new Term(whichField, searchString);
}
/**
 * Parses a search string with the requested default boolean operator.
 *
 * <p>{@code "and"} selects AND and {@code "or"} selects OR as the parser's
 * default operator; any other value leaves the parser's current default
 * untouched. The parser must have been set beforehand (see {@code setParser}).
 *
 * @param searchString query text to parse
 * @param operation {@code "and"} or {@code "or"}
 * @return the parsed query
 * @throws ParseException if the search string cannot be parsed
 */
public Query addConditionOpertaion(String searchString, String operation) throws ParseException
{
    if ("and".equals(operation))
    {
        parser.setDefaultOperator(QueryParser.AND_OPERATOR);
    }
    else if ("or".equals(operation))
    {
        // Previously this branch also selected AND_OPERATOR, so "or" was never honoured.
        parser.setDefaultOperator(QueryParser.OR_OPERATOR);
    }
    return parser.parse(searchString);
}
/**
 * Adds a clause to a boolean query with the occurrence selected by a code.
 *
 * @param booleanQuery <code>BooleanQuery</code> to extend
 * @param q <code>Query</code> to add as a clause
 * @param type <code>int</code>: 1 means MUST, 2 means SHOULD, and any other
 *     value (3 by convention) means MUST_NOT
 */
public void addQueries(BooleanQuery booleanQuery, Query q, int type)
{
    final Occur occurrence;
    if (type == 1)
    {
        occurrence = Occur.MUST;
    }
    else if (type == 2)
    {
        occurrence = Occur.SHOULD;
    }
    else
    {
        occurrence = Occur.MUST_NOT;
    }
    booleanQuery.add(q, occurrence);
}
/**
 * Returns the query parser currently held by this helper.
 *
 * @return the parser, or {@code null} if none has been set
 */
public QueryParser getParser()
{
    return this.parser;
}
/**
 * Replaces the query parser with one for the given default field, built
 * with this helper's current analyzer.
 *
 * @param fieldName default field for the new parser
 */
public void setParser(String fieldName)
{
    QueryParser fieldParser = new QueryParser(fieldName, this.analyzer);
    this.parser = fieldParser;
}
/**
 * Resets the analyzer to a new {@code StandardAnalyzer} and installs a
 * query parser whose default field is {@code "status"}.
 *
 * @param status not consulted by this method
 */
public void getDefaultByStatus(int status)
{
    Analyzer standardAnalyzer = new StandardAnalyzer();
    this.analyzer = standardAnalyzer;
    this.parser = new QueryParser("status", standardAnalyzer);
}
/**
 * Deletes the entries of a directory, optionally descending into
 * sub-directories first.
 *
 * <p>Deletion is best-effort: the result of each {@link File#delete()} call
 * is ignored, so a sub-directory that is not emptied (because
 * {@code deleteSubDir} is {@code false}) is simply left in place. Nothing is
 * done when {@code dir} is {@code null}, is not a directory, or cannot be
 * listed.
 *
 * @param dir directory whose entries are removed
 * @param deleteSubDir whether to clear sub-directories recursively before
 *     deleting them
 */
protected void doClear(File dir,boolean deleteSubDir)
{
    if (dir == null)
    {
        return;
    }
    // listFiles() returns null when dir is not a directory or an I/O error occurs.
    File[] entries = dir.listFiles();
    if (entries == null)
    {
        return;
    }
    for (File file : entries)
    {
        if (file.isDirectory() && deleteSubDir)
        {
            doClear(file, deleteSubDir);
        }
        file.delete();
    }
}
/**
 * Closes the index reader behind the current searcher.
 *
 * <p>Does nothing when no searcher has been opened yet.
 *
 * @throws IOException if closing the reader fails
 */
protected void doClose() throws IOException
{
    if (this.searcher == null)
    {
        return;
    }
    this.searcher.getIndexReader().close();
}
/**
 * Indexes a user as a new document and closes the writer.
 *
 * <p>Text values are added as {@code StringField}s, which bypass the analyzer
 * and are indexed verbatim as a single token, so later term matching is
 * case-sensitive.
 *
 * @param Obj the user to index; must be a {@code User}
 * @return always {@code true} when no exception is thrown
 * @throws Exception if the argument is not a {@code User} or indexing fails
 */
public boolean add(Object Obj) throws Exception
{
    User user = (User) Obj;
    org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
    doc.add(new IntField("oid", user.getOid(), Field.Store.YES));
    doc.add(new IntField("status", user.getStatus(), Field.Store.YES));
    doc.add(new StringField("login", user.getLogin(), Field.Store.YES));
    doc.add(new StringField("fName", user.getFirstName(), Field.Store.YES));
    doc.add(new StringField("lName", user.getLastName(), Field.Store.NO));
    doc.add(new StringField("email", user.getEmailId(), Field.Store.YES));
    doc.add(new StringField("city", user.getCity(), Field.Store.YES));

    getIndexWriter(false).addDocument(doc);
    closeIndexWriter();

    System.out.println(true);
    return true;
}
/**
 * Replaces the indexed document(s) whose {@code login} term equals the
 * user's login with a freshly built document, then closes the writer.
 *
 * <p>Text values are added as {@code StringField}s, which bypass the analyzer
 * and are indexed verbatim as a single token.
 *
 * @param Obj the user to re-index; must be a {@code User}
 * @return always {@code true} when no exception is thrown
 * @throws Exception if the argument is not a {@code User} or indexing fails
 */
public boolean update(Object Obj) throws Exception
{
    User user = (User) Obj;
    org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
    doc.add(new IntField("oid", user.getOid(), Field.Store.YES));
    doc.add(new StringField("login", user.getLogin(), Field.Store.YES));
    doc.add(new IntField("status", user.getStatus(), Field.Store.YES));
    doc.add(new StringField("fName", user.getFirstName(), Field.Store.YES));
    doc.add(new StringField("lName", user.getLastName(), Field.Store.NO));
    doc.add(new StringField("email", user.getEmailId(), Field.Store.YES));
    doc.add(new StringField("city", user.getCity(), Field.Store.YES));

    IndexWriter writer = getIndexWriter(false);
    Term loginKey = new Term("login", user.getLogin());
    writer.updateDocument(loginKey, doc);
    closeIndexWriter();

    return true;
}
/**
 * Deletes the indexed document(s) whose {@code login} term equals the
 * user's login, forces a merge of the deletions and closes the writer.
 *
 * @param Obj the user to remove; must be a {@code User}
 * @return always {@code true} when no exception is thrown
 * @throws Exception if the argument is not a {@code User} or the index
 *     cannot be modified
 */
public boolean delete(Object Obj) throws Exception
{
    User user = (User) Obj;
    Term loginKey = new Term("login", user.getLogin());

    IndexWriter writer = getIndexWriter(false);
    writer.deleteDocuments(loginKey);
    writer.forceMergeDeletes();
    closeIndexWriter();

    return true;
}
/**
 * Runs a multi-field search, prints the number of hits and closes the
 * index reader.
 *
 * <p>The reader is closed even when the search itself fails, so a failed
 * query does not leave it open. The hits are only counted and printed; this
 * method currently always returns {@code null}.
 *
 * @param fieldNames names of the fields to search
 * @param fieldValues search text for each field, indexed like {@code fieldNames}
 * @param returnType not consulted by this method
 * @param limit maximum number of hits to collect
 * @return always {@code null}
 * @throws Exception if the search or closing the reader fails
 */
@Override
public Object search(String[] fieldNames, String[] fieldValues, int returnType, int limit) throws Exception
{
    try
    {
        org.apache.lucene.search.ScoreDoc[] hits = searchSEO(fieldNames, fieldValues, limit);
        int hitSize = (null == hits) ? 0 : hits.length;
        System.out.println("total:" + hitSize);
    }
    finally
    {
        // Release the reader whether or not the query succeeded.
        doClose();
    }
    return null;
}
/**
 * Builds the sample user and adds it to the index.
 *
 * <p>{@code add} declares a checked {@code Exception}; since this method
 * declares none, a checked failure is rethrown as an unchecked exception
 * with the original cause attached.
 *
 * @throws IllegalStateException if adding the user fails with a checked exception
 */
public void addThreadUser()
{
    User user = new User();
    addUserPojo(user);
    try
    {
        add(user);
    }
    catch (RuntimeException e)
    {
        throw e;
    }
    catch (Exception e)
    {
        throw new IllegalStateException("Failed to add user to the index", e);
    }
}
/**
 * Builds the sample user and updates its document in the index.
 *
 * <p>{@code update} declares a checked {@code Exception}; since this method
 * declares none, a checked failure is rethrown as an unchecked exception
 * with the original cause attached.
 *
 * @throws IllegalStateException if updating the user fails with a checked exception
 */
public void updateThreadUser()
{
    User user = new User();
    addUserPojo(user);
    try
    {
        update(user);
    }
    catch (RuntimeException e)
    {
        throw e;
    }
    catch (Exception e)
    {
        throw new IllegalStateException("Failed to update user in the index", e);
    }
}
/**
 * Builds the sample user and deletes its document from the index.
 *
 * <p>{@code delete} declares a checked {@code Exception}; since this method
 * declares none, a checked failure is rethrown as an unchecked exception
 * with the original cause attached.
 *
 * @throws IllegalStateException if deleting the user fails with a checked exception
 */
public void deleteThreadUser()
{
    User user = new User();
    addUserPojo(user);
    try
    {
        delete(user);
    }
    catch (RuntimeException e)
    {
        throw e;
    }
    catch (Exception e)
    {
        throw new IllegalStateException("Failed to delete user from the index", e);
    }
}
/**
 * Fills the given user with fixed sample values (oid 3, login "senthil",
 * status 1, and sample name, city and e-mail strings).
 *
 * @param user the user object to populate
 */
private void addUserPojo(User user)
{
user.setOid(3);
user.setLogin("senthil");
user.setFirstName("Semthil");
user.setLastName("Semthil");
user.setStatus(1);
user.setCity("Combiatore");
user.setEmailId("semthil@xyz.com");
}
/**
 * Invokes a three-argument {@code searchUser} overload with field
 * {@code "login"}, value {@code "Se"} and a {@code null} third argument.
 *
 * NOTE(review): no three-argument searchUser overload is defined in the
 * visible source; confirm it exists elsewhere (the four-argument
 * search(...) method may have been intended).
 */
public void searchUser()
{
searchUser(new String[] {"login"}, new String[] {"Se"}, null);
}
/**
 * Entry point: constructs a helper and runs the sample user search.
 *
 * NOTE(review): the only constructor in the visible source takes an index
 * location and throws IOException; confirm that a no-argument constructor
 * exists for this call.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args)
{
SearchHelper test = new SearchHelper();
test.searchUser();
}
}
推荐答案
您正在使用StringField
来索引您的数据,但该字段会绕过分析器链,无论您使用哪种分析器,都始终将您的词项逐字作为一个词元进行索引.如果您希望数据经过分析,而且 StandardAnalyzer
已经做了小写,你应该使用 TextField
.除此之外,WildcardQuery
不会分析其词项,因此如果您搜索 Banglore,它将无法匹配索引中现已小写的 banglore.您必须自己将搜索词转为小写(或对其使用分析器).
You are using StringField
to index your data but this field will bypass the analyzer chain and always index your term verbatim as one token, regardless of your analyzer. You should use TextField
if you want to have your data analyzed and the StandardAnalyzer
already does lower-casing.
Other than that, the WildcardQuery
does not analyze its term, so if you search for Banglore, it won't match the now-lower-case banglore from the index. You have to lowercase the searchterm yourself (or use an analyzer on it).
相关文章