<?xml version="1.0" encoding="utf-8" standalone="yes"?>
用于判断Unicode Letter：<br>
// Character class matching one Latin/extended letter in the listed Unicode ranges.
// Ranges are listed back to back: inside [...] the characters '(', ')' and '|' are literals, not grouping/alternation.
String UnicodeLetterPattern = "[\u0041-\u005a\u0061-\u007a\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u00ff\u0100-\u1fff]";
用于判断亚洲语言字符（中国，日本，韩国）：<br>
// Character class matching one CJK (Chinese/Japanese/Korean) character in the listed Unicode ranges.
// Ranges are listed back to back: inside [...] the characters '(', ')' and '|' are literals, not grouping/alternation.
String UnicodeCJPattern = "[\u3040-\u318f\u3300-\u337f\u3400-\u3d2d\u4e00-\u9fff\uf900-\ufaff\uac00-\ud7af]";
用于判断Unicode中的数字：<br>
// Character class matching one decimal digit from the listed Unicode digit ranges.
// Ranges are listed back to back: inside [...] the characters '(', ')' and '|' are literals, not grouping/alternation.
// The Telugu range is written \u0c66-\u0c6f; a missing 'u' there would turn it into a huge '6'..U+0C6F range.
String UnicodeDigitPattern = "[\u0030-\u0039\u0660-\u0669\u06f0-\u06f9\u0966-\u096f\u09e6-\u09ef\u0a66-\u0a6f\u0ae6-\u0aef\u0b66-\u0b6f\u0be7-\u0bef\u0c66-\u0c6f\u0ce6-\u0cef\u0d66-\u0d6f\u0e50-\u0e59\u0ed0-\u0ed9\u1040-\u1049]";
QueryParser调用静态方法Parse后会返回Query的实例，原子查询。例如：“Info:电视台 AND ID:
Lucene内建Query对象：
TermQuery：词条查询。通过对某个词条的指定，实现检索索引中存在该词条的所有文档。
BooleanQuery：布尔查询。Lucene中包含逻辑关系：“与”，“或”，“非”的复杂查询，最终都会表示成BooleanQuery。布尔查询就是一个由多个子句和子句之间组成的布尔逻辑所组成的查询。
RangeQuery：范围查询。这种范围可以是日期，时间，数字，大小等等。
PrefixQuery：前缀查询。
PhraseQuery：短语查询。默认为完全匹配，但可以指定坡度（Slop，默认为0）改变范围。比如Slop=1，检索短语为“电台”，那么在“电台”中间有一个字的也可以被查找出来，比如“电视台”。
MultiPhraseQuery：多短语查询。
FuzzyQuery：模糊查询。模糊查询使用的匹配算法是Levenshtein算法。此算法在比较两个字符串时，将动作分为3种：加一个字母（Insert），删一个字母（Delete），改变一个字母（Substitute）。
WildcardQuery：通配符查询。“*”号表示0到多个字符，“？”表示单个字符。
SpanQuery：跨度查询。此类为抽象类。
SpanTermQuery：检索效果完全同TermQuery，但内部会记录一些位置信息，供SpanQuery的其它API使用，是其它属于SpanQuery的Query的基础。
SpanFirstQuery：查找方式为从Field的内容起始位置开始，在一个固定的宽度内查找所指定的词条。
SpanNearQuery：功能类似PhraseQuery。SpanNearQuery查找所匹配的不一定是短语，还有可能是另一个SpanQuery的查询结果作为整体考虑，进行嵌套查询。
SpanOrQuery：把所有SpanQuery查询结果综合起来，作为检索结果。
SpanNotQuery：从第一个SpanQuery查询结果中，去掉第二个SpanQuery查询结果，作为检索结果。
BooleanClause用于表示布尔查询子句关系的类，包括：BooleanClause.Occur.MUST，BooleanClause.Occur.MUST_NOT，BooleanClause.Occur.SHOULD。有以下6种组合：
1．MUST和MUST：取得两个查询子句的交集。
2．MUST和MUST_NOT：表示查询结果中不能包含MUST_NOT所对应的查询子句的检索结果。
3．MUST_NOT和MUST_NOT：无意义，检索无结果。
4．SHOULD与MUST、SHOULD与MUST_NOT：SHOULD与MUST连用时，无意义，结果为MUST子句的检索结果。与MUST_NOT连用时，功能同MUST。
5．SHOULD与SHOULD：表示“或”关系，最终检索结果为所有检索子句的并集。
N：文档集合的大小
M：词项集合的大小
Sj=|PL（tj）|：词项tj所涉及文档的个数
DF（tj）=Sj/N：词项tj的文档频率
IDF（tj）=-logDF（tj）：倒置文档频率，其值越小表示出现的频率越高
fi,j：第j个词项tj在第i个文档di中出现的次数
TN=$\sum_{i}\sum_{j} f_{i,j}$：系统所有文档分解后包含词项的总量，包括重复，即一个多重集（multi-set）
TF（tj）=（<img height=34 alt="" src="http://m.tkk7.com/images/blogjava_net/liangtianyu/image003.gif" width=22 border=0> fi,j）/TN：词项tj在文档中出现的频度（词频）
ITF（tj）=logTF（tj）：倒置词频，越大表示出现的频率越高
倒排文件分为两部分：第一部分是由不同词项组成的索引，称为词表（Vocabulary），第二部分由每个词项出现过的文档集合构成，称为记录文件（Posting File），每个词项的对应部分称为倒排表（Posting Lists），可以通过词表访问。
/// <summary>
/// Formats a millisecond timestamp as text using the pattern that belongs to the requested resolution.
/// </summary>
/// <param name="l">Timestamp in the form accepted by <c>LongToDateTime</c> (milliseconds relative to <c>BASE_UNIVERSAL_TIME</c>).</param>
/// <param name="resolution">Selects which of the <c>*_FORMAT</c> patterns is applied.</param>
/// <returns>The formatted timestamp, or an empty string when <paramref name="resolution"/> is not one of the known values.</returns>
public static string TimeToString(long l, Resolution resolution)
{
    // Converted up front so an out-of-range value still fails for every resolution.
    DateTime dt = LongToDateTime(l);

    string format;
    switch (resolution)
    {
        case Resolution.YEAR:
            format = YEAR_FORMAT;
            break;
        case Resolution.MONTH:
            format = MONTH_FORMAT;
            break;
        case Resolution.DAY:
            format = DAY_FORMAT;
            break;
        case Resolution.HOUR:
            format = HOUR_FORMAT;
            break;
        case Resolution.MINUTE:
            format = MINUTE_FORMAT;
            break;
        case Resolution.SECOND:
            format = SECOND_FORMAT;
            break;
        case Resolution.MILLISECOND:
            format = MILLISECOND_FORMAT;
            break;
        default:
            // Unknown resolution: keep returning an empty string rather than throwing.
            return string.Empty;
    }

    // Format with the invariant culture so the digits do not depend on the machine's
    // calendar (e.g. a Thai Buddhist calendar would otherwise shift the year).
    // NOTE(review): any code that parses these strings back should use the invariant culture too - confirm.
    return dt.ToString(format, System.Globalization.CultureInfo.InvariantCulture);
}
/// <summary>
/// Formats a <see cref="DateTime"/> as text for the given resolution by first converting it
/// to its millisecond representation.
/// </summary>
/// <param name="time">The instant to format.</param>
/// <param name="resolution">Resolution passed through to the millisecond-based overload.</param>
/// <returns>Whatever the millisecond-based overload returns for the converted value.</returns>
public static string TimeToString(DateTime time, Resolution resolution)
{
    long millis = DateTimeToLong(time);
    return TimeToString(millis, resolution);
}
/// <summary>
/// Truncates a millisecond timestamp down to the start of the unit named by
/// <paramref name="resolution"/> (start of year, month, day, hour, minute or second).
/// </summary>
/// <param name="time">Milliseconds relative to <c>BASE_UNIVERSAL_TIME</c>.</param>
/// <param name="resolution">Unit to truncate to; <c>MILLISECOND</c> or an unknown value leaves the instant untouched.</param>
/// <returns>The truncated instant, converted back with <c>DateTimeToLong</c>.</returns>
public static long Round(long time, Resolution resolution)
{
    DateTime moment = BASE_UNIVERSAL_TIME.AddMilliseconds(time).ToUniversalTime();

    // Every offset is taken from the untruncated instant, then applied in sequence.
    int toJanuary = 1 - moment.Month;
    int toFirstDay = 1 - moment.Day;
    int toMidnight = 0 - moment.Hour;
    int toHourStart = 0 - moment.Minute;
    int toMinuteStart = 0 - moment.Second;
    int toSecondStart = 0 - moment.Millisecond;

    DateTime truncated = moment;
    if (resolution == Resolution.YEAR)
    {
        truncated = moment.AddMonths(toJanuary)
                          .AddDays(toFirstDay)
                          .AddHours(toMidnight)
                          .AddMinutes(toHourStart)
                          .AddSeconds(toMinuteStart)
                          .AddMilliseconds(toSecondStart);
    }
    else if (resolution == Resolution.MONTH)
    {
        truncated = moment.AddDays(toFirstDay)
                          .AddHours(toMidnight)
                          .AddMinutes(toHourStart)
                          .AddSeconds(toMinuteStart)
                          .AddMilliseconds(toSecondStart);
    }
    else if (resolution == Resolution.DAY)
    {
        truncated = moment.AddHours(toMidnight)
                          .AddMinutes(toHourStart)
                          .AddSeconds(toMinuteStart)
                          .AddMilliseconds(toSecondStart);
    }
    else if (resolution == Resolution.HOUR)
    {
        truncated = moment.AddMinutes(toHourStart)
                          .AddSeconds(toMinuteStart)
                          .AddMilliseconds(toSecondStart);
    }
    else if (resolution == Resolution.MINUTE)
    {
        truncated = moment.AddSeconds(toMinuteStart)
                          .AddMilliseconds(toSecondStart);
    }
    else if (resolution == Resolution.SECOND)
    {
        truncated = moment.AddMilliseconds(toSecondStart);
    }
    // Resolution.MILLISECOND (and any other value): nothing to strip.

    return DateTimeToLong(truncated);
}
/// <summary>
/// Truncates a <see cref="DateTime"/> to the given resolution by round-tripping through the
/// millisecond representation.
/// </summary>
/// <param name="date">The instant to truncate.</param>
/// <param name="resolution">Unit to truncate to.</param>
/// <returns>The truncated instant as produced by <c>LongToDateTime</c>.</returns>
public static DateTime Round(DateTime date, Resolution resolution)
{
    long millis = DateTimeToLong(date);
    long truncatedMillis = Round(millis, resolution);
    return LongToDateTime(truncatedMillis);
}
/// <summary>
/// Converts a <see cref="DateTime"/> to the number of whole milliseconds between
/// <c>BASE_UNIVERSAL_TIME</c> and the value's universal-time equivalent.
/// </summary>
/// <param name="time">The instant to convert; it is passed through <c>ToUniversalTime()</c> first.</param>
/// <returns>The elapsed milliseconds, with any fractional part dropped by the cast to <c>long</c>.</returns>
public static long DateTimeToLong(DateTime time)
{
    TimeSpan sinceBase = time.ToUniversalTime() - BASE_UNIVERSAL_TIME;
    return (long)sinceBase.TotalMilliseconds;
}
/// <summary>
/// Converts a millisecond offset from <c>BASE_UNIVERSAL_TIME</c> back into a <see cref="DateTime"/>.
/// </summary>
/// <param name="l">Milliseconds to add to <c>BASE_UNIVERSAL_TIME</c>.</param>
/// <returns>The shifted instant after <c>ToUniversalTime()</c> has been applied.</returns>
public static DateTime LongToDateTime(long l)
{
    DateTime shifted = BASE_UNIVERSAL_TIME.AddMilliseconds(l);
    return shifted.ToUniversalTime();
}
/// <summary>
/// Granularity selector used by the time formatting and rounding helpers,
/// ordered from coarsest to finest.
/// </summary>
public enum Resolution
{
    /// <summary>Keep the year only.</summary>
    YEAR = 0,

    /// <summary>Keep year and month.</summary>
    MONTH = 1,

    /// <summary>Keep down to the day.</summary>
    DAY = 2,

    /// <summary>Keep down to the hour.</summary>
    HOUR = 3,

    /// <summary>Keep down to the minute.</summary>
    MINUTE = 4,

    /// <summary>Keep down to the second.</summary>
    SECOND = 5,

    /// <summary>Keep full millisecond precision.</summary>
    MILLISECOND = 6
}
}
for (int i = (s.Length-1),j=0; i >=0; i--,j++)
{
char ch = char.ToLower(s[i]);
int v = char.IsDigit(ch)==true?int.Parse(ch.ToString()):(10+(int)(ch-'a'));
result = result + Convert.ToInt64(v * Math.Pow(fromBase, j));
}
return result;
}
// Implements the equivalent of Java's Long.toString(long i, int radix).
/// <summary>
/// Renders a 64-bit integer as text in the given radix, mirroring Java's
/// <c>Long.toString(long, int)</c>: lowercase digits <c>0-9a-z</c>, a leading <c>-</c> for
/// negative values, and <c>"0"</c> for zero.
/// </summary>
/// <param name="l">The value to render.</param>
/// <param name="fromBase">The target radix (2 to 36). As in Java, a radix outside that range falls back to 10.</param>
/// <returns>The textual representation of <paramref name="l"/> in the chosen radix.</returns>
public static string ConvertToString(long l, int fromBase)
{
    const string Digits = "0123456789abcdefghijklmnopqrstuvwxyz";

    int radix = (fromBase < 2 || fromBase > Digits.Length) ? 10 : fromBase;

    if (l == 0)
    {
        return "0";
    }

    bool negative = l < 0;

    // Work on the negative magnitude: -long.MinValue does not fit in a long,
    // but every positive value can be negated safely.
    long remaining = negative ? l : -l;

    // 64 binary digits plus an optional sign is the longest possible output.
    char[] buffer = new char[65];
    int pos = buffer.Length;

    // Pure integer arithmetic; floating-point log/pow misplaces digits on exact
    // powers of the radix and loses precision above 2^53.
    while (remaining != 0)
    {
        int digit = (int)-(remaining % radix);
        buffer[--pos] = Digits[digit];
        remaining /= radix;
    }

    if (negative)
    {
        buffer[--pos] = '-';
    }

    return new string(buffer, pos, buffer.Length - pos);
}
这样替换Lucene.Net中的相关代码，就可以了。而且经过测试，发现.Net版本的查询速度比Java版本的快很多，很搞不清楚。