<rt id="bn8ez"></rt>
<label id="bn8ez"></label>

  • <span id="bn8ez"></span>

    <label id="bn8ez"><meter id="bn8ez"></meter></label>

    posts - 431,  comments - 344,  trackbacks - 0

    HTML 解析器
    package com.rain.util;

    import Java.io.FileInputStream;
    import Java.io.FileNotFoundException;
    import Java.io.IOException;
    import Java.io.InputStream;
    import Java.io.InputStreamReader;
    import Java.io.Reader;
    import Java.io.UnsupportedEncodingException;

    import org.apache.lucene.demo.html.HTMLParser;

    public class HTMLDocParser {

     private String htmlPath;
     private HTMLParser htmlParser;
     
     public HTMLDocParser(String htmlPath){
      this.htmlPath=htmlPath;
      initHtmlParser();
     }
     public void initHtmlParser(){
      InputStream inputStream=null;
      try{
       inputStream=new FileInputStream(htmlPath);
      }catch(FileNotFoundException e){
       e.printStackTrace();
      }
      if(null!=inputStream){
       try{
        htmlParser=new HTMLParser(new InputStreamReader(inputStream,"utf-8"));
       }catch(UnsupportedEncodingException e){
        e.printStackTrace();
       }
      }
     }
     public String getTitle(){
      if(null!=htmlParser){
       try{
        return htmlParser.getTitle();
       }catch(IOException e){
        e.printStackTrace();
       }catch(InterruptedException e){
        e.printStackTrace();
       }
      }
      return "";
     }
     public Reader getContent(){
      if(null!=htmlParser){
       try{
        return htmlParser.getReader();
       }catch(IOException e){
        e.printStackTrace();
       }
      }
      return null;
     }
     public String getPath(){
      return this.htmlPath;
     }
    }


    描述搜索結果的結構實體Bean
    package com.rain.search;

    /**
     * Simple mutable holder for one search hit: the path of the matching HTML
     * file and its title. Both properties are {@code null} until set.
     */
    public class SearchResultBean {

        /** Path of the matching HTML file. */
        private String htmlPath;

        /** Title of the matching HTML file. */
        private String htmlTitle;

        /**
         * @return the stored path, or {@code null} if none has been set
         */
        public String getHtmlPath() {
            return this.htmlPath;
        }

        /**
         * @return the stored title, or {@code null} if none has been set
         */
        public String getHtmlTitle() {
            return this.htmlTitle;
        }

        /**
         * @param htmlPath the path to store
         */
        public void setHtmlPath(String htmlPath) {
            this.htmlPath = htmlPath;
        }

        /**
         * @param htmlTitle the title to store
         */
        public void setHtmlTitle(String htmlTitle) {
            this.htmlTitle = htmlTitle;
        }
    }


    索引子系統的實現

    package com.rain.index;

    import Java.io.File;
    import Java.io.IOException;
    import Java.io.Reader;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;
    import org.apache.lucene.document.Field;

    import com.rain.util.HTMLDocParser;

    public class IndexManager {
     
     //the directory that stores HTML files
     private final String dataDir="E:\\dataDir";
     
     //the directory that is used to store a Lucene index
     private final String indexDir="E:\\indexDir";
     
     public boolean creatIndex()throws IOException{
      if(true==inIndexExist()){
       return true;
      }
      File dir=new File(dataDir);
      if(!dir.exists()){
       return false;
      }
      File[] htmls=dir.listFiles();
      Directory fsDirectory=FSDirectory.getDirectory(indexDir,true);
      Analyzer analyzer=new StandardAnalyzer();
      IndexWriter indexWriter=new IndexWriter(fsDirectory,analyzer,true);
      for(int i=0;i<htmls.length;i++){
       String htmlPath=htmls[i].getAbsolutePath();
       if(htmlPath.endsWith(".html")||htmlPath.endsWith("htm")){
        addDocument(htmlPath,indexWriter);
       }
      }
      indexWriter.optimize();
      indexWriter.close();
      return true;
     }
     
     public void addDocument(String htmlPath,IndexWriter indexWriter){
      HTMLDocParser htmlParser=new HTMLDocParser(htmlPath);
      String path=htmlParser.getPath();
      String title=htmlParser.getTitle();
      Reader content=htmlParser.getContent();
      
      Document document=new Document();
      document.add(new Field("path",path,Field.Store.YES,Field.Index.NO));
      document.add(new Field("title",title,Field.Store.YES,Field.Index.TOKENIZED));
         document.add(new Field("content",content));
         try{
          indexWriter.addDocument(document);
         }catch(IOException e){
          e.printStackTrace();
         }
     }
     public String getDataDir(){
      return this.dataDir;
     }
     
     public String getIndexDir(){
      return this.indexDir;
     }
     
     public boolean inIndexExist(){
      File directory=new File(indexDir);
      if(0<directory.listFiles().length){
       return true;
      }else{
       return false;
      }
     }
    }


    搜索功能的實現
    package com.rain.search;

    import Java.io.IOException;
    import Java.util.ArrayList;
    import Java.util.List;

    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.queryParser.ParseException;
    import org.apache.lucene.queryParser.QueryParser;
    import org.apache.lucene.search.Hits;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.Query;

    import com.rain.index.IndexManager;

    public class SearchManager {
     private String searchWord;
     private IndexManager indexManager;
     private Analyzer analyzer;
     
     public SearchManager(String searchWord){
      this.searchWord=searchWord;
      this.indexManager=new IndexManager();
      this.analyzer=new StandardAnalyzer();
     }
     
     /**
         * do search
         */
     public List search(){
      List searchResult=new ArrayList();
      if(false==indexManager.inIndexExist()){
       try{
        if(false==indexManager.creatIndex()){
         return searchResult;
        }
       }catch(IOException e){
        e.printStackTrace();
        return searchResult;
       }
      }
      IndexSearcher indexSearcher=null;
      try{
       indexSearcher=new IndexSearcher(indexManager.getIndexDir());
      }catch(IOException e){
       e.printStackTrace();
      }
      QueryParser queryParser=new QueryParser("content",analyzer);
      Query query=null;
      try{
       query=queryParser.parse(searchWord);
      }catch(ParseException e){
       e.printStackTrace();
      }
      if(null!=query&&null!=indexSearcher){
       try{
        Hits hits=indexSearcher.search(query);
        for(int i=0;i<hits.length();i++){
         SearchResultBean resultBean=new SearchResultBean();
         resultBean.setHtmlPath(hits.doc(i).get("path"));
         resultBean.setHtmlTitle(hits.doc(i).get("title"));
         searchResult.add(resultBean);
        }
       }catch(IOException e){
        e.printStackTrace();
       }
      }
       return searchResult;
     }

    }


    請求管理器的實現

    package com.rain.servlet;

    import Java.io.IOException;
    import Java.util.List;

    import javax.servlet.RequestDispatcher;
    import javax.servlet.ServletException;
    import javax.servlet.http.HttpServlet;
    import javax.servlet.http.HttpServletRequest;
    import javax.servlet.http.HttpServletResponse;

    import com.rain.search.SearchManager;

    /**
     * @author zhourui
     * 2007-1-28
     */
    public class SearchController extends HttpServlet {
     private static final long serialVersionUID=1L;
     
     /* (non-Javadoc)
      * @see javax.servlet.http.HttpServlet#doPost(javax.servlet.http.HttpServletRequest, javax.servlet.http.HttpServletResponse)
      */
     @Override
     protected void doPost(HttpServletRequest arg0, HttpServletResponse arg1) throws ServletException, IOException {
      // TODO Auto-generated method stub
      String searchWord=arg0.getParameter("searchWord");
      SearchManager searchManager=new SearchManager(searchWord);
      List searchResult=null;
      searchResult=searchManager.search();
      RequestDispatcher dispatcher=arg0.getRequestDispatcher("search.jsp");
      arg0.setAttribute("searchResult",searchResult);
            dispatcher.forward(arg0, arg1);
     }
     
    }




    向Web服務器提交搜索請求
    <!-- Search form: posts the "searchWord" field to the SearchController servlet. -->
    <form method="post" action="SearchController">
      <table>
        <tr>
          <td colspan="3">
            SearchWord:<input id="searchWord" name="searchWord" type="text" size="40">
            <input type="submit" id="doSearch" value="search">
          </td>
        </tr>
      </table>
    </form>
    顯示搜索結果
     <table class="result">
          <%
            List searchResult=(List)request.getAttribute("searchResult");
            int resultCount=0;
            if(null!=searchResult){
             resultCount=searchResult.size();
            }
            for(int i=0;i<resultCount;i++){
             SearchResultBean resultBean=(SearchResultBean)searchResult.get(i);
             String title=resultBean.getHtmlTitle();
             String path=resultBean.getHtmlPath();
             %>
             <tr>
               <td class="title"><h3><a href="<%=path%>"><%=title%></a></h3></td>
             </tr>
             <%
            }
          %>
        </table>
    posted on 2007-01-29 09:57 周銳 閱讀(845) 評論(0)  編輯  收藏 所屬分類: Lucene

    只有注冊用戶登錄后才能發表評論。


    網站導航:
    博客園   IT新聞   Chat2DB   C++博客   博問  
     
    主站蜘蛛池模板: 国产免费看插插插视频| 亚色九九九全国免费视频| 亚洲国产成人久久一区久久| 亚洲av无码一区二区三区四区 | 亚洲Aⅴ在线无码播放毛片一线天| 99久久99久久精品免费看蜜桃| 亚洲色图古典武侠| 免费阿v网站在线观看g| 亚洲中文字幕乱码一区| 在线播放免费人成视频在线观看| 亚洲国产精品ⅴa在线观看| 日韩免费视频观看| 一区二区免费在线观看| 亚洲深深色噜噜狠狠爱网站| 国内精品免费在线观看| 三级毛片在线免费观看| 亚洲中文字幕无码久久2017| 一区二区免费视频| 四虎必出精品亚洲高清| 啊灬啊灬别停啊灬用力啊免费看| 一区二区三区免费电影| 久久精品国产亚洲| 亚洲第一成年免费网站| 国产精品亚洲色图| 亚洲AV午夜成人影院老师机影院| 91成人免费观看| 久久亚洲精品无码av| 亚洲中文字幕在线乱码| 2019中文字幕在线电影免费| 亚洲色最新高清av网站| 亚洲精品国产高清嫩草影院| 国内精品一级毛片免费看| 中文字幕乱码亚洲无线三区| 亚洲精品无码专区久久同性男| 久操视频在线免费观看| 亚洲欧美精品午睡沙发| 亚洲乱码国产乱码精品精| 成年网站免费视频A在线双飞| 人妻巨大乳hd免费看| 91亚洲视频在线观看| 亚洲国产成人乱码精品女人久久久不卡|