当前位置: 移动技术网 > IT编程>开发语言>Java > Lucene的简单用法

Lucene的简单用法

2018年10月25日  | 移动技术网IT编程  | 我要评论

1.创建索引

  

package com.dingyu.test;

import java.io.file;
import java.io.fileinputstream;
import java.io.filenotfoundexception;
import java.io.ioexception;
import java.io.unsupportedencodingexception;
import java.nio.file.paths;

import org.apache.lucene.analysis.analyzer;
import org.apache.lucene.analysis.standard.standardanalyzer;
import org.apache.lucene.document.document;
import org.apache.lucene.document.field;
import org.apache.lucene.document.field.store;
import org.apache.lucene.document.storedfield;
import org.apache.lucene.index.indexwriter;
import org.apache.lucene.index.indexwriterconfig;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * Builds an on-disk Lucene index from the files of one directory.
 *
 * <p>Every regular file directly under {@code d:\lucenetest} becomes one document with the
 * fields {@code filepath}, {@code filename}, {@code filecontent} and {@code filesize}. The
 * documents are analyzed with a standard analyzer and written to {@code d:\luceneindex}
 * through an index writer.
 *
 * <p>NOTE(review): identifiers in this listing appear case-folded to lowercase; the casing is
 * kept as-is so the class stays consistent with the file's import lines.
 *
 * @author 丁宇
 */
public class lucenetest {
    /**
     * Creates the index.
     *
     * @throws ioexception if the source directory cannot be listed or the index cannot be
     *         opened or written
     */
    @test
    public void createindex() throws ioexception {
        // listfiles() returns null when the path does not exist or is not a directory
        file[] files = new file("d:\\lucenetest").listfiles();
        if (files == null) {
            throw new ioexception("cannot list files in d:\\lucenetest");
        }
        // standard analyzer used to tokenize the analyzed fields
        analyzer analyzer = new standardanalyzer();
        indexwriterconfig config = new indexwriterconfig(analyzer);
        // try-with-resources closes the writer (and then the directory) even when indexing
        // fails, so the index write lock is not left behind
        try (directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
                indexwriter write = new indexwriter(directory, config)) {
            for (file file : files) {
                // sub-directories cannot be opened as a file stream; skip them
                if (!file.isfile()) {
                    continue;
                }
                string content = filecontent(file);
                if (content == null) {
                    // unreadable file: filecontent already reported the error
                    continue;
                }
                document document = new document();
                // filepath: stored only, neither analyzed nor indexed
                field field1 = new storedfield("filepath", file.getpath());
                // filename: analyzed, indexed and stored
                field field2 = new org.apache.lucene.document.textfield("filename", file.getname(), store.yes);
                // filecontent: analyzed and indexed, but the text itself is not stored
                field field3 = new org.apache.lucene.document.textfield("filecontent", content, store.no);
                // filesize: indexed as a numeric long and stored, so that numeric range
                // queries on "filesize" can match (a stored-only field is not searchable)
                field field4 = new org.apache.lucene.document.longfield("filesize", file.length(), store.yes);
                document.add(field1);
                document.add(field2);
                document.add(field3);
                document.add(field4);
                write.adddocument(document);
            }
        }
    }

    /**
     * Reads the whole file and decodes it as UTF-8.
     *
     * @param file the file to read
     * @return the decoded content, or {@code null} if the file could not be opened or read
     */
    public string filecontent(file file) {
        byte[] filecontent = new byte[(int) file.length()];
        try (fileinputstream in = new fileinputstream(file)) {
            // a single read() may return fewer bytes than requested, so loop until the
            // buffer is full or the stream ends
            int offset = 0;
            while (offset < filecontent.length) {
                int count = in.read(filecontent, offset, filecontent.length - offset);
                if (count < 0) {
                    break;
                }
                offset += count;
            }
            // decode only the bytes actually read
            return new string(filecontent, 0, offset, "utf-8");
        } catch (ioexception e) {
            // also covers filenotfoundexception and unsupportedencodingexception
            e.printstacktrace();
            return null;
        }
    }
}

2.查询索引

package com.dingyu.test;

import java.io.ioexception;
import java.nio.file.path;
import java.nio.file.paths;

import org.apache.lucene.document.document;
import org.apache.lucene.index.directoryreader;
import org.apache.lucene.index.indexreader;
import org.apache.lucene.index.term;
import org.apache.lucene.search.booleanclause.occur;
import org.apache.lucene.search.booleanquery;
import org.apache.lucene.search.indexsearcher;
import org.apache.lucene.search.numericrangequery;
import org.apache.lucene.search.query;
import org.apache.lucene.search.scoredoc;
import org.apache.lucene.search.termquery;
import org.apache.lucene.search.topdocs;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * Query examples against the index stored in {@code d:\luceneindex}.
 *
 * <p>Each test builds one query, prints the stored fields of at most two hits and then
 * closes the index reader.
 *
 * @author 丁宇
 */
public class lucenetest1 {
    /**
     * Opens the index directory and wraps a reader on it in a searcher.
     *
     * @return a searcher over {@code d:\luceneindex}
     * @throws ioexception if the index cannot be opened
     */
    private indexsearcher getindexsearcher() throws ioexception {
        directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
        indexreader reader = directoryreader.open(directory);
        return new indexsearcher(reader);
    }

    /**
     * Prints the fields of every hit in {@code docs}.
     *
     * @param docs the search result
     * @param searcher the searcher that produced {@code docs}, used to load each document
     * @throws ioexception if a document cannot be loaded
     */
    private void printindex(topdocs docs, indexsearcher searcher) throws ioexception {
        // each hit carries the internal id of its document; the id is used to load it
        for (scoredoc scoredoc : docs.scoredocs) {
            document document = searcher.doc(scoredoc.doc);
            system.out.println(document.get("filename"));
            // NOTE(review): the indexing example adds filecontent with store.no, so this
            // line is expected to print null - confirm against the index actually used
            system.out.println(document.get("filecontent"));
            system.out.println(document.get("filepath"));
            system.out.println(document.get("filesize"));
            system.out.println("------------------------");
        }
    }

    /**
     * Runs {@code query}, prints at most two hits and closes the index reader.
     *
     * @param query the query to execute
     * @throws ioexception if searching or loading a document fails
     */
    private void searchandprint(query query) throws ioexception {
        indexsearcher searcher = getindexsearcher();
        try {
            topdocs docs = searcher.search(query, 2);
            printindex(docs, searcher);
        } finally {
            // close the reader even when the search or the printing fails
            searcher.getindexreader().close();
        }
    }

    /**
     * Exact term query: documents whose {@code filename} contains the term {@code txt}.
     *
     * @throws ioexception if the index cannot be read
     */
    @test
    public void termqueryindex() throws ioexception {
        searchandprint(new termquery(new term("filename", "txt")));
    }

    /**
     * Numeric range query. The five arguments are the field name, the lower and upper
     * bound, and whether the lower and the upper bound are inclusive.
     *
     * <p>NOTE(review): this only matches documents whose {@code filesize} was indexed as a
     * numeric long field; a stored-only field yields no hits.
     *
     * @throws ioexception if the index cannot be read
     */
    @test
    public void numrangequeryindex() throws ioexception {
        searchandprint(numericrangequery.newlongrange("filesize", 0l, 1000l, true, true));
    }

    /**
     * Combined query: both clauses are required, i.e. the result is the intersection of
     * the term query and the range query.
     *
     * @throws ioexception if the index cannot be read
     */
    @test
    public void booleanqueryindex() throws ioexception {
        booleanquery booleanquery = new booleanquery();
        query query = new termquery(new term("filename", "txt"));
        query query2 = numericrangequery.newlongrange("filesize", 0l, 1000l, true, true);
        booleanquery.add(query, occur.must);
        booleanquery.add(query2, occur.must);
        // search with the combined query, not with the first clause alone
        searchandprint(booleanquery);
    }
}

 

 

 

3.删除索引

 

package com.dingyu.test;

import java.io.ioexception;
import java.nio.file.paths;

import org.apache.lucene.analysis.analyzer;
import org.apache.lucene.analysis.standard.standardanalyzer;
import org.apache.lucene.index.indexwriter;
import org.apache.lucene.index.indexwriterconfig;
import org.apache.lucene.index.term;
import org.apache.lucene.search.query;
import org.apache.lucene.search.termquery;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * Deletion examples. Adding, deleting and updating all go through the same object, an
 * index writer on {@code d:\luceneindex}.
 *
 * @author 丁宇
 */
public class lucenetest3 {
    /**
     * Opens an index writer on {@code d:\luceneindex} with a standard analyzer.
     *
     * @return a new writer; the caller is responsible for closing it
     * @throws ioexception if the index cannot be opened
     */
    public indexwriter getindexwrite() throws ioexception {
        directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
        indexwriterconfig config = new indexwriterconfig(new standardanalyzer());
        return new indexwriter(directory, config);
    }

    /**
     * Deletes every document in the index.
     *
     * @throws ioexception if the index cannot be opened or modified
     */
    @test
    public void deleteallindex() throws ioexception {
        // try-with-resources closes the writer even when the delete fails
        try (indexwriter indexwrite = getindexwrite()) {
            indexwrite.deleteall();
        }
    }

    /**
     * Deletes the documents matching a query, here those whose {@code filename} contains
     * the term {@code txt}.
     *
     * @throws ioexception if the index cannot be opened or modified
     */
    @test
    public void deletesomeindex() throws ioexception {
        query query = new termquery(new term("filename", "txt"));
        // try-with-resources closes the writer even when the delete fails
        try (indexwriter indexwrite = getindexwrite()) {
            indexwrite.deletedocuments(query);
        }
    }
}

 4.修改索引

package com.dingyu.test;

import java.io.ioexception;
import java.nio.file.paths;

import org.apache.lucene.analysis.analyzer;
import org.apache.lucene.analysis.standard.standardanalyzer;
import org.apache.lucene.document.document;
import org.apache.lucene.document.field.store;
import org.apache.lucene.document.stringfield;
import org.apache.lucene.index.indexwriter;
import org.apache.lucene.index.indexwriterconfig;
import org.apache.lucene.index.indexablefield;
import org.apache.lucene.index.term;
import org.apache.lucene.store.directory;
import org.apache.lucene.store.fsdirectory;
import org.junit.test;

/**
 * Update example for the index stored in {@code d:\luceneindex}.
 *
 * @author 丁宇
 */
public class lucenetest2 {

    /**
     * Opens an index writer on {@code d:\luceneindex} with a standard analyzer.
     *
     * @return a new writer; the caller is responsible for closing it
     * @throws ioexception if the index cannot be opened
     */
    private indexwriter getindexwriter() throws ioexception {
        directory directory = fsdirectory.open(paths.get("d:\\luceneindex"));
        indexwriterconfig config = new indexwriterconfig(new standardanalyzer());
        return new indexwriter(directory, config);
    }

    /**
     * Replaces the documents whose {@code filecontent} contains the term {@code txt} with a
     * single new document that only has a {@code filename} field.
     *
     * @throws ioexception if the index cannot be opened or modified
     */
    @test
    public void updateindex() throws ioexception {
        document document = new document();
        document.add(new stringfield("filename", "think in java", store.yes));
        // try-with-resources closes the writer even when the update fails
        try (indexwriter indexwriter = getindexwriter()) {
            // an update deletes the documents matching the term, then adds the new one
            indexwriter.updatedocument(new term("filecontent", "txt"), document);
        }
    }
}

 

如对本文有疑问, 点击进行留言回复!!

相关文章:

验证码:
移动技术网