From c0d7d571eb1fea6bd4631183e973a2113f45f665 Mon Sep 17 00:00:00 2001 From: cjl <122357439@qq.com> Date: Thu, 30 Mar 2017 17:24:43 +0800 Subject: [PATCH 1/5] =?UTF-8?q?=E6=B7=BB=E5=8A=A0IKTokenizerFactory?= =?UTF-8?q?=E7=B1=BB=EF=BC=8C=E5=AE=8C=E6=88=90=E9=85=8D=E7=BD=AE=E6=99=BA?= =?UTF-8?q?=E8=83=BD=E5=88=86=E8=AF=8D=E6=A3=80=E7=B4=A2=E5=8A=9F=E8=83=BD?= =?UTF-8?q?=EF=BC=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../analysis/ik/IKTokenizerFactory.java | 38 +++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java diff --git a/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java new file mode 100644 index 0000000..4633a98 --- /dev/null +++ b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java @@ -0,0 +1,38 @@ +package org.huidao.lucene.analysis.ik; + +import java.io.Reader; +import java.util.Map; + +import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.util.TokenizerFactory; +import org.apache.lucene.util.AttributeFactory; +import org.wltea.analyzer.lucene.IKTokenizer; +/** + * + * @author cjl + * + */ +public class IKTokenizerFactory extends TokenizerFactory { + private boolean useSmart; + + public boolean getUseSmart() { + return useSmart; + } + + public void setUseSmart(boolean useSmart) { + this.useSmart = useSmart; + } + + protected IKTokenizerFactory(Map args) { + super(args); + assureMatchVersion(); + this.setUseSmart(args.get("useSmart").toString().equals("true")); + } + + @Override + public Tokenizer create(AttributeFactory factory, Reader input) { + Tokenizer ik = new IKTokenizer(input , this.useSmart); + return ik; + } + +} -- Gitee From a528ebf94ff0892fa9e1dc527faa716d5b897388 Mon Sep 17 00:00:00 2001 From: cjl <122357439@qq.com> Date: Thu, 30 Mar 2017 17:28:00 +0800 Subject: [PATCH 2/5] =?UTF-8?q?=E4=BF=AE=E6=94=B9=E7=89=88=E6=9C=AC?= =?UTF-8?q?=E4=B8=BAsmart?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 8323892..6264651 100644 --- a/pom.xml +++ b/pom.xml @@ -3,7 +3,7 @@ 4.0.0 org.huidao IKAnalyzer - 0.0.1-SNAPSHOT + 0.0.1-smart -- Gitee From a804918be7e804984d0e8461659be54c1cdb0073 Mon Sep 17 00:00:00 2001 From: cjl <122357439@qq.com> Date: Thu, 30 Mar 2017 18:03:02 +0800 Subject: [PATCH 3/5] =?UTF-8?q?=E4=BF=AE=E6=94=B9bug?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java index 4633a98..9515949 100644 --- a/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java +++ b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java @@ -23,9 +23,8 @@ public class IKTokenizerFactory extends TokenizerFactory { this.useSmart = useSmart; } - protected IKTokenizerFactory(Map args) { + public IKTokenizerFactory(Map args) { super(args); - assureMatchVersion(); this.setUseSmart(args.get("useSmart").toString().equals("true")); } -- Gitee From 089892d26457193096680fdd809a5313b484c526 Mon Sep 17 00:00:00 2001 From: cjl <122357439@qq.com> Date: Sat, 1 Apr 2017 10:34:02 +0800 Subject: [PATCH 4/5] =?UTF-8?q?=E4=BF=AE=E6=94=B9IKTokenizerFactory?= =?UTF-8?q?=E7=B1=BB=EF=BC=8C=E6=94=AF=E6=8C=81=E9=BB=98=E8=AE=A4=E4=B8=8D?= =?UTF-8?q?=E9=85=8D=E7=BD=AE=E5=8F=82=E6=95=B0=EF=BC=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: cjl <122357439@qq.com> --- pom.xml | 24 ++++++++++++++- .../analysis/ik/IKTokenizerFactory.java | 29 +++++++++---------- .../analysis/ik/IKTokenizerFactoryTest.java | 18 ++++++++++++ 3 files changed, 54 insertions(+), 17 deletions(-) create mode 100644 src/test/java/org/huidao/lucene/analysis/ik/IKTokenizerFactoryTest.java diff --git a/pom.xml b/pom.xml index 6264651..7a92f90 100644 --- a/pom.xml +++ b/pom.xml @@ -5,6 +5,11 @@ IKAnalyzer 0.0.1-smart + + 4.12 + 1.2.17 + 1.6.4 + @@ -24,7 +29,24 @@ lucene-analyzers-common 4.10.3 - + + + org.slf4j + slf4j-log4j12 + ${slf4j.version} + + + log4j + log4j + ${log4j.version} + + + + junit + junit + ${junit.version} + test + diff --git a/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java index 9515949..852b7bb 100644 --- a/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java +++ b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java @@ -4,34 +4,31 @@ import java.io.Reader; import java.util.Map; import org.apache.lucene.analysis.Tokenizer; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenFilter; +import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; import org.wltea.analyzer.lucene.IKTokenizer; /** - * - * @author cjl - * + * 汇道科技 + * @author 陈吉林 + * @date 创建于:2017年4月1日 上午10:28:10 */ public class IKTokenizerFactory extends TokenizerFactory { - private boolean useSmart; - - public boolean getUseSmart() { - return useSmart; - } - - public void setUseSmart(boolean useSmart) { - this.useSmart = useSmart; - } + private final boolean useSmart; public IKTokenizerFactory(Map args) { - super(args); - this.setUseSmart(args.get("useSmart").toString().equals("true")); + super(args); + useSmart = getBoolean(args,"useSmart",false); + if (!args.isEmpty()) { + throw new IllegalArgumentException("Unknown parameters: " + args); + } } @Override public Tokenizer create(AttributeFactory factory, Reader input) { - Tokenizer ik = new IKTokenizer(input , this.useSmart); - return ik; + Tokenizer ik = new IKTokenizer(input, this.useSmart); + return ik; } } diff --git a/src/test/java/org/huidao/lucene/analysis/ik/IKTokenizerFactoryTest.java b/src/test/java/org/huidao/lucene/analysis/ik/IKTokenizerFactoryTest.java new file mode 100644 index 0000000..185b103 --- /dev/null +++ b/src/test/java/org/huidao/lucene/analysis/ik/IKTokenizerFactoryTest.java @@ -0,0 +1,18 @@ +package org.huidao.lucene.analysis.ik; + +import static org.junit.Assert.*; + +import org.junit.Test; +/** + * 汇道科技 + * @author 陈吉林 + * @date 创建于:2017年4月1日 上午10:31:41 + */ +public class IKTokenizerFactoryTest { + + @Test + public void test() { + IKTokenizerFactory ikf = new IKTokenizerFactory(null); + } + +} -- Gitee From d2e8357243bdabef4252cbfffece66c7639fcdfe Mon Sep 17 00:00:00 2001 From: cjl <122357439@qq.com> Date: Sat, 1 Apr 2017 10:38:25 +0800 Subject: [PATCH 5/5] ..... --- .../lucene/analysis/ik/IKTokenizerFactory.java | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java index 852b7bb..50d01ae 100644 --- a/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java +++ b/src/main/java/org/huidao/lucene/analysis/ik/IKTokenizerFactory.java @@ -9,17 +9,31 @@ import org.apache.lucene.analysis.ngram.EdgeNGramTokenizer; import org.apache.lucene.analysis.util.TokenizerFactory; import org.apache.lucene.util.AttributeFactory; import org.wltea.analyzer.lucene.IKTokenizer; + /** - * 汇道科技 + * 汇道科技 + * * @author 陈吉林 * @date 创建于:2017年4月1日 上午10:28:10 */ public class IKTokenizerFactory extends TokenizerFactory { private final boolean useSmart; + /** + * 支持solr的schema.xml文件中配置智能分词功能!! + * + * + * + * + * @param args + */ public IKTokenizerFactory(Map args) { super(args); - useSmart = getBoolean(args,"useSmart",false); + useSmart = getBoolean(args, "useSmart", false); if (!args.isEmpty()) { throw new IllegalArgumentException("Unknown parameters: " + args); } -- Gitee