diff --git a/.idea/compiler.xml b/.idea/compiler.xml
index d280c68..6aa88ff 100644
--- a/.idea/compiler.xml
+++ b/.idea/compiler.xml
@@ -6,8 +6,8 @@
-
+
diff --git a/src/main/java/org/wlld/naturalLanguage/Talk.java b/src/main/java/org/wlld/naturalLanguage/Talk.java
index ff2dd4a..7bbfbf2 100644
--- a/src/main/java/org/wlld/naturalLanguage/Talk.java
+++ b/src/main/java/org/wlld/naturalLanguage/Talk.java
@@ -34,11 +34,21 @@ public class Talk {
//拆词
List sentences = new ArrayList<>();
for (int i = 0; i < sens.length; i++) {
- Sentence sentenceWords = new Sentence();
- catchSentence(sentence, sentenceWords);
- sentences.add(sentenceWords);
+ List sentenceList = catchSentence(sentence);
+ int key = 0;
+ int nub = 0;
+ for (int j = 0; j < sentenceList.size(); j++) {
+ Sentence sentence1 = sentenceList.get(j);
+ restructure(sentence1);
+ int size = sentence1.getKeyWords().size();
+ if (size > nub) {
+ key = j;
+ nub = size;
+ }
+ }
+ sentences.add(sentenceList.get(key));
}
- restructure(sentences);
+
//进行识别
if (randomForest != null) {
for (Sentence sentence1 : sentences) {
@@ -91,34 +101,36 @@ public class Talk {
return nub;
}
- private void catchSentence(String sentence, Sentence sentenceWords) {//把句子拆开
+ private List catchSentence(String sentence) {// Split the sentence into candidates: one Sentence per start offset j, each fed every substring that begins at j. Never returns an empty list.
int len = sentence.length();
- for (int i = 0; i < len; i++) {
- String word = sentence.substring(0, i + 1);
- sentenceWords.setWord(word);
+ List sentences = new ArrayList<>();
+ for (int j = 0; j < Math.max(1, len - 2); j++) {// always emit the offset-0 candidate: with the bare "len - 2" bound, input shorter than 3 chars produced no candidates and the caller's get(key) threw IndexOutOfBoundsException
+ Sentence sentenceWords = new Sentence();
+ for (int i = j; i < len; i++) {// growing substrings sentence[j..i], lengths 1 .. len - j
+ String word = sentence.substring(j, i + 1);
+ sentenceWords.setWord(word);
+ }
+ sentences.add(sentenceWords);
}
-
+ return sentences;
}
- private void restructure(List sentences) {//对句子里面的Word进行词频统计
- for (Sentence words : sentences) {
- List listWord = allWorld;
- List waitWorld = words.getWaitWords();
- for (Word word : waitWorld) {
- String myWord = word.getWord();
- WorldBody body = getBody(myWord, listWord);
- if (body == null) {//已经无法查找到对应的词汇了
- word.setWordFrequency(1);
- break;
- }
- listWord = body.getWorldBodies();//这个body报了一次空指针
- word.setWordFrequency(body.getWordFrequency());
+ private void restructure(Sentence words) {// Assign a word frequency to each waiting Word of the given sentence, then pass the sentence to Tokenizer.radiation
+ List listWord = allWorld;
+ List waitWorld = words.getWaitWords();
+ for (Word word : waitWorld) {
+ String myWord = word.getWord();
+ WorldBody body = getBody(myWord, listWord);
+ if (body == null) {// no matching vocabulary entry can be found any more: give this word frequency 1 and stop looking up the remaining words
+ word.setWordFrequency(1);
+ break;
}
+ listWord = body.getWorldBodies();// the next word is looked up among this entry's child bodies (note: this body threw a NullPointerException once)
+ word.setWordFrequency(body.getWordFrequency());
}
Tokenizer tokenizer = new Tokenizer();
- for (Sentence words : sentences) {
- tokenizer.radiation(words);
- }
+ tokenizer.radiation(words);
+
}
private WorldBody getBody(String word, List worlds) {
diff --git a/src/test/java/org/wlld/LangTest.java b/src/test/java/org/wlld/LangTest.java
index 9e68b2b..c756ee6 100644
--- a/src/test/java/org/wlld/LangTest.java
+++ b/src/test/java/org/wlld/LangTest.java
@@ -20,7 +20,7 @@ public class LangTest {
TemplateReader templateReader = new TemplateReader();
templateReader.read("/Users/lidapeng/Desktop/myDocment/a1.txt", "UTF-8", IOConst.NOT_WIN);
Talk talk = new Talk();
- List list = talk.talk("我草尼玛");
+ List list = talk.talk("被锁外面了");
System.out.println(list);
}
}