diff --git a/src/main/java/org/wlld/naturalLanguage/Talk.java b/src/main/java/org/wlld/naturalLanguage/Talk.java index cff1ccd..b01ec8d 100644 --- a/src/main/java/org/wlld/naturalLanguage/Talk.java +++ b/src/main/java/org/wlld/naturalLanguage/Talk.java @@ -44,10 +44,9 @@ public class Talk { List keyWords = sentence1.getKeyWords(); for (int i = 0; i < 8; i++) { int nub = 0; - List words = wordTimes.get(i); - String word = keyWords.get(i); - if (word != null) { - nub = getNub(words, word); + if (keyWords.size() > i) { + List words = wordTimes.get(i); + nub = getNub(words, keyWords.get(i)); } features.add(nub); } diff --git a/src/main/java/org/wlld/naturalLanguage/Tokenizer.java b/src/main/java/org/wlld/naturalLanguage/Tokenizer.java index 3978176..7aebc42 100644 --- a/src/main/java/org/wlld/naturalLanguage/Tokenizer.java +++ b/src/main/java/org/wlld/naturalLanguage/Tokenizer.java @@ -42,12 +42,13 @@ public class Tokenizer extends Frequency { } private void number() {//分词编号 + System.out.println("开始编码:" + sentences.size()); for (Sentence sentence : sentences) { List features = sentence.getFeatures(); List sentenceList = sentence.getKeyWords(); int size = sentenceList.size();//时间序列的深度 for (int i = 0; i < size; i++) { - if (!wordTimes.contains(i)) { + if (wordTimes.size() < i + 1) { wordTimes.add(new ArrayList<>()); } List list = wordTimes.get(i); @@ -77,7 +78,7 @@ public class Tokenizer extends Frequency { langBody.setKey(sentence.getKey()); for (int i = 0; i < 8; i++) { int nub = 0; - if (features.contains(i)) { + if (features.size() > i) { nub = features.get(i); } int t = i + 1; diff --git a/src/main/java/org/wlld/naturalLanguage/WordConst.java b/src/main/java/org/wlld/naturalLanguage/WordConst.java index 27e0a15..0e2c1ac 100644 --- a/src/main/java/org/wlld/naturalLanguage/WordConst.java +++ b/src/main/java/org/wlld/naturalLanguage/WordConst.java @@ -2,4 +2,19 @@ package org.wlld.naturalLanguage; public class WordConst { public static double Word_Noise = 0.7;//收缩程度 + public static final int SHOP = 1;//购买类型 + public static final int FOOD = 3;//食物类型 + public static final int DRINK = 4;//饮品类型 + public static final int OTHER = 5;//家庭日用(油盐酱醋卫生纸之类的) + public static final int SMOKE = 10;//烟草 + public static final int ADD = 6;//订单增0.5037412492 + public static final int DEL = 7;//订单删 + public static final int UPDATE = 8;//订单改 + public static final int SELECT = 9;//订单查 + public static final int TALK = 2;//聊天类型 + public static final int ALL = 11;//全文本 + public static final int CHANGE = 12;//分类文本 + public static final int DROP = 13;//消文本 + public static final int CURD = 14;//对订单增删改查类型 + public static final int ANS = 0;//聊天回复 } diff --git a/src/main/java/org/wlld/randomForest/RandomForest.java b/src/main/java/org/wlld/randomForest/RandomForest.java index 7e4f97d..ef93efe 100644 --- a/src/main/java/org/wlld/randomForest/RandomForest.java +++ b/src/main/java/org/wlld/randomForest/RandomForest.java @@ -81,6 +81,7 @@ public class RandomForest { public void study() throws Exception {//学习 for (int i = 0; i < forest.length; i++) { + System.out.println("开始学习==" + i + ",treeNub==" + forest.length); Tree tree = forest[i]; tree.study(); } diff --git a/src/test/java/org/wlld/LangTest.java b/src/test/java/org/wlld/LangTest.java index 47b776b..98f7d77 100644 --- a/src/test/java/org/wlld/LangTest.java +++ b/src/test/java/org/wlld/LangTest.java @@ -16,8 +16,11 @@ public class LangTest { public static void test() throws Exception { TemplateReader templateReader = new TemplateReader(); - templateReader.read("/Users/lidapeng/Desktop/myDocment/a.txt", "UTF-8", IOConst.NOT_WIN); + templateReader.read("/Users/lidapeng/Desktop/myDocment/a2.txt", "UTF-8", IOConst.NOT_WIN); Talk talk = new Talk(); talk.talk("我要吃面包"); + talk.talk("我渴了"); + talk.talk("我要去看望你"); + talk.talk("我买两盒烟"); } }