diff --git a/201621044079/.settings/org.eclipse.core.resources.prefs b/201621044079/.settings/org.eclipse.core.resources.prefs new file mode 100644 index 0000000000000000000000000000000000000000..4824b8026319a8fb303971008a7d59a816d58bd6 --- /dev/null +++ b/201621044079/.settings/org.eclipse.core.resources.prefs @@ -0,0 +1,2 @@ +eclipse.preferences.version=1 +encoding/=UTF-8 diff --git a/201621044079/.settings/org.eclipse.jdt.core.prefs b/201621044079/.settings/org.eclipse.jdt.core.prefs new file mode 100644 index 0000000000000000000000000000000000000000..838bd9d69424290f7e947b867a3b4381e756dad8 --- /dev/null +++ b/201621044079/.settings/org.eclipse.jdt.core.prefs @@ -0,0 +1,11 @@ +eclipse.preferences.version=1 +org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled +org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 +org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve +org.eclipse.jdt.core.compiler.compliance=1.7 +org.eclipse.jdt.core.compiler.debug.lineNumber=generate +org.eclipse.jdt.core.compiler.debug.localVariable=generate +org.eclipse.jdt.core.compiler.debug.sourceFile=generate +org.eclipse.jdt.core.compiler.problem.assertIdentifier=error +org.eclipse.jdt.core.compiler.problem.enumIdentifier=error +org.eclipse.jdt.core.compiler.source=1.7 diff --git a/201621044079/1.txt b/201621044079/1.txt new file mode 100644 index 0000000000000000000000000000000000000000..d3b0ae722dd60714014bbf7eaaa4e99e942fb7a4 --- /dev/null +++ b/201621044079/1.txt @@ -0,0 +1,18 @@ +adasd asdd as d as da s +asdasd das fsdsd +safsdf +asdas asd d asd as asdas + +asd + + +gfh + + +wwwww sadasdas sadas123 + +wwww,.w +WWWWW +qqqqqqqq +uuuuuuu +aaaaaaa \ No newline at end of file diff --git a/201621044079/bin/junitTest/WordDealTest.class b/201621044079/bin/junitTest/WordDealTest.class new file mode 100644 index 0000000000000000000000000000000000000000..5a6971dedc639156d209bdd86691634ccd2d9cbd Binary files /dev/null and b/201621044079/bin/junitTest/WordDealTest.class 
differ diff --git a/201621044079/bin/wCount/FileDeal.class b/201621044079/bin/wCount/FileDeal.class new file mode 100644 index 0000000000000000000000000000000000000000..6e665a7092a83513102ffca738c62b939f8e26a0 Binary files /dev/null and b/201621044079/bin/wCount/FileDeal.class differ diff --git a/201621044079/bin/wCount/Main.class b/201621044079/bin/wCount/Main.class new file mode 100644 index 0000000000000000000000000000000000000000..a03d4cce0a4d6c2856afa54cc241c4f958bdd3a8 Binary files /dev/null and b/201621044079/bin/wCount/Main.class differ diff --git a/201621044079/bin/wCount/WordDeal$1.class b/201621044079/bin/wCount/WordDeal$1.class new file mode 100644 index 0000000000000000000000000000000000000000..94d0cebe682e0f742ca5a6674c1f4662d3234681 Binary files /dev/null and b/201621044079/bin/wCount/WordDeal$1.class differ diff --git a/201621044079/bin/wCount/WordDeal.class b/201621044079/bin/wCount/WordDeal.class new file mode 100644 index 0000000000000000000000000000000000000000..a4cbd6e0bf5a6db79c64ddf99709327b05ffd090 Binary files /dev/null and b/201621044079/bin/wCount/WordDeal.class differ diff --git a/201621044079/result.txt b/201621044079/result.txt new file mode 100644 index 0000000000000000000000000000000000000000..20c9493e74325a0a282a866a9a1222407e58dcc8 --- /dev/null +++ b/201621044079/result.txt @@ -0,0 +1,13 @@ +155 +15 +1 +:2 +:2 +:1 +:1 +:1 +:1 +:1 +:1 +:1 +:1 diff --git a/201621044079/src/junitTest/WordDealTest.java b/201621044079/src/junitTest/WordDealTest.java new file mode 100644 index 0000000000000000000000000000000000000000..8d65f2348f91ef27db280e12212294955f4bc05f --- /dev/null +++ b/201621044079/src/junitTest/WordDealTest.java @@ -0,0 +1,74 @@ +package junitTest; + +import static org.junit.Assert.*; + +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import org.junit.Before; +import org.junit.Test; + +import wCount.FileDeal; +import wCount.WordDeal; + +public class WordDealTest { + + 
@Before + /* Empty fixture hook: the body does nothing, every test builds its own FileDeal and WordDeal. */ public void setUp() throws Exception { + + } + + @Test + public void testGetCharCount() throws IOException {// Character count test. NOTE(review): cn1 is computed but never asserted, so this only shows that the call does not throw. + FileDeal fd = new FileDeal(); + String text1 = fd.FileToString("text/text1.txt"); + + WordDeal wd1 = new WordDeal(text1); + + int cn1 = wd1.getCharCount(); + + } + + @Test + public void testGetWordCount() throws IOException {// Word count test. NOTE(review): wn1 is never asserted. + FileDeal fd = new FileDeal(); + String text1 = fd.FileToString("text/text1.txt"); + WordDeal wd1 = new WordDeal(text1); + int wn1 = wd1.getWordCount(); + + } + + @Test + public void testGetWordFreq() throws IOException {// Word frequency test. NOTE(review): wf1 is never asserted. + + FileDeal fd = new FileDeal(); + String text1 = fd.FileToString("text/text1.txt"); + WordDeal wd1 = new WordDeal(text1); + List wf1 = wd1.getWordFreq(); + + } + + + @Test + public void testGetLineCount() throws IOException {// Valid line count test. NOTE(review): wn1 is never asserted. + FileDeal fd = new FileDeal(); + String text1 = fd.FileToString("text/text1.txt"); + WordDeal wd1 = new WordDeal(text1); + int wn1 = wd1.getLineCount(); + + } + + @Test + public void testListToArray() throws IOException { /* ListToArray test. NOTE(review): s1 is never asserted. */ + + + FileDeal fd = new FileDeal(); + String text1 = fd.FileToString("text/text1.txt"); + WordDeal wd1 = new WordDeal(text1); + List wf1 = wd1.getWordFreq(); + String[] s1 = wd1.ListToArray(wf1); + } + +} diff --git a/201621044079/src/wCount/FileDeal.java b/201621044079/src/wCount/FileDeal.java new file mode 100644 index 0000000000000000000000000000000000000000..130c1a4720e8210dd00a9312140a18bc9af74bb4 --- /dev/null +++ b/201621044079/src/wCount/FileDeal.java @@ -0,0 +1,33 @@ +package wCount; + +import java.io.*; + +public class FileDeal { // File handling helper: reads a file into a String and writes the result file. + + public String FileToString(String path) throws IOException { // Reads the file at path into a String; prints a message and throws FileNotFoundException when path is missing or is a directory. NOTE(review): the byte count returned by read is discarded and the whole 1024-byte buffer is appended each time, so the zero-filled unused tail of the buffer ends up in the text; bytes are decoded with the platform default charset; fis is never closed. + File file = new File(path); + if (!file.exists() || file.isDirectory()) { + System.out.println("请输入正确文件名!"); /* message means: please enter a correct file name */ + throw new FileNotFoundException(); + } + FileInputStream fis = new FileInputStream(file); + byte[] buf = new byte[1024]; + StringBuffer sb = new StringBuffer(); + while ((fis.read(buf)) != -1) { + 
sb.append(new String(buf)); /* appends all 1024 bytes, not only the bytes that were read */ + buf = new byte[1024]; + } + return sb.toString(); + } + + public void WriteToFile(String str) throws IOException { // Writes str to result.txt (relative path), replacing any previous content. NOTE(review): out is not closed if write throws. + File writename = new File("result.txt"); + writename.createNewFile(); // creates the file if it does not exist yet + BufferedWriter out = new BufferedWriter(new FileWriter(writename)); + out.write(str); + out.flush(); + out.close(); + + } + +} diff --git a/201621044079/src/wCount/Main.java b/201621044079/src/wCount/Main.java new file mode 100644 index 0000000000000000000000000000000000000000..a56f65169e7fd7bcf72ce2435fc5bfb703d3a57b --- /dev/null +++ b/201621044079/src/wCount/Main.java @@ -0,0 +1,35 @@ +package wCount; + +import wCount.FileDeal; +import wCount.WordDeal; + +import java.io.IOException; +import java.util.*; + +public class Main { + + public static void main(String[] args) throws IOException { /* Reads one token from standard input as the input file name, computes the statistics, prints them and writes them out through FileDeal.WriteToFile. The args array is not used. */ + Scanner sc = new Scanner(System.in); + String file = sc.next(); + FileDeal fd = new FileDeal(); + String[] wFreq; + List wordFreq; + + String text = fd.FileToString(file); + WordDeal wd = new WordDeal(text); + // Query the WordDeal instance for each statistic + int charNum = wd.getCharCount(); + int wordCount = wd.getWordCount(); + int ValidLine = wd.getLineCount(); + wordFreq = wd.getWordFreq(); + wFreq = wd.ListToArray(wordFreq); + String w = charNum + "\r\n" + wordCount + "\r\n" + ValidLine + "\r\n"; /* report layout: char count, word count, valid line count, then one frequency entry per line, all CRLF terminated */ + for (int i = 0; i < wFreq.length; i++) { + w = w + wFreq[i] + "\r\n"; + } + System.out.println(w); + fd.WriteToFile(w); + + } + +} diff --git a/201621044079/src/wCount/WordDeal.java b/201621044079/src/wCount/WordDeal.java new file mode 100644 index 0000000000000000000000000000000000000000..3acb40f9a66c1d4b582c3f73a6ab06de65055816 --- /dev/null +++ b/201621044079/src/wCount/WordDeal.java @@ -0,0 +1,160 @@ +package wCount; + +import java.util.*; +import java.util.Map.Entry; + +public class WordDeal // Computes character, word, line and word frequency statistics over the text of one file +{ + + String text; // file content + private int charNum; // character count + private int wordCount; // total word count + private int ValidLine; // number of valid lines + private Map wordFreq; // word frequencies + + 
public WordDeal(String text) { /* keeps the text that every statistic below is computed from */ + this.text = text; + } + + public int getCharCount() // Counts the characters of text that are printable ASCII (codes 32 to 126) or CR, LF, TAB; every other character is skipped. NOTE(review): the count is added to the charNum field, which is never reset, so calling this twice on one instance doubles the result. + { + char c; + int i = 0; + while (i < text.length()) { + c = text.charAt(i); + if (c >= 32 && c <= 126 || c == '\r' || c == '\n' || c == '\t') { + charNum++; + } + i++; + } + return charNum; + } + + public int getWordCount() // Counts tokens that are at least 4 chars long and start with 4 ASCII letters. NOTE(review): accumulates into the wordCount field without resetting it, so repeated calls keep growing the total. + { + String t = text; + String[] spWord = t.split("\\s"); // tokenize on every single whitespace character + for (int i = 0; i < spWord.length; i++) { + if (spWord[i].length() < 4) { // tokens shorter than 4 chars (including empty ones) are not words + continue; + } else { + int flag = 1; // stays 1 only if the first four characters are all English letters + char c; + for (int j = 0; j < 4; j++) { + c = spWord[i].charAt(j); + if (!(c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z')) { + flag = 0; + } + } + if (flag == 1) { + wordCount++; + } + } + } + return wordCount; + } + + /* + * public Map getWordFreq() // + * Count word frequencies (a word starts with 4 English letters followed by letters, digits or symbols; words are separated by delimiters; case-insensitive.) { wordFreq = new + * HashMap(); String t = text; + * + * String[] spWord = t.split("\\s"); // split the string into tokens for (int i = 0; i < + * spWord.length; i++) { if (spWord[i].length() < 4) { // check whether the length is at least 4 + * continue; } else { + * + * int flag = 1; // check whether the first four characters are English letters char c; + * + * for (int j = 0; j < 4; j++) { c = spWord[i].charAt(j); + * + * if (!(c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z')) { flag = 0; } } if + * (flag == 1) { // convert the string to lower case spWord[i] = spWord[i].trim().toLowerCase(); if + * (wordFreq.get(spWord[i]) == null) { // check whether the Map already contains this string + * wordFreq.put(spWord[i], 1); } else wordFreq.put(spWord[i], + * wordFreq.get(spWord[i]) + 1); + * + * } } } return wordFreq; } + */ + + public List getWordFreq() { // Builds a lower-cased frequency map of the qualifying tokens (same word rule as getWordCount) and returns its entries sorted. The map is rebuilt on every call. NOTE(review): generic type arguments look stripped in this copy (see the List and Comparator declarations below), presumably lost in extraction; confirm against the repository. + + wordFreq = new HashMap(); + String t = text; + + String[] spWord = t.split("\\s"); // tokenize on whitespace + for (int i = 0; i < spWord.length; i++) { + if (spWord[i].length() < 4) { + continue; + } else { + + int flag = 1; + char c; + + for (int j = 0; j < 4; j++) { + c = spWord[i].charAt(j); + + if (!(c >= 'A' && c <= 'Z' || c >= 'a' && c <= 'z')) { + flag = 0; + } + } + if (flag == 1) { + spWord[i] = 
spWord[i].trim().toLowerCase(); + if (wordFreq.get(spWord[i]) == null) { + wordFreq.put(spWord[i], 1); + } else + wordFreq.put(spWord[i], wordFreq.get(spWord[i]) + 1); + + } + } + } + + List> list = new ArrayList>(wordFreq.entrySet()); + Collections.sort(list, new Comparator>() { + + @Override + public int compare(Entry o1, Entry o2) { // Orders entries by count, highest first, then by key in ascending order. NOTE(review): the tie test uses == on the values; if these are boxed Integer objects that is a reference comparison and only reliable for small cached values, and comparing by subtraction can overflow for extreme counts. + if (o1.getValue() == o2.getValue()) { + return o1.getKey().compareTo(o2.getKey()); + } + return o2.getValue() - o1.getValue(); + } + + }); + return list; + } + + public int getLineCount() { // Counts lines that contain at least one non-whitespace character. NOTE(review): the text is split on CRLF only, so input with bare LF line endings is treated as a single line; the total accumulates in the ValidLine field across calls. + + String[] line = text.split("\r\n"); // split the text into an array with one entry per line + for (int i = 0; i < line.length; i++) { // skip blank lines, count the rest + + if (line[i].trim().length() == 0) + continue; + ValidLine = ValidLine + 1; + } + return ValidLine; + } + + public String[] ListToArray(List> list) { // Formats at most the first ten entries of the given list, in list order, as strings made of the key in angle brackets, a colon, then the value. + String[] arr; + int i = 0; + int len = list.size(); + if (len <= 10) { + arr = new String[len]; + for (Map.Entry m : list) { + arr[i] = "<" + m.getKey() + ">:" + m.getValue(); + i++; + } + } else { + arr = new String[10]; + for (Map.Entry m : list) { + if (i == 10) + break; + arr[i] = "<" + m.getKey() + ">:" + m.getValue(); + i++; + } + } + return arr; + } + +} diff --git a/201621044079/text/text1.txt b/201621044079/text/text1.txt new file mode 100644 index 0000000000000000000000000000000000000000..1770ce9a4a90ab95e57329275cf944e957aa85fb --- /dev/null +++ b/201621044079/text/text1.txt @@ -0,0 +1,12 @@ + + +HANye + 223HUO hanye +HANYE haNyE +j721JBDHW QWEWER +SAD + +FDFDS + + +WE ADASDA adasda E23U