Skip to content

Commit 92d5ff0

Browse files
committed
upload
1 parent 6719f83 commit 92d5ff0

4 files changed

Lines changed: 124 additions & 0 deletions

File tree

Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# -*- coding:utf-8 -*-
2+
# By:Eastmount CSDN 2021-03-19
3+
import csv
4+
import pandas as pd
5+
import numpy as np
6+
import jieba
7+
import jieba.analyse
8+
9+
# Register the custom user dictionary and load the stop-word list.
jieba.load_userdict("user_dict.txt")

# stop_words.txt holds one stop word per line. The file is read directly
# instead of through pd.read_csv(delimiter="\n"): recent pandas releases
# reject a line terminator as delimiter with a ValueError, and the default
# NA parsing would turn entries such as "null" or "NA" into float NaN.
# "utf-8-sig" reads plain UTF-8 and also drops a leading BOM if present.
with open('stop_words.txt', encoding='utf-8-sig') as stop_file:
    stop_list = [
        word
        for word in (line.rstrip('\r\n') for line in stop_file)
        if word  # skip blank lines
    ]
16+
17+
#-----------------------------------------------------------------------
18+
#Jieba分词函数
19+
def txt_cut(juzi):
    """Segment a sentence with jieba and drop stop words.

    Args:
        juzi: Text to segment; it is passed unchanged to ``jieba.lcut``.

    Returns:
        list: The tokens produced by ``jieba.lcut(juzi)``, in their original
        order, excluding every token found in the module-level ``stop_list``.
    """
    # Build a set once per call: testing each token against the list itself
    # scans the whole stop-word list for every token.
    stop_words = set(stop_list)
    return [w for w in jieba.lcut(juzi) if w not in stop_words]
21+
22+
#-----------------------------------------------------------------------
23+
#中文分词读取文件
24+
def fenci(filename, result):
    """Segment the ``content`` column of a CSV file and write the result.

    Reads ``filename`` with ``csv.DictReader`` (it must provide ``label`` and
    ``content`` columns), segments every ``content`` value with ``txt_cut``
    and writes one ``label,cutword`` row per input row to ``result``, where
    ``cutword`` is the tokens joined by single spaces. The first five labels
    and segmented texts are printed once all rows have been processed.

    Args:
        filename: Path of the UTF-8 input CSV.
        result: Path of the UTF-8 output CSV; an existing file is overwritten.

    Raises:
        KeyError: If a row has no ``label`` or ``content`` field.
    """
    preview_size = 5  # only this many rows are kept for the final print
    labels = []
    contents = []

    # Both files are managed by ``with`` so the output handle is flushed and
    # closed even when reading or segmentation raises part-way through.
    with open(result, "w", newline='', encoding='UTF-8') as fw, \
            open(filename, "r", encoding="UTF-8") as f:
        writer = csv.writer(fw)
        writer.writerow(['label', 'cutword'])

        for row in csv.DictReader(f):
            label = row['label']
            # Segment the text and join the tokens with spaces.
            output = ' '.join(txt_cut(row['content']))
            writer.writerow([label, output])

            # Keep just the preview rows instead of buffering the whole
            # dataset in memory.
            if len(labels) < preview_size:
                labels.append(label)
                contents.append(output)

    print(labels)
    print(contents)
53+
54+
#-----------------------------------------------------------------------
55+
#主函数
56+
if __name__ == '__main__':
    # (input CSV, segmented output CSV) pairs, processed in this order.
    dataset_files = (
        ("news_dataset_train.csv", "news_dataset_train_fc.csv"),
        ("news_dataset_test.csv", "news_dataset_test_fc.csv"),
        ("news_dataset_val.csv", "news_dataset_val_fc.csv"),
    )
    for source_csv, segmented_csv in dataset_files:
        fenci(source_csv, segmented_csv)

blog28-CNN-LSTM-GPU/data_show.py

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
# -*- coding: utf-8 -*-
2+
"""
3+
Created on 2021-03-19
4+
@author: xiuzhang Eastmount CSDN
5+
"""
6+
import pandas as pd
7+
import numpy as np
8+
from sklearn import metrics
9+
import matplotlib.pyplot as plt
10+
import seaborn as sns
11+
12+
#--------------------------------- Step 1: load the data ---------------------------------
## Read the segmented training / validation / test sets
train_df = pd.read_csv("news_dataset_train_fc.csv")
val_df = pd.read_csv("news_dataset_val_fc.csv")
test_df = pd.read_csv("news_dataset_test_fc.csv")
print(train_df.head())

## The segmentation script in this commit writes only 'label' and 'cutword'
## columns, so derive the per-row token count when the file does not already
## provide it. An empty 'cutword' cell is read back as NaN and counts as 0.
if 'cutwordnum' not in train_df.columns:
    train_df['cutwordnum'] = (
        train_df['cutword'].fillna('').astype(str).str.split().str.len()
    )

## Make Chinese text render in the figures
plt.rcParams['font.sans-serif'] = ['KaiTi']   # default font (SimHei is an alternative)
plt.rcParams['axes.unicode_minus'] = False    # keep the minus sign rendering with this font

## Show which labels occur in the training set
plt.figure()
# Pass the column by keyword: a positional Series is taken as `data`
# (not `x`) by newer seaborn releases.
sns.countplot(x='label', data=train_df)
plt.xlabel('Label', size=10)
plt.xticks(size=10)
plt.show()

## Distribution of the number of tokens per sample in the training set
print(train_df['cutwordnum'].describe())
plt.figure()
plt.hist(train_df['cutwordnum'], bins=100)
plt.xlabel("词组长度", size=12)
plt.ylabel("频数", size=12)
plt.title("训练数据集")
plt.show()
38+
39+
28.5 MB
Binary file not shown.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# -*- coding: utf-8 -*-
2+
# 2021-05-17 Eastmount CSDN
3+
import cv2
4+
import numpy as np
5+
import matplotlib.pyplot as plt
6+
7+
# Read the image and convert it to grayscale
img = cv2.imread('scenery.png')
if img is None:
    # cv2.imread returns None instead of raising when the file is missing or
    # cannot be decoded; fail here with a clear message rather than inside
    # cvtColor.
    raise FileNotFoundError("Unable to read image: 'scenery.png'")
grayImage = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

# Apply the five fixed-threshold modes (threshold 127, max value 255)
ret, thresh1 = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY)
ret, thresh2 = cv2.threshold(grayImage, 127, 255, cv2.THRESH_BINARY_INV)
ret, thresh3 = cv2.threshold(grayImage, 127, 255, cv2.THRESH_TRUNC)
ret, thresh4 = cv2.threshold(grayImage, 127, 255, cv2.THRESH_TOZERO)
ret, thresh5 = cv2.threshold(grayImage, 127, 255, cv2.THRESH_TOZERO_INV)

# Show the grayscale image and the five results in a 2x3 grid
titles = ['Gray Image', 'BINARY', 'BINARY_INV', 'TRUNC',
          'TOZERO', 'TOZERO_INV']
images = [grayImage, thresh1, thresh2, thresh3, thresh4, thresh5]
for position, (title, image) in enumerate(zip(titles, images), start=1):
    plt.subplot(2, 3, position)
    plt.imshow(image, 'gray')
    plt.title(title)
    # Hide the axis ticks
    plt.xticks([])
    plt.yticks([])
plt.show()

0 commit comments

Comments
 (0)