upload

eastmountyxz · eastmountyxz · commit 92d5ff0b1a61 · 2021-05-18T16:20:30.000+08:00
diff --git a/blog28-CNN-LSTM-GPU/data_preprocess.py b/blog28-CNN-LSTM-GPU/data_preprocess.py
@@ -0,0 +1,59 @@
+# -*- coding:utf-8 -*-
+# By:Eastmount CSDN 2021-03-19
+import csv
+import pandas as pd
+import numpy as np
+import jieba
+import jieba.analyse
+
+#&#28155;&#21152;&#33258;&#23450;&#20041;&#35789;&#20856;&#21644;&#20572;&#29992;&#35789;&#20856;
+jieba.load_userdict("user_dict.txt")
+stop_list = pd.read_csv('stop_words.txt',
+                        engine='python',
+                        encoding='utf-8',
+                        delimiter="\n",
+                        names=['t'])['t'].tolist()
+
+#-----------------------------------------------------------------------
+#Jieba&#20998;&#35789;&#20989;&#25968;
+def txt_cut(juzi):
+    return [w for w in jieba.lcut(juzi) if w not in stop_list]
+
+#-----------------------------------------------------------------------
+#&#20013;&#25991;&#20998;&#35789;&#35835;&#21462;&#25991;&#20214;
+def fenci(filename,result):
+    #&#20889;&#20837;&#20998;&#35789;&#32467;&#26524;
+    fw = open(result, "w", newline = '',encoding = 'UTF-8')
+    writer = csv.writer(fw)  
+    writer.writerow(['label','cutword'])
+
+    #&#20351;&#29992;csv.DictReader&#35835;&#21462;&#25991;&#20214;&#20013;&#30340;&#20449;&#24687;
+    labels = []
+    contents = []
+    with open(filename, "r", encoding="UTF-8") as f:
+        reader = csv.DictReader(f)
+        for row in reader:
+            #&#25968;&#25454;&#20803;&#32032;&#33719;&#21462;
+            labels.append(row['label'])
+            content = row['content']
+            #&#20013;&#25991;&#20998;&#35789;
+            seglist = txt_cut(content)
+            #&#31354;&#26684;&#25340;&#25509;
+            output = ' '.join(list(seglist))
+            contents.append(output)
+            
+            #&#25991;&#20214;&#20889;&#20837;
+            tlist = []
+            tlist.append(row['label'])
+            tlist.append(output)
+            writer.writerow(tlist)
+    print(labels[:5])
+    print(contents[:5])
+    fw.close()
+
+#-----------------------------------------------------------------------
+#&#20027;&#20989;&#25968;
+if __name__ == '__main__':
+    fenci("news_dataset_train.csv", "news_dataset_train_fc.csv")
+    fenci("news_dataset_test.csv", "news_dataset_test_fc.csv")
+    fenci("news_dataset_val.csv", "news_dataset_val_fc.csv")
diff --git a/blog28-CNN-LSTM-GPU/data_show.py b/blog28-CNN-LSTM-GPU/data_show.py
@@ -0,0 +1,39 @@
+# -*- coding: utf-8 -*-
+"""
+Created on 2021-03-19
+@author: xiuzhang Eastmount CSDN
+"""
+import pandas as pd
+import numpy as np
+from sklearn import metrics
+import matplotlib.pyplot as plt
+import seaborn as sns
+
+#---------------------------------------&#31532;&#19968;&#27493; &#25968;&#25454;&#35835;&#21462;------------------------------------
+## &#35835;&#21462;&#27979;&#25968;&#25454;&#38598;
+train_df = pd.read_csv("news_dataset_train_fc.csv")
+val_df = pd.read_csv("news_dataset_val_fc.csv")
+test_df = pd.read_csv("news_dataset_test_fc.csv")
+print(train_df.head())
+
+## &#35299;&#20915;&#20013;&#25991;&#26174;&#31034;&#38382;&#39064;
+plt.rcParams['font.sans-serif'] = ['KaiTi']  #&#25351;&#23450;&#40664;&#35748;&#23383;&#20307; SimHei&#40657;&#20307;
+plt.rcParams['axes.unicode_minus'] = False   #&#35299;&#20915;&#20445;&#23384;&#22270;&#20687;&#26159;&#36127;&#21495;'
+
+## &#26597;&#30475;&#35757;&#32451;&#38598;&#37117;&#26377;&#21738;&#20123;&#26631;&#31614;
+plt.figure()
+sns.countplot(train_df.label)
+plt.xlabel('Label',size = 10)
+plt.xticks(size = 10)
+plt.show()
+
+## &#20998;&#26512;&#35757;&#32451;&#38598;&#20013;&#35789;&#32452;&#25968;&#37327;&#30340;&#20998;&#24067;
+print(train_df.cutwordnum.describe())
+plt.figure()
+plt.hist(train_df.cutwordnum,bins=100)
+plt.xlabel("&#35789;&#32452;&#38271;&#24230;", size = 12)
+plt.ylabel("&#39057;&#25968;", size = 12)
+plt.title("&#35757;&#32451;&#25968;&#25454;&#38598;")
+plt.show()
+
+
diff --git a/blog39-ImageClassification/photo.rar b/blog39-ImageClassification/photo.rar
diff --git a/blog40-ImageSegmentation/blog40-01-yzfg.py b/blog40-ImageSegmentation/blog40-01-yzfg.py
@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# 2021-05-17 Eastmount CSDN
+import cv2  
+import numpy as np  
+import matplotlib.pyplot as plt
+
+#&#35835;&#21462;&#22270;&#20687;
+img=cv2.imread('scenery.png')
+grayImage=cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)  
+
+#&#38408;&#20540;&#21270;&#22788;&#29702;
+ret,thresh1=cv2.threshold(grayImage,127,255,cv2.THRESH_BINARY)  
+ret,thresh2=cv2.threshold(grayImage,127,255,cv2.THRESH_BINARY_INV)  
+ret,thresh3=cv2.threshold(grayImage,127,255,cv2.THRESH_TRUNC)  
+ret,thresh4=cv2.threshold(grayImage,127,255,cv2.THRESH_TOZERO)  
+ret,thresh5=cv2.threshold(grayImage,127,255,cv2.THRESH_TOZERO_INV)
+
+#&#26174;&#31034;&#32467;&#26524;
+titles = ['Gray Image','BINARY','BINARY_INV','TRUNC',
+'TOZERO','TOZERO_INV']  
+images = [grayImage, thresh1, thresh2, thresh3, thresh4, thresh5]  
+for i in range(6):  
+   plt.subplot(2,3,i+1),plt.imshow(images[i],'gray')  
+   plt.title(titles[i])  
+   plt.xticks([]),plt.yticks([])  
+plt.show()