forked from zhanghe06/python
-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathtest_pandas.py
More file actions
116 lines (95 loc) · 2.85 KB
/
test_pandas.py
File metadata and controls
116 lines (95 loc) · 2.85 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
#!/usr/bin/env python
# encoding: utf-8
"""
@author: zhanghe
@software: PyCharm
@file: test_pandas.py
@time: 2017/2/27 下午3:21
"""
from pandas import Series, DataFrame
import pandas as pd
import json
def test_csv():
    """Load ``test_pandas.csv`` and print a tour of basic DataFrame views.

    Printed in order: the column labels, the raw values, the frame with every
    column label prefixed by ``a``, the frame with every column label suffixed
    by ``c``, the row index, the per-column dtypes, and finally the frame with
    its columns relabelled.

    :return: None
    """
    file_path = 'test_pandas.csv'
    df = pd.read_csv(file_path)
    print(df.keys())
    print(df.values)
    print(df.add_prefix('a'))
    print(df.add_suffix('c'))
    print(df.index)
    print(df.dtypes)
    # ``rename(columns=...)`` takes a mapping or a callable, not a bare list,
    # so pair each existing label with its replacement by position.  ``zip``
    # stops at the shorter side, so a column-count mismatch relabels only the
    # leading columns instead of raising.
    new_labels = ['网站', '权重', '类型']  # website, weight, type
    print(df.rename(columns=dict(zip(df.columns, new_labels))))
def test_excel():
    """Read sheet ``Sheet1`` of ``test_pandas.xls``, print it and re-export it.

    The frame is written back out to ``newxls.xls`` on a sheet named ``'0'``.

    :return: None
    """
    file_path = 'test_pandas.xls'
    # The sheet selector is passed positionally: its keyword was spelled
    # ``sheetname`` in older pandas and ``sheet_name`` in newer releases, so
    # naming it would tie this call to one side of that rename.
    df = pd.read_excel(file_path, 'Sheet1')
    print(df)
    with pd.ExcelWriter('newxls.xls') as writer:
        df.to_excel(writer, sheet_name=str(0))
def test_csv_encode():
    """
    Read a GB2312-encoded CSV while skipping its leading rows.

    The first 8 rows of ``test_pandas_gb2312.csv`` are skipped before parsing,
    malformed rows are dropped instead of aborting the read, and the resulting
    column labels and frame are printed.

    :return: None
    """
    file_path = 'test_pandas_gb2312.csv'
    read_options = dict(encoding='gb2312', skiprows=8)
    try:
        # Current pandas spelling.  'warn' reports each malformed row and then
        # skips it, matching what ``error_bad_lines=False`` did by default.
        df = pd.read_csv(file_path, on_bad_lines='warn', **read_options)
    except TypeError:
        # Older pandas rejects the unknown ``on_bad_lines`` keyword; fall back
        # to the legacy option so the read still tolerates malformed rows.
        df = pd.read_csv(file_path, error_bad_lines=False, **read_options)
    print(df.keys())
    print(df)
def test_read_sem_excel():
    """Build a lookup table from the ``sem`` sheet of ``SEM.xls``.

    Columns are consumed by position, not by label.  The expected order is:
    sem_site, sem_plan, sem_unit, sc_site, sc_site_name, cate_id, cate_name,
    city_id, city_name, sc_plat, sc_plat_name.

    The column labels and the finished mapping are also printed.

    :return: dict keyed by ``(sem_site, sem_plan, sem_unit)`` tuples; each
        value is a dict holding the remaining eight fields of that row
    """
    file_path = 'SEM.xls'
    # The sheet selector is passed positionally: its keyword was spelled
    # ``sheetname`` in older pandas and ``sheet_name`` in newer releases.
    df = pd.read_excel(file_path, 'sem')
    print(list(df.keys()))
    sem_map = {}
    for row in df.values:
        # The first three cells identify the entry; the rest describe it.
        lookup_key = (row[0], row[1], row[2])
        sem_map[lookup_key] = {
            'sc_site': row[3],
            'sc_site_name': row[4],
            'cate_id': row[5],
            'cate_name': row[6],
            'city_id': row[7],
            'city_name': row[8],
            'sc_plat': row[9],
            'sc_plat_name': row[10],
        }
    print(sem_map)
    return sem_map
def test_read_area_code_excel():
    """Print every row of the workbook's ``code`` sheet as a JSON object.

    Columns are consumed by position: id, Chinese name, area code,
    country/area, short code, English name.  ``phone_pre`` is derived by
    prefixing the area code with ``+00``.  Each object is printed with a
    trailing comma so the output can be pasted into a list literal.

    :return: None
    """
    file_path = '全球区号.xlsx'
    # The sheet selector is passed positionally: its keyword was spelled
    # ``sheetname`` in older pandas and ``sheet_name`` in newer releases.
    df = pd.read_excel(file_path, 'code')
    print(list(df.keys()))
    for row in df.values:
        code_dict = {
            'id': row[0],
            'name_c': row[1],
            'area_code': row[2],
            'phone_pre': '+00%s' % row[2],
            'country_area': row[3],
            'short_code': row[4],
            'name_e': row[5]
        }
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        print(json.dumps(code_dict, indent=4, ensure_ascii=False) + ',')
def test_read_area_code_map_excel():
    """Print the workbook's ``code`` sheet as ``key: 'value',`` mapping lines.

    Each row becomes one line of the form
    ``<col0>: '<col2>', # [<col4>]<col1>(<col5>) <col3>``, i.e. a dict entry
    followed by a trailing comment built from the remaining columns.

    :return: None
    """
    file_path = '全球区号.xlsx'
    # The sheet selector is passed positionally: its keyword was spelled
    # ``sheetname`` in older pandas and ``sheet_name`` in newer releases.
    df = pd.read_excel(file_path, 'code')
    print(list(df.keys()))
    line_template = '%s: \'%s\', # [%s]%s(%s) %s'
    for row in df.values:
        print(line_template % (row[0], row[2], row[4], row[1], row[5], row[3]))
if __name__ == "__main__":
    # Exactly one demo runs per invocation.  To try another, swap the active
    # call below for one of:
    #   test_csv()
    #   test_excel()
    #   test_csv_encode()
    #   test_read_sem_excel()
    #   test_read_area_code_excel()
    test_read_area_code_map_excel()
"""
pip install pandas
# 操作excel
pip install xlrd
"""