分享

python 37 pandas操作csv文件小结,csv文件合并

 hdzgx 2020-01-05
import pandas as pd
df = pd.read_csv("annotations.csv")[0:10]
## 一 DataFrame,数据帧df,可以将其看作表格
### 1 行:index,列:columns
df
seriesuid coordX coordY coordZ diameter_mm
0 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506 5.651471
1 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… 103.783651 -211.925149 -227.121250 4.224708
2 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… 69.639017 -140.944586 876.374496 5.786348
3 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… -24.013824 192.102405 -391.081276 8.143262
4 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150
5 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715 18.208570
6 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322 16.381276
7 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423 10.362321
8 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251 21.089619
9 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479 10.465854
### 2 取其中某几列
pd.DataFrame(df,columns = ['seriesuid','coordX','coordY','coordZ'])
seriesuid coordX coordY coordZ
0 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506
1 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… 103.783651 -211.925149 -227.121250
2 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… 69.639017 -140.944586 876.374496
3 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… -24.013824 192.102405 -391.081276
4 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732
5 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715
6 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322
7 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423
8 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251
9 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479
### 3 取其中某两行
pd.DataFrame(df,index = [0,4])
seriesuid coordX coordY coordZ diameter_mm
0 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506 5.651471
4 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150
## 二 对DataFrame操作
### 1 排序(axis=1:按列名排序)
df.sort_index(axis=1,ascending=True)
coordX coordY coordZ diameter_mm seriesuid
0 -128.699421 -175.319272 -298.387506 5.651471 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222…
1 103.783651 -211.925149 -227.121250 4.224708 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222…
2 69.639017 -140.944586 876.374496 5.786348 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793…
3 -24.013824 192.102405 -391.081276 8.143262 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016…
4 2.441547 172.464881 -405.493732 18.545150 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016…
5 90.931713 149.027266 -426.544715 18.208570 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016…
6 89.540769 196.405159 -515.073322 16.381276 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016…
7 81.509646 54.957219 -150.346423 10.362321 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028…
8 105.055792 19.825260 -91.247251 21.089619 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408…
9 -124.834262 127.247155 -473.064479 10.465854 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760…
### 2 算术运算
df['corrd_X_Y'] = df['coordX']*df['coordY']
df
seriesuid coordX coordY coordZ diameter_mm corrd_X_Y
0 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506 5.651471 22563.488788
1 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… 103.783651 -211.925149 -227.121250 4.224708 -21994.365650
2 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… 69.639017 -140.944586 876.374496 5.786348 -9815.242447
3 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… -24.013824 192.102405 -391.081276 8.143262 -4613.113389
4 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150 421.081078
5 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715 18.208570 13551.304585
6 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322 16.381276 17586.268931
7 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423 10.362321 4479.543419
8 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251 21.089619 2082.758414
9 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479 10.465854 -15884.804687
### 3 切片
df['diameter_mm']>6
0    False
1    False
2    False
3     True
4     True
5     True
6     True
7     True
8     True
9     True
Name: diameter_mm, dtype: bool
df.loc[:,['coordX','coordY']]
coordX coordY
0 -128.699421 -175.319272
1 103.783651 -211.925149
2 69.639017 -140.944586
3 -24.013824 192.102405
4 2.441547 172.464881
5 90.931713 149.027266
6 89.540769 196.405159
7 81.509646 54.957219
8 105.055792 19.825260
9 -124.834262 127.247155
df.iloc[[0,1],2:4]
coordY coordZ
0 -175.319272 -298.387506
1 -211.925149 -227.121250
df[df['diameter_mm']>10]
seriesuid coordX coordY coordZ diameter_mm corrd_X_Y
4 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150 421.081078
5 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715 18.208570 13551.304585
6 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322 16.381276 17586.268931
7 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423 10.362321 4479.543419
8 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251 21.089619 2082.758414
9 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479 10.465854 -15884.804687
### 4 合并
pd.concat([df,df,df],ignore_index=True) 
seriesuid coordX coordY coordZ diameter_mm corrd_X_Y
0 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506 5.651471 22563.488788
1 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… 103.783651 -211.925149 -227.121250 4.224708 -21994.365650
2 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… 69.639017 -140.944586 876.374496 5.786348 -9815.242447
3 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… -24.013824 192.102405 -391.081276 8.143262 -4613.113389
4 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150 421.081078
5 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715 18.208570 13551.304585
6 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322 16.381276 17586.268931
7 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423 10.362321 4479.543419
8 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251 21.089619 2082.758414
9 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479 10.465854 -15884.804687
10 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506 5.651471 22563.488788
11 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… 103.783651 -211.925149 -227.121250 4.224708 -21994.365650
12 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… 69.639017 -140.944586 876.374496 5.786348 -9815.242447
13 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… -24.013824 192.102405 -391.081276 8.143262 -4613.113389
14 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150 421.081078
15 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715 18.208570 13551.304585
16 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322 16.381276 17586.268931
17 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423 10.362321 4479.543419
18 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251 21.089619 2082.758414
19 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479 10.465854 -15884.804687
20 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506 5.651471 22563.488788
21 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… 103.783651 -211.925149 -227.121250 4.224708 -21994.365650
22 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… 69.639017 -140.944586 876.374496 5.786348 -9815.242447
23 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… -24.013824 192.102405 -391.081276 8.143262 -4613.113389
24 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150 421.081078
25 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715 18.208570 13551.304585
26 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322 16.381276 17586.268931
27 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423 10.362321 4479.543419
28 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251 21.089619 2082.758414
29 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479 10.465854 -15884.804687
pd.merge(df,df,how='outer')
seriesuid coordX coordY coordZ diameter_mm corrd_X_Y
0 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… -128.699421 -175.319272 -298.387506 5.651471 22563.488788
1 1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… 103.783651 -211.925149 -227.121250 4.224708 -21994.365650
2 1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… 69.639017 -140.944586 876.374496 5.786348 -9815.242447
3 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… -24.013824 192.102405 -391.081276 8.143262 -4613.113389
4 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 2.441547 172.464881 -405.493732 18.545150 421.081078
5 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 90.931713 149.027266 -426.544715 18.208570 13551.304585
6 1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… 89.540769 196.405159 -515.073322 16.381276 17586.268931
7 1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… 81.509646 54.957219 -150.346423 10.362321 4479.543419
8 1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… 105.055792 19.825260 -91.247251 21.089619 2082.758414
9 1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… -124.834262 127.247155 -473.064479 10.465854 -15884.804687

### 5 合并文件夹下所有同类型 csv 文件的小例子

import glob  # needed for glob.glob below; the original snippet never imported it

# Collect the CSV files to combine. The pattern matches any .csv file located
# exactly two directories below the filesystem root; adjust it to your layout.
csv_files = glob.glob('/*/*/*.csv')

# Start from an empty frame that only carries the expected column names, so the
# first merge has a frame to join against.
df = pd.DataFrame(columns=['seriesuid', 'coordX', 'coordY', 'coordZ', 'diameter_mm', 'des'])

# Outer-merge every file into the accumulator. No `on=` is given, so pandas
# joins on the columns the two frames have in common.
for csv_path in csv_files:  # not named `csv`, which would shadow the stdlib module name
    df = pd.merge(df, pd.read_csv(csv_path), how='outer')

# Keep only the five annotation columns ('des' and any other column are
# dropped) and write them out without the row index.
df_to_save = pd.DataFrame(df, columns=['seriesuid', 'coordX', 'coordY', 'coordZ', 'diameter_mm'])
df_to_save.to_csv('annotations.csv', index=False)

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多