import pandas as pd
# Load the annotations table and keep only the first 10 rows for the demos below.
df = pd.read_csv("annotations.csv")[0:10]
## 一 DataFrame,数据帧df,可以将其看作表格
### 行:index,列:columns
# Display the frame; the table that follows is the notebook's rendered output.
df
|
seriesuid |
coordX |
coordY |
coordZ |
diameter_mm |
0 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
1 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
103.783651 |
-211.925149 |
-227.121250 |
4.224708 |
2 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
69.639017 |
-140.944586 |
876.374496 |
5.786348 |
3 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
-24.013824 |
192.102405 |
-391.081276 |
8.143262 |
4 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
5 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
6 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
7 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
8 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
9 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
### 2 取其中某几列(此处为四列)
# Build a new frame from df restricted to the four listed columns
# (diameter_mm is left out, as the output below shows).
pd.DataFrame(df,columns = ['seriesuid','coordX','coordY','coordZ'])
|
seriesuid |
coordX |
coordY |
coordZ |
0 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
1 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
103.783651 |
-211.925149 |
-227.121250 |
2 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
69.639017 |
-140.944586 |
876.374496 |
3 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
-24.013824 |
192.102405 |
-391.081276 |
4 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
5 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
6 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
7 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
8 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
9 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
### 3 取其中某两行
# Build a new frame from df restricted to the rows whose index labels are 0 and 4.
pd.DataFrame(df,index = [0,4])
|
seriesuid |
coordX |
coordY |
coordZ |
diameter_mm |
0 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
4 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
## 二 对DataFrame操作
### 1 排序
# axis=1 sorts along the column axis: columns are reordered by label in
# ascending order, while the row order is left unchanged.
df.sort_index(axis=1,ascending=True)
|
coordX |
coordY |
coordZ |
diameter_mm |
seriesuid |
0 |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
1 |
103.783651 |
-211.925149 |
-227.121250 |
4.224708 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
2 |
69.639017 |
-140.944586 |
876.374496 |
5.786348 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
3 |
-24.013824 |
192.102405 |
-391.081276 |
8.143262 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
4 |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
5 |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
6 |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
7 |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
8 |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
9 |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
### 2 算术运算
# Element-wise product of coordX and coordY, stored as a new column on df.
# NOTE(review): 'corrd_X_Y' looks like a typo for 'coord_X_Y'; the name is kept
# because the outputs recorded below use it.
df['corrd_X_Y'] = df['coordX']*df['coordY']
# Display the frame including the new column.
df
|
seriesuid |
coordX |
coordY |
coordZ |
diameter_mm |
corrd_X_Y |
0 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
22563.488788 |
1 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
103.783651 |
-211.925149 |
-227.121250 |
4.224708 |
-21994.365650 |
2 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
69.639017 |
-140.944586 |
876.374496 |
5.786348 |
-9815.242447 |
3 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
-24.013824 |
192.102405 |
-391.081276 |
8.143262 |
-4613.113389 |
4 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
421.081078 |
5 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
13551.304585 |
6 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
17586.268931 |
7 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
4479.543419 |
8 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
2082.758414 |
9 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
-15884.804687 |
### 3 切片
# Element-wise comparison: yields a boolean Series (one True/False per row),
# it does not filter df by itself.
df['diameter_mm']>6
0 False
1 False
2 False
3 True
4 True
5 True
6 True
7 True
8 True
9 True
Name: diameter_mm, dtype: bool
# Label-based selection: every row, only the coordX and coordY columns.
df.loc[:,['coordX','coordY']]
|
coordX |
coordY |
0 |
-128.699421 |
-175.319272 |
1 |
103.783651 |
-211.925149 |
2 |
69.639017 |
-140.944586 |
3 |
-24.013824 |
192.102405 |
4 |
2.441547 |
172.464881 |
5 |
90.931713 |
149.027266 |
6 |
89.540769 |
196.405159 |
7 |
81.509646 |
54.957219 |
8 |
105.055792 |
19.825260 |
9 |
-124.834262 |
127.247155 |
# Position-based selection: rows at positions 0 and 1, columns at positions
# 2 and 3 (the slice end 4 is exclusive), i.e. coordY and coordZ here.
df.iloc[[0,1],2:4]
|
coordY |
coordZ |
0 |
-175.319272 |
-298.387506 |
1 |
-211.925149 |
-227.121250 |
# Boolean-mask filtering: keep only the rows whose diameter_mm is greater than 10.
df[df['diameter_mm']>10]
|
seriesuid |
coordX |
coordY |
coordZ |
diameter_mm |
corrd_X_Y |
4 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
421.081078 |
5 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
13551.304585 |
6 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
17586.268931 |
7 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
4479.543419 |
8 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
2082.758414 |
9 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
-15884.804687 |
### 4 合并
# Stack three copies of df vertically; ignore_index=True discards the original
# row labels and renumbers the result 0..29.
pd.concat([df,df,df],ignore_index=True)
|
seriesuid |
coordX |
coordY |
coordZ |
diameter_mm |
corrd_X_Y |
0 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
22563.488788 |
1 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
103.783651 |
-211.925149 |
-227.121250 |
4.224708 |
-21994.365650 |
2 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
69.639017 |
-140.944586 |
876.374496 |
5.786348 |
-9815.242447 |
3 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
-24.013824 |
192.102405 |
-391.081276 |
8.143262 |
-4613.113389 |
4 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
421.081078 |
5 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
13551.304585 |
6 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
17586.268931 |
7 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
4479.543419 |
8 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
2082.758414 |
9 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
-15884.804687 |
10 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
22563.488788 |
11 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
103.783651 |
-211.925149 |
-227.121250 |
4.224708 |
-21994.365650 |
12 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
69.639017 |
-140.944586 |
876.374496 |
5.786348 |
-9815.242447 |
13 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
-24.013824 |
192.102405 |
-391.081276 |
8.143262 |
-4613.113389 |
14 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
421.081078 |
15 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
13551.304585 |
16 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
17586.268931 |
17 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
4479.543419 |
18 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
2082.758414 |
19 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
-15884.804687 |
20 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
22563.488788 |
21 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
103.783651 |
-211.925149 |
-227.121250 |
4.224708 |
-21994.365650 |
22 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
69.639017 |
-140.944586 |
876.374496 |
5.786348 |
-9815.242447 |
23 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
-24.013824 |
192.102405 |
-391.081276 |
8.143262 |
-4613.113389 |
24 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
421.081078 |
25 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
13551.304585 |
26 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
17586.268931 |
27 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
4479.543419 |
28 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
2082.758414 |
29 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
-15884.804687 |
# With no `on=` argument, merge joins on all columns the two frames share.
# Outer-merging df with itself therefore gives back the same 10 rows.
pd.merge(df,df,how='outer')
|
seriesuid |
coordX |
coordY |
coordZ |
diameter_mm |
corrd_X_Y |
0 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
-128.699421 |
-175.319272 |
-298.387506 |
5.651471 |
22563.488788 |
1 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100225287222… |
103.783651 |
-211.925149 |
-227.121250 |
4.224708 |
-21994.365650 |
2 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100398138793… |
69.639017 |
-140.944586 |
876.374496 |
5.786348 |
-9815.242447 |
3 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
-24.013824 |
192.102405 |
-391.081276 |
8.143262 |
-4613.113389 |
4 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
2.441547 |
172.464881 |
-405.493732 |
18.545150 |
421.081078 |
5 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
90.931713 |
149.027266 |
-426.544715 |
18.208570 |
13551.304585 |
6 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100621383016… |
89.540769 |
196.405159 |
-515.073322 |
16.381276 |
17586.268931 |
7 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.100953483028… |
81.509646 |
54.957219 |
-150.346423 |
10.362321 |
4479.543419 |
8 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.102681962408… |
105.055792 |
19.825260 |
-91.247251 |
21.089619 |
2082.758414 |
9 |
1.3.6.1.4.1.14519.5.2.1.6279.6001.104562737760… |
-124.834262 |
127.247155 |
-473.064479 |
10.465854 |
-15884.804687 |
### 5 合并文件夹下所有同类型的csv的小例子
# Example: merge every same-schema CSV found under a folder into a single
# annotations file.
import glob  # imported here because the notebook's import cell only brings in pandas

# NOTE: this pattern matches *.csv files exactly two directory levels below the
# filesystem root; point it at the real data folder before running.
csv_files = sorted(glob.glob('/*/*/*.csv'))  # sorted for a reproducible merge order

# Start from an empty frame that carries the expected schema, so the result
# still has these columns when no CSV file is found.
df = pd.DataFrame(columns=['seriesuid', 'coordX', 'coordY', 'coordZ', 'diameter_mm', 'des'])
for csv_path in csv_files:  # not named `csv`, to avoid shadowing the stdlib module
    # With no `on=` argument, merge joins on all columns the two frames share;
    # how='outer' keeps rows that appear on either side.
    df = pd.merge(df, pd.read_csv(csv_path), how='outer')

# Keep only the annotation columns (drops 'des') and write without the row index.
df_to_save = pd.DataFrame(df, columns=['seriesuid', 'coordX', 'coordY', 'coordZ', 'diameter_mm'])
df_to_save.to_csv('annotations.csv', index=False)