#!/usr/bin/python
import json
from collections import OrderedDict

import pandas as pd
# 1. Parse the JSON export into Python objects (nested dicts and lists).
#    The encoding is pinned to UTF-8 so that decoding the file does not
#    depend on the platform's locale default.
with open('cases.2021-02-25.json', 'r', encoding='utf-8') as f:
    data = json.load(f)

# 2. Collect the field names of interest into a list.
#    The nested sections ('summary', 'project', 'demographic') contribute the
#    names of their inner keys; the flat fields ('primary_site',
#    'submitter_id') contribute their own name.  Names are appended once per
#    record that carries them, in the order the record lists its keys, so Ks
#    contains repeats whenever several records share a field.
Ks = []
for test in data:
    for a, b in test.items():
        if a in ('summary', 'project', 'demographic'):
            Ks.extend(b.keys())
        elif a in ('primary_site', 'submitter_id'):
            Ks.append(a)
# Append the values found in each record's nested sections to the matching
# per-field lists in my_dict.
# NOTE(review): my_dict is not created in the code visible here; it is
# presumably a mapping from field name to list -- confirm where it is built.
for test in data:
    summary = test['summary']
    for k in summary:
        if k == 'file_count':
            my_dict[k].append(summary[k])
            continue
        # Every other summary entry is iterated as a sequence of mappings;
        # only the 'data_category' value of each mapping that has one is kept.
        category = [
            da['data_category']
            for da in summary[k]
            if 'data_category' in da.keys()
        ]
        my_dict[k].append(category)

    project = test['project']
    for k in project:
        my_dict[k].append(project[k])

    if 'demographic' not in test:
        # No demographic section on this record: only the 'gender' list
        # receives a placeholder value.
        # NOTE(review): if demographic sections carry fields other than
        # 'gender', those lists get no entry for this record -- confirm this
        # is intended.
        my_dict['gender'].append('Unknow')
        continue
    demographic = test['demographic']
    for k in demographic:
        my_dict[k].append(demographic[k])