分享

python从入门到实践,文件读写与Excel操作

 网摘文苑 2023-07-06 发布于新疆

文件读写器操作通过open()函数进行操作,使用带关键字读写器文件就不用再惦记着要关闭文件时。

首先来看一个简单的常规文件读写操作;

f = open('test.txt', mode='w+', encoding='utf-8')f.write(','.join([t for t in range(1000)]))f.close()f = open('test.txt', mode='r', encoding='utf-8')print(f.read())f.close()

再看一个省心的文件读写操作

def write_csv_file(file_path, data, data_type='list'):    '''爬虫输入写入Excel文件'''    head = ['标题', '小区', '房厅', '面积', '朝向', '楼层', '年份',            '位置', '总价(万)', '单价(元/平方米)']    keys = ['title', 'house', 'bedroom', 'area', 'direction',            'floor', 'year', 'location', 'total_price', 'unit_price']    try:        with open(file_path, 'w', newline='', encoding='utf_8_sig') as csv_file:            writer = csv.writer(csv_file, dialect='excel')            if head is not None:                writer.writerow(head)            if data_type == 'list':                for item in data:                    row_data = []                    for k in keys:                        row_data.append(item[k])                    writer.writerow(row_data)            else:                # 如果队列不为空,写入每行数据                while not data.empty():                    item = data.get()                    if item:                        row_data = []                        for k in keys:                            row_data.append(item[k])                        writer.writerow(row_data)            print('Write a CSV file to path %s Successful.' % file_path)    except Exception as e:        print('Fail to write CSV to path: %s, Case: %s' % (file_path, e))def read_file():    with open('home.csv', mode='r', encoding='utf-8') as f:        for i in f.readlines():            print(i)

Excel操作

Excel文件操作常用的库有xlrd、xlwt、openpyxl、xlswriter已经pandas等;

  • pandas:数据处理最常用的分析库之一,可以读取各种格式的数据文件,一般输出dataframe格式,功能强大;
  • openpyxl:针对主要xlsx格式的excel进行读取和编辑;
  • xlrd库:从excel中读取数据,支持xls;
  • xlwt修改库:对excel进行操作,不支持对xlsx格式的修改;
  • xlutils库:在xlwt和xlrd中,对一个已存在的文件进行修改
  • xlwings:对xlsx、xls、xlsm格式文件进行读写、格式修改等操作
  • xlsxwriter:用于生成excel表格,插入数据、插入图标等表格操作,不支持读取
  1. xlwt库创建Excel
# 设置表格样式def set_style(name, height, bold=False): style = xlwt.XFStyle() font = xlwt.Font() font.name = name font.bold = bold font.color_index = 4 font.height = height style.font = font return styledef write_excel(file_name: str, info: List, table_head: Optional[List], sheet: str = 'data', save_path: str = '', *args, **kwargs) -> str: ''' xlwt库创建Excel ''' try: file_name += '.xls' save_file = os.path.join(save_path, file_name) if save_path else file_name # Excel文件保存路径 workbook = xlwt.Workbook() # 创建Excel sheet1 = workbook.add_sheet(sheet, cell_overwrite_ok=True) # 增加sheet页 for i in range(0, len(table_head)): # 写表头数据 sheet1.write(0, i, table_head[i], set_style('Times New Roman', 220, True)) for i in range(len(info)): # 从第二行开始写数据 item = info[i] for j in range(len(table_head)): field = table_head[j] sheet1.write(i + 1, j, item[field], set_style('Times New Roman', 220, True)) workbook.save(save_file) logger.info(f'Excel文件: {file_name}, 保存成功') return save_file except Exception as e: logger.exception(f'Excel文件处理异常: {str(e)}') return ''
  1. xlrd包读取Excel文件
def read_excel(file_name: str, sheet_name: str = '',                sheet_index: int = 0, data_type: int = 0) -> List:    ''' xlrd包读取Excel文件 只支持xls    :param file_name: 文件路径    :param sheet_name: 要读取的sheet名称    :param sheet_index: 要读取的sheet下标    :param data_type: 数据类型 0: list, 1: dict    :return:    '''    wb = xlrd.open_workbook(filename=file_name)  # 打开文件    sheet1 = wb.sheet_by_name(sheet_name) if sheet_name else wb.sheet_by_index(sheet_index)  # 通过sheet名称或索引获取表格    rows = sheet1.nrows  # sheet页行数    colums = sheet1.ncols  # sheet页列数    table_head = sheet1.row_values(0)  # 获取表头    logger.info(f'table_head: {table_head}')    li = [table_head]    for i in range(1, rows):  # sheet页数据添加到li中        if data_type == 0:            li.append(sheet1.row_values(i))  # 数据形式为列表        else:            di = {}            for idx, t in enumerate(table_head):                di[t] = sheet1.row_values(i)[idx]            li.append(di)    return li
  1. xlsxwriter库进行创建Excel
def write_excel_xlsx(file_name: str, info: List, table_head: Optional[List], sheet: str = 'data', save_path: str = ''): ''' 数据写入Excel,导出Excel文件, 支持xlsx类型 使用xlsxwriter库 :param file_name: excel文件名称 :param info: 写入的数据 :param table_head: 数据表头 :param sheet: sheet页名称 :param save_path: 保存路径 :return: ''' try: down_path = os.path.expanduser('~') # 用户目录 file_name += '.xlsx' down_path_file = os.path.join(save_path, file_name) if save_path else \ os.path.join(down_path, file_name) # excel文件地址 workbook = xlsxwriter.Workbook(down_path_file) # 创建Excel文件 worksheet = workbook.add_worksheet(sheet) # 创建sheet,名为data format1 = workbook.add_format( {'bold': True, 'font_color': 'black', 'font_size': 13, 'align': 'left', 'font_name': u'宋体'}) # 表头格式 format2 = workbook.add_format({'font_color': 'black', 'font_size': 11, 'align': 'left', 'font_name': u'宋体'}) # 表头外格式 worksheet.set_column('A:A', 10) # A列列宽设置能更好的显示 for i in range(0, len(table_head)): # 插入第一行表头标题 field = table_head[i] worksheet.write(0, i, field, format1) # 从第二行开始插入数据 for i in range(len(info)): item = info[i] # 行数据 for j in range(len(table_head)): field = table_head[j] worksheet.write(i + 1, j, item[field], format2) workbook.close() alert_text = '导出成功,导出地址:{}!'.format(down_path_file) return down_path_file except Exception as e: logger.exception('导出Excel异常: {}'.format(e)) return ''
  1. openpyxl库进行写Excel
def openpyxl_write_excel(file_name: str, info: List,                          table_head: Optional[List], sheet: str = 'data',                         save_path: str = ''):    ''' 数据写入Excel,导出Excel文件, 支持xlsx类型  使用openpyxl库    raise ValueError('Cannot convert {0!r} to Excel'.format(value)) 数据需要为字符串    '''    down_path = os.path.expanduser('~')  # 用户目录    file_name += '.xlsx'    down_path_file = os.path.join(save_path, file_name) if save_path else \      	os.path.join(down_path,   file_name)  # excel文件地址    wb = Workbook()  # 创建一个新的Excel表, 只写模式write_only=True, 默认读写模式    sheet1 = wb.active  # 激活当前sheet页    # sheet1 = wb.create_sheet(sheet)  # sheet页面末尾新建一个sheet页   create_sheet(sheet, index=0) 在第0位置新建    sheet1.append(table_head)    for t in info:        if isinstance(t, list):            t = [str(s) for s in t]            sheet1.append(t)        elif isinstance(t, dict):            fields = [str(t[s]) for s in table_head]            sheet1.append(fields)    wb.save(down_path_file)    return down_path_file
  1. openpyxl库进行读Excel
def openpyxl_read_excel(file_name: str, sheet_name: str = '', sheet_index: int = 0, data_type: int = 0) -> List: ''' 读取Excel文件 使用openpyxl库 param data_type: 列表中数据类型 0:list, 1: dict ''' workbook = load_workbook(file_name) # 加载Excel文件 # booksheet = workbook.active # 获取当前活跃的sheet,默认是第一个sheet # 如果想获取别的sheet页采取下面这种方式,先获取所有sheet页名,在通过指定那一页。 sheets = workbook.sheetnames # workbook.get_sheet_names() # 从名称获取sheet # booksheet = workbook.get_sheet_by_name(sheet_name) try: booksheet = workbook[sheet_name] if sheet_name else workbook[sheets[sheet_index]] except Exception as e: booksheet = workbook[sheets[0]] rows = booksheet.rows # 获取sheet页的行数据 columns = booksheet.columns # 获取sheet页的列数据 li = [] table_head = [] for idx, row in enumerate(rows): # 迭代所有的行 line = [col.value for col in row] if idx == 0: table_head.extend(line) # 列表数据追加到列表 continue if data_type == 0: li.append(line) # 数据形式为列表 else: di = dict(zip(table_head, line)) # 数据转换为字典 li.append(di) return li
  1. pandas操作Excel
def pd_excel_write_info(path, file, data, abs_path, **kwargs):    info = {}    for i in data:        info = i        info['path'] = abs_path    f = os.path.join(path, file)    df = pd.DataFrame(info)    df.to_excel(f)def pd_excel_info(file, sheetname=0, **kwargs):    converters = kwargs.get('converters', {})  # 列转码    try:        data = pd.read_excel(file, sheet_name=sheetname, converters=converters)        li = []        for ix, row in data.iterrows():            try:                li.append(dict(row))  # 按行添加数据 以dict/list形式添加            except Exception as e:                logger.error(e)        return li    except Exception as e:        return None

根据以上库的使用,可以看出openpyxl和pandas的功能更加坚固和强大,尤其是pandas,对于数据操作来说那简直方便的飞起!

以下是各个库的性能对比,该图是从知乎网上得来的,凡事有删减!

文章图片1

    本站是提供个人知识管理的网络存储空间,所有内容均由用户发布,不代表本站观点。请注意甄别内容中的联系方式、诱导购买等信息,谨防诈骗。如发现有害或侵权内容,请点击一键举报。
    转藏 分享 献花(0

    0条评论

    发表

    请遵守用户 评论公约

    类似文章 更多