Principle and usage analysis of Python openpyxl module

Time:2020-6-23

This article mainly introduces the principle and usage analysis of Python openpyxl module. The example code is introduced in detail in this article, which has certain reference value for your study or work. You can refer to the following for your friends

This module is not a python built-in module that needs to be installed. The installation method is as follows

pip install openpyxl

be careful:

This module only supports Office 2010, that is, the suffix of the spreadsheet is *. Xlsx

1. Common functions of openpyxl module

import openpyxl

			wb = openpyxl.load_workbook('example.xlsx')

			#Worksheet common operations
			print(wb.active) 
			#Get whether the spreadsheet worksheet has data
			print(wb.read_only) 
			#Open as read-only
			print(wb.encoding) 
			#Get the code of the spreadsheet
			print('', wb.properties) 
			#Get spreadsheet properties such as title, author, creation time, etc
			print(wb.worksheets) 
			#Get sheet name
			print(wb.get_sheet_names()) 
			#Get all the names of the sheet
			print(wb.sheetnames) 
			#Get all the names of the sheet跟wb.get_sheet_names()一样的功能
			print(wb.get_sheet_by_name('Sheet1')) 
			#Get worksheet object operation spreadsheet by worksheet name
			print( wb.create_ Sheet ('sheet created by Python ')) 
			#The created worksheet must be saved with save before it is saved to the hard disk
			print(wb.copy_worksheet(wb['Sheet1'])) 
			#Copy sheet

			#Common operations of worksheet
			sheet1_obj = wb['Sheet1']
			print(sheet1_obj.title) 
			#Title of sheet
			print(sheet1_obj.dimensions) 
			#Get the table size. The return format is as follows: A1: D6
			print(sheet1_obj.max_row) 
			#Table maximum rows
			print(sheet1_obj.min_row) 
			#Table minimum rows
			print(sheet1_obj.max_column) 
			#Maximum number of columns in the table
			print(sheet1_obj.min_column) 
			#Minimum number of columns in the table
			print(sheet1_obj.rows) 
			#Get cells by row (cell object)
			print(sheet1_obj.columns) 
			#Get cells by column (cell object)
			print(sheet1_obj.freeze_panes) 
			#Freeze pane
			print(sheet1_obj.values) 
			#Get table contents (data) by row
			print(sheet1_obj.iter_rows())
			#Iterator method, get all cells (cell object) by row
			print(sheet1_obj.iter_columns()
			)#Iterator method, get all cells by column (cell object)
			sheet1_ obj.append (['1 column ',' 2 column ',' 3 column ',' 4 column ']) 
			#Insert multiple columns of data to the last row of the worksheet

			#Common cell operations
			sheet1_obj.merged_cells 
			#Merge cells
			sheet1_obj.unmerge_cells 
			#Unmerge cells
			print(sheet1_obj['A2'].row) 
			#Get number of rows
			print(sheet1_obj['A2'].column) 
			#Get number of columns
			print(sheet1_obj['B1'].value) 
			#Get cell value
			wb.save('example.xlsx') 
	#Save cell

2. Using openpyxl module to create a spreadsheet

#!/usr/bin/env python
			# -*- coding: utf-8 -*-

			from openpyxl import Workbook

			wb = Workbook()
			# print(wb.get_sheet_names()) 
			#Get the name of the sheet

			ws = wb.get_sheet_by_name('Sheet') 
			#Get sheet object
			# print(ws.title) 
			#Get the title of the sheet

			ws.title = 'Student' 
			#Set up a new sheet

			#Setting content
			ws['A1'] = 'Hello World'

			import datetime

			ws['A2'] = datetime.datetime.now()

	wb.save('new_sample.xlsx')

Operation effect

3. Using openpyxl module to operate the spreadsheet (sum, average)

An example of the table is as follows:

#!/usr/bin/env python
			# -*- coding: utf-8 -*-

			import openpyxl
			from openpyxl.styles import Alignment

			def process_worksheet(sheet):
			avg_column = sheet.max_column + 1 
			#Average, store in last column
			sum_column = sheet.max_column + 2 
			#Sum, store in the last second column

			for row in sheet.iter_rows(min_row=2, min_col=2):
			scores = [cell.value for cell in row] 
			#Get the value of a row
			sum_score = sum(scores) 
			#Find the sum of a line
			avg_score = sum_score / len(scores) 
			#Find the average of a row
			avg_cell = sheet.cell(row=row[0].row, column=avg_column)
			sum_cell = sheet.cell(row=row[0].row, column=sum_column)
			avg_cell.value = avg_score 
			#Navigate to cell, set total score
			sum_cell.value = sum_score 
			#Navigate to cell, set average

			#Set alignment, horizontal is right, vertical is center
			align = Alignment(horizontal='left', vertical='center', wrap_text=True)
			avg_cell.alignment = align
			sum_cell.alignment = align

			#Set the title of the average score and total score
			sheet.cell (row=1, column=avg_ Column. Value = 'average score'
			sheet.cell (row=1, column=sum_ Column. Value = 'total score'

			def main():
			wb = openpyxl.load_workbook('example.xlsx')
			sheet = wb.get_sheet_by_name('Sheet1')
			process_worksheet(sheet)
			wb.save('example.xlsx')

			if __name__ == '__main__':
	main()

Operation effect

4. Combine multiple spreadsheets into one spreadsheet

Prepare 3 spreadsheets

Employee 1.xlsx

Employee 2.xlsx

Employee 3.xlsx

#!/usr/bin/env python
				# -*- coding: utf-8 -*-

				import openpyxl
				import glob
				import os

				def merge_xlsx_files(xlsx_files):
				wb = openpyxl.load_workbook(xlsx_files[0]) 
				#Open the first spreadsheet
				ws =  wb.active  #Activate worksheet
				ws.title = 'merged result' 
				#Consolidated results

				for filename in xlsx_files[1:]:
				workbook = openpyxl.load_workbook(filename)
				sheet = workbook.active 
				#Activate worksheet
				for row in sheet.iter_rows(min_row=2): 
				#Start iteration from the second line
				values = [cell.value for cell in row] 
				#Returns the value of a column to the list type
				ws.append(values) 
				#Add the list to the new table
				return wb

				def get_all_xlsx_files(path):
				"" "specify the suffix name, get all file types related to the suffix, and return to the list" ""
				xlsx_files = glob.glob(os.path.join(path, '*.xlsx'))
				sorted(xlsx_files, key=str.lower) 
				#Sort
				return xlsx_files

				def main():
				path =  os.path.join ( os.path.dirname ( os.getcwd ()), 'temporary test', 'excel' 
				#Directory self configuration
				xlsx_files = get_all_xlsx_files(path)
				wb = merge_xlsx_files(xlsx_files)
				wb.save('merge_data.xlsx') 
				#Save data to hard disk

				if __name__ == '__main__':
		main()

Operation effect (the hard disk has an extra spreadsheet merge_ data.xlsx )