No articles found
Try different keywords or browse our categories
Generate Excel Files from Raw Data with Python
Quick guide to creating Excel files from raw data using Python. Learn to use openpyxl, xlsxwriter, and pandas for Excel generation.
Need to convert raw data into Excel files? Python makes it simple with three powerful libraries. Here’s how to generate Excel files quickly and efficiently.
Quick Setup
pip install openpyxl xlsxwriter pandas
Method 1: Using Pandas (Simplest)
Perfect for converting data structures to Excel:
import pandas as pd
# From dictionary
data = {
'Name': ['John', 'Sarah', 'Mike'],
'Age': [28, 34, 45],
'Salary': [50000, 65000, 75000]
}
df = pd.DataFrame(data)
df.to_excel('output.xlsx', index=False, sheet_name='Employees')
Multiple sheets:
import pandas as pd
employees = pd.DataFrame({
'Name': ['John', 'Sarah'],
'Department': ['IT', 'HR']
})
departments = pd.DataFrame({
'Department': ['IT', 'HR'],
'Budget': [100000, 75000]
})
with pd.ExcelWriter('company.xlsx') as writer:
employees.to_excel(writer, sheet_name='Employees', index=False)
departments.to_excel(writer, sheet_name='Departments', index=False)
Method 2: Using openpyxl (More Control)
Best when you need cell-level control:
from openpyxl import Workbook
from openpyxl.styles import Font, Alignment
wb = Workbook()
ws = wb.active
ws.title = "Sales Report"
# Headers
headers = ['Product', 'Quantity', 'Price', 'Total']
ws.append(headers)
# Style headers
for cell in ws[1]:
cell.font = Font(bold=True)
cell.alignment = Alignment(horizontal='center')
# Add data
data = [
['Laptop', 5, 1000, 5000],
['Mouse', 20, 25, 500],
['Keyboard', 15, 75, 1125]
]
for row in data:
ws.append(row)
# Adjust column widths
for column in ws.columns:
max_length = max(len(str(cell.value)) for cell in column)
ws.column_dimensions[column[0].column_letter].width = max_length + 2
wb.save('sales.xlsx')
Method 3: Using xlsxwriter (Advanced Formatting)
Ideal for reports with charts and formatting:
import xlsxwriter
workbook = xlsxwriter.Workbook('report.xlsx')
worksheet = workbook.add_worksheet('Monthly Sales')
# Define formats
header_format = workbook.add_format({
'bold': True,
'bg_color': '#4472C4',
'font_color': 'white',
'align': 'center'
})
currency_format = workbook.add_format({'num_format': '$#,##0.00'})
# Headers
headers = ['Month', 'Revenue', 'Expenses', 'Profit']
worksheet.write_row('A1', headers, header_format)
# Data
data = [
['January', 50000, 30000, 20000],
['February', 55000, 32000, 23000],
['March', 48000, 28000, 20000]
]
for row_num, row_data in enumerate(data, start=1):
worksheet.write_string(row_num, 0, row_data[0])
worksheet.write_number(row_num, 1, row_data[1], currency_format)
worksheet.write_number(row_num, 2, row_data[2], currency_format)
worksheet.write_number(row_num, 3, row_data[3], currency_format)
# Add chart
chart = workbook.add_chart({'type': 'column'})
chart.add_series({
'name': 'Revenue',
'categories': '=Monthly Sales!$A$2:$A$4',
'values': '=Monthly Sales!$B$2:$B$4',
})
worksheet.insert_chart('F2', chart)
workbook.close()
Convert CSV to Excel
import pandas as pd
# Single CSV
df = pd.read_csv('data.csv')
df.to_excel('data.xlsx', index=False)
# Multiple CSV files to sheets
import glob
csv_files = glob.glob('*.csv')
with pd.ExcelWriter('combined.xlsx') as writer:
for csv_file in csv_files:
df = pd.read_csv(csv_file)
sheet_name = csv_file.replace('.csv', '')
df.to_excel(writer, sheet_name=sheet_name, index=False)
Convert JSON to Excel
import pandas as pd
import json
# From JSON file
with open('data.json', 'r') as f:
data = json.load(f)
df = pd.DataFrame(data)
df.to_excel('output.xlsx', index=False)
# From nested JSON
data = {
'users': [
{'name': 'John', 'email': 'john@example.com'},
{'name': 'Sarah', 'email': 'sarah@example.com'}
]
}
df = pd.DataFrame(data['users'])
df.to_excel('users.xlsx', index=False)
Database to Excel
import pandas as pd
import sqlite3
# SQLite
conn = sqlite3.connect('database.db')
df = pd.read_sql_query("SELECT * FROM users", conn)
df.to_excel('users.xlsx', index=False)
conn.close()
# PostgreSQL/MySQL (requires psycopg2 or pymysql)
from sqlalchemy import create_engine
engine = create_engine('postgresql://user:pass@localhost/dbname')
df = pd.read_sql_query("SELECT * FROM orders", engine)
df.to_excel('orders.xlsx', index=False)
Add Formulas
import xlsxwriter
workbook = xlsxwriter.Workbook('formulas.xlsx')
worksheet = workbook.add_worksheet()
# Data
worksheet.write('A1', 'Item')
worksheet.write('B1', 'Price')
worksheet.write('C1', 'Quantity')
worksheet.write('D1', 'Total')
data = [
['Widget', 10, 5],
['Gadget', 20, 3],
['Doohickey', 15, 7]
]
for row, (item, price, qty) in enumerate(data, start=1):
worksheet.write(row, 0, item)
worksheet.write(row, 1, price)
worksheet.write(row, 2, qty)
worksheet.write_formula(row, 3, f'=B{row+1}*C{row+1}')
# Sum total
worksheet.write_formula('D5', '=SUM(D2:D4)')
workbook.close()
Batch Processing
import pandas as pd
import os
def process_files_to_excel(input_dir, output_file):
"""Convert all CSV files in directory to Excel sheets."""
with pd.ExcelWriter(output_file) as writer:
for filename in os.listdir(input_dir):
if filename.endswith('.csv'):
filepath = os.path.join(input_dir, filename)
df = pd.read_csv(filepath)
sheet_name = filename.replace('.csv', '')[:31] # Excel sheet name limit
df.to_excel(writer, sheet_name=sheet_name, index=False)
print(f"Created {output_file}")
# Usage
process_files_to_excel('./data', 'combined_report.xlsx')
Format Data Types
import pandas as pd
data = {
'Date': ['2024-01-01', '2024-01-02', '2024-01-03'],
'Sales': [1000.50, 2500.75, 1750.25],
'Quantity': [10, 25, 17]
}
df = pd.DataFrame(data)
df['Date'] = pd.to_datetime(df['Date'])
with pd.ExcelWriter('formatted.xlsx', engine='xlsxwriter') as writer:
df.to_excel(writer, sheet_name='Sales', index=False)
workbook = writer.book
worksheet = writer.sheets['Sales']
# Format columns
date_format = workbook.add_format({'num_format': 'yyyy-mm-dd'})
money_format = workbook.add_format({'num_format': '$#,##0.00'})
worksheet.set_column('A:A', 12, date_format)
worksheet.set_column('B:B', 12, money_format)
Quick Tips
Choose the right library:
- Pandas: Quick data conversion, simple formatting
- openpyxl: Reading/modifying existing files
- xlsxwriter: Complex formatting, charts, performance
Performance:
- xlsxwriter is fastest for large files
- Pandas is most convenient for data manipulation
- openpyxl best for editing existing files
Common gotchas:
- Excel sheet names limited to 31 characters
- Cannot have special characters in sheet names:
\ / * [ ] : ? - Close workbooks after writing (xlsxwriter)
- Use context managers with pandas ExcelWriter
Conclusion
Python makes Excel generation straightforward. Use pandas for quick conversions, openpyxl for cell-level control, and xlsxwriter for advanced formatting. Choose based on your needs and start automating spreadsheet creation today.
Related Articles
Read and Write CSV Files with Python
Simple guide to reading and writing CSV files in Python using csv module and pandas. Quick examples for data processing.
Python FFMPEG Integration: Edit Videos in Terminal
Master video editing from the command line using Python and FFmpeg. Learn to trim, merge, compress, and manipulate videos programmatically.
Python ReportLab Tutorial: Edit PDF Files in Terminal
Learn how to use Python's ReportLab library to create and edit PDF files directly from the command line with practical examples.