收藏文章楼主

批量识别身份证并导出 Excel

版块：便民互助 | 类型：普通 | 作者：喔 | 查看：4 | 回复：0 | 获赞：0 | 时间：2025-06-02 17:58:57

由于一些行业需要手动录入大量身份证信息，因此编写了本软件：批量识别身份证图片并将结果导出为 Excel，以减少人工录入的工作量。

下载地址：

https://pan.xunlei.com/s/VORZO0ORVI_f5-QvI3vutGNgA1?pwd=6jpp#

代码：

import re
import threading
import tkinter as tk
from tkinter import filedialog, messagebox, ttk
from openpyxl import Workbook
from openpyxl.drawing.image import Image as XLImage
from openpyxl.styles import Alignment
from paddlenlp import Taskflow
from paddleocr import PaddleOCR
# Load the OCR engine and the information-extraction model once, at import
# time; extract_info_from_image reuses both for every picture.
ocr = PaddleOCR(
    use_angle_cls=True,
    lang="ch",
    use_gpu=False,
)

# Entity labels requested from the information-extraction model.
schema = ['地址', '姓名', '时间']
ie = Taskflow(
    'information_extraction',
    schema=schema,
    model='uie-base',
)
# An 18-character resident ID number: 17 digits followed by a digit or an X
# check character, not embedded in a longer run of digits.
_ID_NUMBER_RE = re.compile(r'(?<!\d)\d{17}[\dXx](?!\d)')
_ID_LABEL = '公民身份号码'


def _find_id_number(page, index):
    """Return the ID number belonging to the label at ``page[index]``, or ""."""
    label_text = page[index][1][0]
    # OCR may return the label and the number as one line or as two
    # consecutive lines, so look at the rest of the label line first and
    # then at the following line.
    candidates = [label_text.split(_ID_LABEL, 1)[1]]
    if index + 1 < len(page):
        candidates.append(page[index + 1][1][0] or "")
    for candidate in candidates:
        found = _ID_NUMBER_RE.search(candidate)
        if found:
            # Keep only the matched number, not any surrounding OCR noise.
            return found.group(0)
    return ""


def extract_info_from_image(image_path):
    """Run OCR on one ID-card image and pull out the structured fields.

    The ethnicity and the ID number are located by keyword matching on the
    recognized lines; the name and the birth date come from the
    information-extraction model applied to the concatenated text.

    Args:
        image_path: Path of the image file to recognize.

    Returns:
        A dict with the keys "姓名", "民族", "出生日期", "身份证号" and
        "图片路径" (fields that were not found are empty strings), or
        ``None`` when no text was recognized or any step raised.
    """
    try:
        result = ocr.ocr(image_path, cls=True)
        # NOTE(review): some PaddleOCR versions are reported to return None
        # for a page without detected text -- dropped here instead of relying
        # on a TypeError.
        pages = [page for page in (result or []) if page]

        name = nation = birth = id_number = ""
        texts = []
        for page in pages:
            for i, line in enumerate(page):
                text = line[1][0]
                if not text:
                    continue
                texts.append(text)
                if '民族' in text:
                    match = re.search(r'民族(.+)', text)
                    if match:
                        nation = match.group(1).strip()
                if _ID_LABEL in text:
                    # Keep an earlier hit if this label has no valid number.
                    id_number = _find_id_number(page, i) or id_number

        if not texts:
            print(f"处理 {image_path} 失败: 未识别到文字")
            return None

        # Same text the model received before: every line followed by a space.
        all_text = "".join(text + " " for text in texts)

        # Name and birth date come from the information-extraction model.
        ie_result = ie(all_text)
        if ie_result and isinstance(ie_result, list):
            info = ie_result[0]
            if '姓名' in info and info['姓名']:
                name = info['姓名'][0]['text']
            if '时间' in info and info['时间']:
                birth = info['时间'][0]['text']

        return {
            "姓名": name,
            "民族": nation,
            "出生日期": birth,
            "身份证号": id_number,
            "图片路径": image_path
        }
    except Exception as e:
        # Per-image boundary: one bad picture must not abort the whole batch.
        print(f"处理 {image_path} 失败: {e}")
        return None
class OCRApp:
    """Tkinter front end that OCRs a batch of ID-card images into a workbook.

    The window shows a prompt label, a button that opens a file picker and an
    indeterminate progress bar. Recognition runs on a worker thread and the
    result is saved as ``身份证识别结果.xlsx`` (a relative path, so it lands in
    the current working directory).
    """

    def __init__(self, root):
        """Create the widgets inside *root*, the application's main window."""
        self.root = root
        self.root.title("身份证批量识别工具")
        self.root.geometry("400x200")
        self.label = tk.Label(root, text="点击下方按钮选择身份证图片", font=("Arial", 14))
        self.label.pack(pady=20)
        self.btn = tk.Button(root, text="选择图片", command=self.start_batch_ocr)
        self.btn.pack(pady=10)
        self.progress = ttk.Progressbar(root, orient="horizontal", length=300, mode="indeterminate")
        self.progress.pack(pady=10)

    def start_batch_ocr(self):
        """Ask for image files and start recognizing them on a worker thread."""
        file_paths = filedialog.askopenfilenames(
            title="选择身份证图片",
            filetypes=[("Image Files", "*.jpg *.jpeg *.png *.bmp")]
        )
        if not file_paths:
            return
        # Show activity and block a second batch while this one is running.
        self.progress.start()
        self.btn.config(state=tk.DISABLED)
        # OCR is slow; running it off the main thread keeps the window responsive.
        thread = threading.Thread(target=self.process_files, args=(file_paths,))
        thread.start()

    def process_files(self, file_paths):
        """Recognize every image in *file_paths* and save the results to Excel.

        Runs on the worker thread. Whatever happens -- success, unreadable
        images or a failed save -- the progress bar is stopped, the button is
        re-enabled and the user gets a dialog, all scheduled onto the Tk event
        loop instead of being executed from this thread.
        """
        output_path = "身份证识别结果.xlsx"
        try:
            failed = self._export(file_paths, output_path)
        except Exception as e:
            # Worker-thread boundary: e.g. the workbook is open in Excel and
            # cannot be overwritten. Report it instead of leaving the UI
            # stuck in its busy state.
            print(f"导出失败: {e}")
            notify, title, message = messagebox.showerror, "错误", f"处理失败: {e}"
        else:
            message = f"识别完成，结果已保存至 {output_path}"
            if failed:
                message += f"\n{len(failed)} 张图片识别失败，已跳过"
            notify, title = messagebox.showinfo, "完成"
        self.root.after(0, self._finish, notify, title, message)

    def _finish(self, notify, title, message):
        """Restore the idle UI state and show the final dialog."""
        self.progress.stop()
        self.btn.config(state=tk.NORMAL)
        notify(title, message)

    def _export(self, file_paths, output_path):
        """Write one row per recognized image to *output_path*.

        Returns the list of paths for which recognition returned nothing;
        those images get no row in the sheet.
        """
        centered = Alignment(horizontal='center', vertical='center')
        wb = Workbook()
        ws = wb.active
        ws.append(["图片", "姓名", "民族", "出生日期", "身份证号"])

        failed = []
        row_idx = 2  # row 1 holds the header
        for path in file_paths:
            info = extract_info_from_image(path)
            if not info:
                failed.append(path)
                continue
            ws.cell(row=row_idx, column=2, value=info["姓名"])
            ws.cell(row=row_idx, column=3, value=info["民族"])
            ws.cell(row=row_idx, column=4, value=info["出生日期"])
            ws.cell(row=row_idx, column=5, value=info["身份证号"])
            try:
                img = XLImage(path)
                img.width = 500
                img.height = 300
                # NOTE(review): the pixel height is used as the row height
                # as-is; if row heights are in points the row ends up taller
                # than the picture -- confirm whether that padding is wanted.
                ws.row_dimensions[row_idx].height = img.height
                ws.add_image(img, f"A{row_idx}")
                # Rough pixel -> Excel column-width conversion for column A.
                ws.column_dimensions['A'].width = img.width * 0.14
            except Exception as e:
                # A picture that cannot be embedded still keeps its text row.
                print(f"无法插入图片 {path}: {e}")
            for col in range(1, 6):  # columns A-E
                ws.cell(row=row_idx, column=col).alignment = centered
            row_idx += 1

        for col in range(1, 6):
            ws.cell(row=1, column=col).alignment = centered
        wb.save(output_path)
        return failed
if __name__ == "__main__":
    # Script entry point: build the main window, attach the application to
    # it and hand control to the Tk event loop.
    root = tk.Tk()
    app = OCRApp(root)
    root.mainloop()

有些梦虽然遥不可及，但并不是不可能实现。

微信红包封面

回复列表

默认热门正序倒序

首 1 尾

暂无用户组

退出

等级：0级

金钱：

游客：

后台控制面板

站长交流论坛