umi-ocr识别文件夹所有文件并导入数据库

最近有个需求:识别身份证上的所有信息。因此这里调用 Umi-OCR 的 HTTP 接口进行识别,并把识别结果写入 MySQL。

import os
import glob
from time import sleep
import requests
import json
import pymysql
import tkinter as tk
from tkinter import ttk

from sympy import true


def write_to_database(image_path, response_text, db_config):
    """Store one OCR result as a row of the ``image_records`` table.

    Args:
        image_path: Value written to the ``path`` column.
        response_text: Value written to the ``text`` column.
        db_config: Keyword arguments forwarded to ``pymysql.connect``.

    A new connection is opened on every call; the insert is issued as a
    parameterized statement and committed before the function returns.
    """
    insert_stmt = "INSERT INTO image_records (path, text) VALUES (%s, %s)"
    row = (image_path, response_text)
    with pymysql.connect(**db_config) as connection, connection.cursor() as cur:
        cur.execute(insert_stmt, row)
        connection.commit()


def get_image_paths(directory, extensions=('.jpg', '.png')):
    """Return the files directly inside ``directory`` that end with ``extensions``.

    Args:
        directory: Folder to search (not recursive). Taken literally, so
            characters such as ``[``, ``*`` or ``?`` in the folder name are
            not interpreted as glob wildcards.
        extensions: Iterable of filename suffixes, each including the dot.

    Returns:
        A list of matching paths, grouped in the order of ``extensions``.
        The order within one extension is whatever ``glob.glob`` yields.
    """
    # Escape glob metacharacters so a folder like "scan[1]" is matched as-is
    # instead of being parsed as a character class (which would match nothing).
    literal_dir = glob.escape(os.fspath(directory))
    image_paths = []
    for ext in extensions:
        pattern = os.path.join(literal_dir, f'*{ext}')
        image_paths.extend(glob.glob(pattern))
    return image_paths


def send_http_post_request(url, data, headers=None, auth=None):
    """POST ``data`` to ``url`` as a JSON body and return the response.

    Args:
        url: Target URL.
        data: Object serialized with ``json.dumps`` to form the request body.
        headers: Optional extra headers; they are merged over the default
            ``Content-Type: application/json`` and win on conflict.
        auth: Passed through unchanged to ``requests.post``.

    Returns:
        The object returned by ``requests.post``.
    """
    base_headers = {'Content-Type': 'application/json'}
    merged_headers = base_headers if headers is None else {**base_headers, **headers}
    payload = json.dumps(data)
    return requests.post(url, data=payload, headers=merged_headers, auth=auth)

def returndata(directory, db_config):
    """Send every file under ``directory`` to the OCR endpoint and store the result.

    Walks ``directory`` recursively. For each file, the path is posted to
    ``http://127.0.0.1:1224/argv`` as ``["--path", <file>]`` and the response
    text (newlines turned into commas, tabs removed) is written to the
    database through ``write_to_database``.

    Args:
        directory: Root folder to walk.
        db_config: Connection settings forwarded to ``write_to_database``.

    Returns:
        Always the builtin ``True`` once the walk finishes. A failure on one
        file is printed and does not stop the run or change the return value.
    """
    for root, _dirs, files in os.walk(directory):
        for filename in files:
            # Short pause between requests (presumably to avoid flooding the
            # local OCR service -- confirm before removing).
            sleep(0.2)
            file_path = os.path.join(root, filename)
            post_data = ["--path", file_path]
            try:
                response = send_http_post_request("http://127.0.0.1:1224/argv", post_data)
                # Treat 4xx/5xx as a failure so an HTTP error body is never
                # stored in the database as if it were recognized text.
                response.raise_for_status()
                response_text = response.text.replace('\n', ',').replace('\t', '')
                write_to_database(file_path, response_text, db_config)
            except Exception as e:
                # Best effort: report the failure and continue with the next file.
                print(f"Error processing file {file_path}: {e}")
    # Builtin True rather than sympy's symbolic ``true``; callers only test
    # truthiness, and this function no longer needs sympy at all.
    return True


def on_click():
    """Handle the Execute button: process the entered folder, then close the window.

    Reads the folder path from the module-level ``entry_directory`` widget,
    runs ``returndata`` on it with the hard-coded local MySQL settings below,
    and destroys the module-level ``window`` when ``returndata`` reports a
    truthy status.
    """
    target_dir = entry_directory.get()

    # Connection settings for the local MySQL instance (hard-coded).
    connection_settings = {
        'host': '127.0.0.1',
        'port': 3306,
        'user': 'root',
        'password': '123456',
        'db': 'xxxxx',
    }

    if returndata(target_dir, connection_settings):
        window.destroy()

def main():
    """Build the import window and run the Tk event loop.

    Creates the module-level ``window`` and ``entry_directory`` objects that
    ``on_click`` reads, lays out a label, a text entry and an Execute button
    in a grid, and blocks in ``mainloop`` until the window is closed.
    """
    global window, entry_directory

    window = tk.Tk()
    window.title("OCR识别导入")
    window.geometry("400x200")  # Window size; adjust as needed.

    # Row 0: prompt label and the folder path entry.
    prompt_label = ttk.Label(window, text="请输入文件夹目录:", style="TLabel")
    prompt_label.grid(row=0, column=0, padx=10, pady=(10, 5), sticky="w")

    entry_directory = ttk.Entry(window, width=30, style="TEntry")
    entry_directory.grid(row=0, column=1, padx=(0, 10), pady=(10, 5), sticky="ew")

    # Row 1: button spanning both columns that triggers the processing.
    run_button = ttk.Button(window, text="Execute", command=on_click, style="TButton")
    run_button.grid(row=1, column=0, columnspan=2, padx=10, pady=10, ipadx=10, sticky="ew")

    window.mainloop()

# Start the GUI only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

之后先安装 PyInstaller:

python -m pip install pyinstaller

再执行以下命令,把脚本打包成可执行程序:

python -m PyInstaller -F -w  --compress test.py

相关推荐

  1. OCR识别文字示例

    2024-04-05 21:44:02       43 阅读
  2. vue3导入文件夹导入文件导出zip、导出

    2024-04-05 21:44:02       57 阅读

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-04-05 21:44:02       98 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-04-05 21:44:02       106 阅读
  3. 在Django里面运行非项目文件

    2024-04-05 21:44:02       87 阅读
  4. Python语言-面向对象

    2024-04-05 21:44:02       96 阅读

热门阅读

  1. C语言每日一题—日期转换问题

    2024-04-05 21:44:02       47 阅读
  2. B000-1114-常量 变量 数据类型

    2024-04-05 21:44:02       38 阅读
  3. 安卓手机APP开发的功能之一:通知概述

    2024-04-05 21:44:02       36 阅读
  4. C++ templates: (4)、引用折叠

    2024-04-05 21:44:02       34 阅读
  5. 题目:求一个3*3矩阵对角线元素之和

    2024-04-05 21:44:02       33 阅读
  6. React 18 中常见的生命周期方法

    2024-04-05 21:44:02       37 阅读
  7. c语言:用for循环输出前n的阶乘

    2024-04-05 21:44:02       32 阅读