OpenCV表格图片寻找有效的x、y坐标并删除异常点

需求描述:

对表格图片,识别出表格里的横、纵坐标列表,并剔除异常点

解决方法:

  1. 通过opencv的getStructuringElement识别出横、竖线
  2. 通过bitwise_and取得交点并去除表格线
  3. 获取x和y的所有可能点,按照相邻点不超过阈值来筛选每一行、列最大的y和x
  4. 对于个别异常点通过卡图片临近点阈值、面积过滤、自定义异常筛选剔除
  5. 自定义异常筛选主要是通过对相邻坐标数据进行分组统计,组内点数少于指定阈值的认为是异常点

import cv2
import pandas as pd
import numpy as np

def outset(df, gap=10, max_group_size=40, debug_csv=None):
    """Return the coordinates that belong to sparsely populated clusters.

    The values in ``df['point']`` are walked in their given order (the caller
    passes them sorted) and split into runs: a new run starts whenever two
    neighbouring values differ by more than ``gap``. Every value that sits in
    a run of at most ``max_group_size`` entries is reported as an outlier.

    Args:
        df: DataFrame with a numeric ``'point'`` column. It is not modified.
        gap: Largest absolute difference between neighbours that still keeps
            them in the same run.
        max_group_size: Runs with this many entries or fewer are outliers.
        debug_csv: Optional path; when given, the per-row run assignment is
            written there as CSV. Nothing is written by default.

    Returns:
        Sorted list of the distinct outlier coordinates as Python ints
        (empty when ``df`` has no rows).
    """
    points = df['point'].to_numpy()
    if points.size == 0:
        # Nothing to group; avoid indexing into empty arrays below.
        outliers = []
    else:
        # Differences are taken in float so unsigned inputs cannot wrap around.
        steps = np.abs(np.diff(points.astype(float)))
        # A step larger than `gap` closes the current run, so the running
        # count of such steps is the 0-based run id of the following row.
        group_ids = np.concatenate(([0], np.cumsum(steps > gap)))
        group_sizes = np.bincount(group_ids)
        in_small_group = group_sizes[group_ids] <= max_group_size
        outliers = sorted(set(points[in_small_group].astype(int).tolist()))
        if debug_csv is not None:
            # 1-based ids in a 'group0' column, for inspection only.
            pd.DataFrame(
                {'point': points, 'group0': group_ids + 1}
            ).to_csv(debug_csv)
    print("异常X坐标********")
    print(outliers)
    print("异常X坐标********")
    return outliers

def _cluster_ends(sorted_values, min_gap):
    """Return the last value of each run in an ascending integer array.

    A run ends at index ``i`` when ``sorted_values[i + 1] - sorted_values[i]``
    is at least ``min_gap``; the final element always ends the last run.
    An empty input yields an empty list.
    """
    if sorted_values.size == 0:
        return []
    ends_run = np.diff(sorted_values) >= min_gap
    return list(sorted_values[:-1][ends_run]) + [sorted_values[-1]]


def seg_pic(img):
    """Locate the grid-line coordinates of a table image.

    Horizontal and vertical lines are extracted with morphological opening,
    their intersections are collected, and the intersection pixels are
    clustered into one x per table column and one y per table row. Points
    near the image border, x clusters reported by ``outset`` and small
    contour blobs are discarded as outliers.

    Args:
        img: Path of the image file. The part after the last ``/`` is also
            used as the file name of the intersection mask written to
            ``D:/bitwiseAnd/``.

    Returns:
        Tuple ``(image, xs, ys)``: the loaded BGR image, the list of column
        x coordinates and the list of row y coordinates.

    Raises:
        FileNotFoundError: If OpenCV cannot read ``img``.
    """
    image = cv2.imread(img, 1)
    if image is None:
        raise FileNotFoundError("cannot read image: {}".format(img))
    # shape is (rows, cols, ...): height first, then width.
    height, width = image.shape[0:2]
    print(height, width)
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    binary = cv2.adaptiveThreshold(~gray, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C,
                                   cv2.THRESH_BINARY, 35, -5)

    rows, cols = binary.shape
    # Opening with a wide, 1-pixel-high kernel keeps only horizontal lines.
    # max(1, ...) keeps the kernel valid for images narrower than the scale.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (max(1, cols // 40), 1))
    horizontal = cv2.dilate(cv2.erode(binary, kernel, iterations=1),
                            kernel, iterations=1)
    # Opening with a tall, 1-pixel-wide kernel keeps only vertical lines.
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, max(1, rows // 20)))
    vertical = cv2.dilate(cv2.erode(binary, kernel, iterations=1),
                          kernel, iterations=1)

    # Pixels present in both masks are the line intersections.
    bitwiseAnd = cv2.bitwise_and(horizontal, vertical)
    # Debug dump of the intersection mask.
    cv2.imwrite(('D:/bitwiseAnd/' + img.split("/")[-1]), bitwiseAnd)
    ys, xs = np.where(bitwiseAnd > 0)

    # Column candidates: drop x values within 10 px of the left/right border,
    # then keep the largest x of each run (neighbours more than 20 px apart
    # start a new run; the values are integers, so "> 20" is ">= 21").
    myxs = np.sort(xs)
    myxs = myxs[(myxs > 10) & (myxs < width - 10)]
    mylistx = _cluster_ends(myxs, 21)

    # Intersections within 20 px of the top/bottom border are unreliable:
    # their rows are dropped and so are the columns they sit on.
    near_edge = (ys <= 20) | (ys >= height - 20)
    edge_xs = set(xs[near_edge].tolist())
    mylistx = [x for x in mylistx if x not in edge_xs]
    myys = np.sort(ys)
    myys = myys[(myys > 20) & (myys < height - 20)]

    # Row candidates: largest y of each run, runs split at gaps >= 23 px.
    mylisty = _cluster_ends(myys, 23)

    # Statistical outliers among the x candidates; skipped when there are
    # no candidates at all.
    del_x = list(outset(pd.DataFrame({'point': myxs}))) if myxs.size else []
    outlier_xs = set(del_x)
    mylistx = [x for x in mylistx if x not in outlier_xs]
    # Every y that occurs on an outlier column is discarded as well. The
    # list is rebuilt instead of being mutated while it is iterated, which
    # would skip the element following each removal.
    outlier_ys = set(ys[np.isin(xs, del_x)].tolist())
    mylisty = [y for y in mylisty if y not in outlier_ys]

    # Area-based correction of outliers. [-2] selects the contour list for
    # both the 2-value and the 3-value return form of findContours. The
    # contours are visited unsorted: each one only removes its own x, so the
    # visiting order does not change the result.
    contours = cv2.findContours(np.uint8(bitwiseAnd), cv2.RETR_TREE,
                                cv2.CHAIN_APPROX_SIMPLE)[-2]
    for contour in contours:
        area = cv2.contourArea(contour)
        if 100 > area >= 57:
            points_rect = cv2.boxPoints(cv2.minAreaRect(contour))
            abnormalx = int(points_rect[1][0])
            if abnormalx in mylistx:
                print("异常区域:\t", abnormalx, area)
                mylistx.remove(abnormalx)

    return image, mylistx, mylisty

if __name__ == '__main__':
    # Demo run: process one sample table image and show the returned
    # (image, x coordinates, y coordinates) tuple.
    sample_path = r"D:/testslope_corr/_21.png"
    result = seg_pic(sample_path)
    print(result)

结果示例:

相关推荐

  1. opencv如何寻找图片轮廓

    2024-04-20 21:36:03       31 阅读

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-04-20 21:36:03       94 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-04-20 21:36:03       100 阅读
  3. 在Django里面运行非项目文件

    2024-04-20 21:36:03       82 阅读
  4. Python语言-面向对象

    2024-04-20 21:36:03       91 阅读

热门阅读

  1. 23种设计模式之行为模式篇

    2024-04-20 21:36:03       35 阅读
  2. Docker 部署 jenkins 并正确迁移到新服务器

    2024-04-20 21:36:03       41 阅读
  3. 【rust】解析代码有感

    2024-04-20 21:36:03       32 阅读
  4. 【LeetCode热题100】【动态规划】单词拆分

    2024-04-20 21:36:03       38 阅读
  5. 前后端分离图书管理系统项目

    2024-04-20 21:36:03       35 阅读
  6. 【华为OD机试】虚拟理财游戏【C卷|100分】

    2024-04-20 21:36:03       43 阅读
  7. 对外观模式的理解

    2024-04-20 21:36:03       35 阅读
  8. AWS-ECR 使用python一键删除ecr所有存储库

    2024-04-20 21:36:03       32 阅读
  9. 九河云:选择AWS有什么优势?

    2024-04-20 21:36:03       39 阅读
  10. 【YOLO改进】主干SE模块(基于MMYOLO)

    2024-04-20 21:36:03       36 阅读
  11. 解决AGP升级到8.0后编译报错kaptGenerateStubsDebugKotlin

    2024-04-20 21:36:03       42 阅读