解析oracle的DDL语句生成高斯内表及表字段主键配置

oracle的DDL语句如下:

CREATE TABLE TPPROD.CONFIG (
	NO VARCHAR2(50),
	CONFIGCODE VARCHAR2(400),
	CONFIGVALUE VARCHAR2(400),
	CONSTRAINT PK_GUENDORASSISTCONFIG PRIMARY KEY (NO,CONFIGCODE)
);
CREATE UNIQUE INDEX PK_GUENDORASSISTCONFIG ON TPPROD.GUENDORASSISTCONFIG (NO,CONFIGCODE);

解析oracle表生成高斯内表入口:Oracle2GaussCreateMana.scala

package com.tpiods.sqoop.oracle2gauss

import java.io.{File, FileInputStream, PrintWriter}
/**
 * 输入: oracle建表语句
 * 输出: gauss内表建表语句
 */
object Oracle2GaussCreateMana {
  def main(args: Array[String]): Unit = {
    // 指定输入sql语句的文件
    val input = "ods_etl/src/main/resources/work0409_test/test_tab.sql"
    val lines = scala.io.Source.fromInputStream(new FileInputStream(input)).getLines()

    val str = readSql(lines)
    val str1 = replaceSql(str)
    val str2 = addTeacCols3(str1)
    // 指定输出sql语句的文件
    val output1 = "ods_etl/src/main/resources/work0409_test/test_tab_mana.sql"
    val writer = new PrintWriter(new File(output1))
    writer.write(str2)
    writer.close()
  }

  /**
   * 处理建表语句的转换逻辑
   * @param lines
   * @return
   */
  def readSql(lines: Iterator[String]): String = {
    val sb = new StringBuilder
    var colCnt = 0
    var pkCols: String = ""
    for (line <- lines if line != null) {
      if (line.startsWith("CREATE TABLE") || line.startsWith("  CREATE TABLE") || line.startsWith("  CREATE TABLE")) {
        colCnt = colCnt + 1
        val tabName = line.split("\\.")(1).replaceAll(" ", "").replaceAll("\\(", "")
        if (sb.length >= 1) {
          sb.append(
            s"""
              |set search_path = ods;
              |drop table if exists ${tabName};
              |create table if not exists ${tabName} (\t\n""".stripMargin)
        } else {
          sb.append(
            s"""
              |set search_path = ods;
              |drop table if exists ${tabName};
              |create table if not exists ${tabName} (\t\n""".stripMargin)
        }
      }
      if (line.startsWith("\t\"")  || line.startsWith("\t")) {
        val split = line.split("\\s+")
        if (!("CONSTRAINT" == split(1)) && split.length >= 3) {
          if (colCnt == 1) {
            sb.append(" ").append(split(1)).append("\t").append(split(2)).append("\n")
          } else {
            sb.append(",").append(split(1)).append("\t").append(split(2)).append("\n")
          }
        }
        colCnt = 0
      }
      if (line.startsWith("   (") || line.startsWith(" (    ")) {
        val split = line.split("\\s+")
        sb.append(split(2)).append("\t").append(split(3)).append("\n")
      }
      if (line.indexOf("PRIMARY KEY ")> 0) {
        pkCols = line.split("PRIMARY KEY ")(1)
      }
      if (line.startsWith(");") || line.endsWith("MOVEMENT  |") || line.startsWith("   )")) {
//        sb.append(
//          s""") WITH (orientation=column, compression=low)
//            |DISTRIBUTE BY HASH${pkCols}
//            |;
//            |""".stripMargin)
        // 不按主键做hash,则打开下面这段代码
        sb.append(
          s""");
             |""".stripMargin)
      }
    }
    sb.toString()
  }

  /**
   * oracle数据类型替换成gauss对应的数据类型
   * @param str
   * @return
   */
  def replaceSql(str: String): String = {
    str.toLowerCase()
      .replaceAll("\"", "")
      .replaceAll("number\\(\\*+,\\d*\\)", "number")
      .replaceAll(",\n", "\n")
      .replaceAll("\\b" + "nchar" + "\\b", "char")
      .replaceAll("\\b" + "varchar2|nvarchar2" + "\\b", "varchar")
      .replaceAll("\\b" + "long" + "\\b", "text")
  }


  /**
   *
   * @param lines2 读入字段类型替换后的sql
   * @return 业务字段之后增加技术字段
   */
  def addTeacCols(lines2: String): String = {
    lines2.replaceAll("\\) with",
      """,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |)
        |with""".stripMargin)
  }

  /**
   *不按主键做hash,调用该方法
   * @param lines2 读入字段类型替换后的sql
   * @return 业务字段之后增加技术字段
   */
  def addTeacCols2(lines2: String): String = {
    lines2.replaceAll("\\);",
      """,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |);
        |""".stripMargin)
  }

  /**
   * 按主键做hash
   * @param lines2 读入字段类型替换后的sql
   * @return 业务字段之前增加技术字段
   */
  def addTeacCols3(lines2: String): String = {
    lines2.replaceAll("\t\n",
      """
        | tabname varchar
        |,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |,""".stripMargin)
  }

  /**
   * 不按主键做hash,调用这个方法
   * @param lines2 读入字段类型替换后的sql
   * @return 业务字段之前增加技术字段
   */
  def addTeacCols4(lines2: String): String = {
    lines2.replaceAll("\t\n",
      """
        | tabname varchar
        |,oper	varchar
        |,mtime	numeric(38,5)
        |,source_sys	varchar
        |,etl_time	varchar
        |,etl_date	varchar
        |,""".stripMargin)
  }
}

运行以上代码,输出如下:

set search_path = ods;
drop table if exists config;
create table if not exists config (
 tabname varchar
,oper	varchar
,mtime	numeric(38,5)
,source_sys	varchar
,etl_time	varchar
,etl_date	varchar
, no	varchar(50)
,configcode	varchar(400)
,configvalue	varchar(400)
);

解析oracle表生成表名字段主键配置入口:OracleGeneTabColsPk.scala

package com.tpiods.sqoop.oracle2gauss

import java.io.{File, FileInputStream, PrintWriter}
/**
 * 输入: oracle源系统建表语句
 * 输出: 表名|列名...|主键
 */
object OracleGeneTabColsPk {
  def main(args: Array[String]): Unit = {
    // 指定输入sql语句的文件
    val input = "ods_etl/src/main/resources/work0409_test/test_tab.sql"
    val lines = scala.io.Source.fromInputStream(new FileInputStream(input)).getLines()

    val str = readSql(lines)
    // 指定输出sql语句的文件
    val output = "ods_etl/src/main/resources/work0409_test/test_tab_tabcolspk.txt"
    val writer = new PrintWriter(new File(output))
    writer.write(str)
    writer.close()
  }
  def readSql(lines: Iterator[String]): String = {
    val sb = new StringBuilder
    var tableName: String = null
    for (tempString <- lines if tempString != null) {
      if (tempString.startsWith("CREATE TABLE") || tempString.startsWith("  CREATE TABLE")) {
        tableName = tempString.split("\\.")(1).replaceAll(" ", "")
        sb.append("\n").append(tableName).append("|")
      } else if (tempString.indexOf("PRIMARY KEY") >= 0) {
        val pkName = tempString.split("\\(")(1)
              .replaceAll(" ", "")
              .replaceAll("\\)", "")
        sb.append("|").append(pkName)
      }
      else if (tempString.startsWith("   (") || tempString.startsWith(" (    ")) {
        val split = tempString.split("\\s+")
        sb.append(split(2)).append(",")
      }
      else if (tempString.startsWith("\t\"") || tempString.startsWith("\t")) {
        val split = tempString.split("\\s+")
        if (!("CONSTRAINT" == split(1)) && split.length >= 3) {
          sb.append(split(1)).append(",")
        }
      }
    }
    val sb2 = sb.toString().toLowerCase()
        .replaceAll("\"", "")
        .replaceAll(",\n", "\n")
        .replaceAll(",\\|", "\\|")
        .replaceAll("\\(\\|", "\\|")
    if (sb2.endsWith(",")) {
      sb2.substring(0, sb2.length() - 1)
    } else {
      sb2
    }
  }

}

运行以上代码,输出如下:

config|no,configcode,configvalue|no,configcode

最近更新

  1. docker php8.1+nginx base 镜像 dockerfile 配置

    2024-04-11 12:12:03       98 阅读
  2. Could not load dynamic library ‘cudart64_100.dll‘

    2024-04-11 12:12:03       106 阅读
  3. 在Django里面运行非项目文件

    2024-04-11 12:12:03       87 阅读
  4. Python语言-面向对象

    2024-04-11 12:12:03       96 阅读

热门阅读

  1. LVS NAT、DR、FULL-NAT 模式介绍

    2024-04-11 12:12:03       34 阅读
  2. 浅入浅出容器化部署

    2024-04-11 12:12:03       38 阅读
  3. 访问网站时IP被阻止?解决方法

    2024-04-11 12:12:03       57 阅读
  4. 解决moviepy保存的视频画质不清晰问题

    2024-04-11 12:12:03       38 阅读
  5. 4.9

    4.9

    2024-04-11 12:12:03      37 阅读
  6. 使用 IEEE (1735) Verilog 标准机制进行 IP 保护

    2024-04-11 12:12:03       41 阅读
  7. ISBN 正则表达式及代码示例

    2024-04-11 12:12:03       39 阅读
  8. Leetcode-1702-修改后的最大二进制字符串-c++

    2024-04-11 12:12:03       42 阅读
  9. vue监听键盘回车事件的三种方法

    2024-04-11 12:12:03       32 阅读
  10. 学习基于pytorch的VGG图像分类 day3

    2024-04-11 12:12:03       37 阅读
  11. 面试算法-168-LRU 缓存

    2024-04-11 12:12:03       32 阅读