- 环境说明:pycharm2022.3.2连接虚拟机远程环境,具体详见上一篇。
import pymysql
from pyhive import hive
hive_conn = hive.Connection(host='hadoop102',
port=10000,
database='db_hive')
hive_cursor = hive_conn.cursor()
mysql_conn = pymysql.connect(host='hadoop102', user='xxxx',
password='xxxxxx', database='xxx')
mysql_cursor = mysql_conn.cursor()
hive_cursor.execute('SHOW TABLES')
tables = hive_cursor.fetchall()
for table_name in tables:
print(f'DESCRIBE {table_name[0]}')
hive_cursor.execute(f'DESCRIBE {table_name[0]}')
columns = hive_cursor.fetchall()
create_table_sql = f'CREATE TABLE {table_name[0]} ('
for column in columns:
if column[1].startswith('string'):
data_type = 'VARCHAR(255)'
elif column[1].startswith('int'):
data_type = 'INT'
elif column[1].startswith('double'):
data_type = 'DOUBLE'
else:
data_type = 'VARCHAR(255)'
create_table_sql += f'{column[0]} {data_type}, '
print(create_table_sql)
create_table_sql = create_table_sql[:-2] + ')'
print(create_table_sql)
mysql_cursor.execute(create_table_sql)
hive_cursor.close()
hive_conn.close()
mysql_cursor.close()
mysql_conn.close()