123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106 |
- # import pandas as pd
- #
- # # 1. 读取 Excel 文件(假设文件名为 data.xlsx,工作表为第一个)
- # excel_file = 'D:\Study\postgraduate\project\北交新平台\北交大屏数据依赖关系.xlsx'
- #
- # # 只读取需要的三列
- # df = pd.read_excel(excel_file, usecols=['Block', 'Node(dd接口)', 'DD-DB'])
- #
- # for index, row in df.iterrows():
- # block = row['Block']
- # node = row['Node(dd接口)']
- # dd_db_raw = str(row['DD-DB']) # 转为字符串,避免空值出错
- # dd_db_list = dd_db_raw.splitlines() # 根据换行符拆分
- #
- # print(f"Row {index}: Block = {block}, Node = {node}, DD-DB 列表 = {dd_db_list}")
- #
- #
- #
- import pandas as pd
- import pymysql
- import uuid
- # 连接数据库
- conn = pymysql.connect(
- host='dujunlong',
- user='root',
- password='123456',
- database='knowledge_graph',
- charset='utf8mb4'
- )
- cursor = conn.cursor()
- # Block-Node
- BN_relation_id = '1c1291dd463d4879ad377efd20d1a57b'
- # Node → DD-DB 的关系 ID
- ND_relation_id = '95e596b8921a445989234027495289b5'
- # 读取 Excel
- df = pd.read_excel('D:\Study\postgraduate\project\北交新平台\北交大屏数据依赖关系.xlsx', usecols=['Block', 'Node(dd接口)', 'DD-DB'])
- # 清洗列名(防止出错)
- df.columns = df.columns.str.strip()
- # 定义查询函数
- def get_id_by_name(table, name):
- sql = f"SELECT node_guid FROM {table} WHERE node_name = %s"
- cursor.execute(sql, (name,))
- result = cursor.fetchone()
- return result[0] if result else None
- # 定义查询函数
- def get_id_by_name_type(table, name):
- sql = f"SELECT node_guid FROM {table} WHERE node_name = %s and node_type = 'DD-DB'"
- cursor.execute(sql, (name,))
- result = cursor.fetchone()
- return result[0] if result else None
- # 三元组插入函数
- def insert_triple(head_id, relation_id, tail_id):
- triple_id = uuid.uuid4().hex # UUID 去掉 -
- sql = "INSERT INTO t_triplet (tri_guid, head_guid, rela_guid, tail_guid) VALUES (%s, %s, %s, %s)"
- cursor.execute(sql, (triple_id, head_id, relation_id, tail_id))
- print(f"[插入成功] Triple: {triple_id} | {head_id} -[{relation_id}]-> {tail_id}")
- # 遍历每一行
- for index, row in df.iterrows():
- block_name = str(row['Block']).strip()
- node_name = str(row['Node(dd接口)']).strip()
- dd_db_list = str(row['DD-DB']).splitlines()
- # 查询 Block id
- block_id = get_id_by_name('t_node', block_name)
- # 查询 Node id
- node_id = get_id_by_name('t_node', node_name)
- # 查询 DD-DB 列表中每个项的 id
- dd_db_ids = []
- for dd_name in dd_db_list:
- dd_id = get_id_by_name_type('t_node', dd_name.strip())
- if dd_id:
- dd_db_ids.append(dd_id)
- # 插入 Block → Node
- if block_id and node_id:
- insert_triple(block_id, BN_relation_id, node_id)
- # 插入 Node → DD-DB(多个)
- if node_id:
- for dd_id in dd_db_ids:
- if dd_id:
- insert_triple(node_id, ND_relation_id, dd_id)
- print(f"Row {index}: Block ID = {block_id}, Node ID = {node_id}, DD-DB IDs = {dd_db_ids}")
- conn.commit()
- # 关闭连接
- cursor.close()
- conn.close()
|