insertTriple.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106
  1. # import pandas as pd
  2. #
  3. # # 1. 读取 Excel 文件(假设文件名为 data.xlsx,工作表为第一个)
  4. # excel_file = 'D:\Study\postgraduate\project\北交新平台\北交大屏数据依赖关系.xlsx'
  5. #
  6. # # 只读取需要的三列
  7. # df = pd.read_excel(excel_file, usecols=['Block', 'Node(dd接口)', 'DD-DB'])
  8. #
  9. # for index, row in df.iterrows():
  10. # block = row['Block']
  11. # node = row['Node(dd接口)']
  12. # dd_db_raw = str(row['DD-DB']) # 转为字符串,避免空值出错
  13. # dd_db_list = dd_db_raw.splitlines() # 根据换行符拆分
  14. #
  15. # print(f"Row {index}: Block = {block}, Node = {node}, DD-DB 列表 = {dd_db_list}")
  16. #
  17. #
  18. #
  19. import pandas as pd
  20. import pymysql
  21. import uuid
  22. # 连接数据库
  23. conn = pymysql.connect(
  24. host='dujunlong',
  25. user='root',
  26. password='123456',
  27. database='knowledge_graph',
  28. charset='utf8mb4'
  29. )
  30. cursor = conn.cursor()
  31. # Block-Node
  32. BN_relation_id = '1c1291dd463d4879ad377efd20d1a57b'
  33. # Node → DD-DB 的关系 ID
  34. ND_relation_id = '95e596b8921a445989234027495289b5'
  35. # 读取 Excel
  36. df = pd.read_excel('D:\Study\postgraduate\project\北交新平台\北交大屏数据依赖关系.xlsx', usecols=['Block', 'Node(dd接口)', 'DD-DB'])
  37. # 清洗列名(防止出错)
  38. df.columns = df.columns.str.strip()
  39. # 定义查询函数
  40. def get_id_by_name(table, name):
  41. sql = f"SELECT node_guid FROM {table} WHERE node_name = %s"
  42. cursor.execute(sql, (name,))
  43. result = cursor.fetchone()
  44. return result[0] if result else None
  45. # 定义查询函数
  46. def get_id_by_name_type(table, name):
  47. sql = f"SELECT node_guid FROM {table} WHERE node_name = %s and node_type = 'DD-DB'"
  48. cursor.execute(sql, (name,))
  49. result = cursor.fetchone()
  50. return result[0] if result else None
  51. # 三元组插入函数
  52. def insert_triple(head_id, relation_id, tail_id):
  53. triple_id = uuid.uuid4().hex # UUID 去掉 -
  54. sql = "INSERT INTO t_triplet (tri_guid, head_guid, rela_guid, tail_guid) VALUES (%s, %s, %s, %s)"
  55. cursor.execute(sql, (triple_id, head_id, relation_id, tail_id))
  56. print(f"[插入成功] Triple: {triple_id} | {head_id} -[{relation_id}]-> {tail_id}")
  57. # 遍历每一行
  58. for index, row in df.iterrows():
  59. block_name = str(row['Block']).strip()
  60. node_name = str(row['Node(dd接口)']).strip()
  61. dd_db_list = str(row['DD-DB']).splitlines()
  62. # 查询 Block id
  63. block_id = get_id_by_name('t_node', block_name)
  64. # 查询 Node id
  65. node_id = get_id_by_name('t_node', node_name)
  66. # 查询 DD-DB 列表中每个项的 id
  67. dd_db_ids = []
  68. for dd_name in dd_db_list:
  69. dd_id = get_id_by_name_type('t_node', dd_name.strip())
  70. if dd_id:
  71. dd_db_ids.append(dd_id)
  72. # 插入 Block → Node
  73. if block_id and node_id:
  74. insert_triple(block_id, BN_relation_id, node_id)
  75. # 插入 Node → DD-DB(多个)
  76. if node_id:
  77. for dd_id in dd_db_ids:
  78. if dd_id:
  79. insert_triple(node_id, ND_relation_id, dd_id)
  80. print(f"Row {index}: Block ID = {block_id}, Node ID = {node_id}, DD-DB IDs = {dd_db_ids}")
  81. conn.commit()
  82. # 关闭连接
  83. cursor.close()
  84. conn.close()