Browse Source

合并冲突

dujunlong 2 months ago
parent
commit
4cbeba2461

BIN
final/ByRules/__pycache__/util.cpython-39.pyc


+ 2 - 1
final/ByRules/similarity.py

@@ -21,6 +21,7 @@ template_dict = {
     ],
     "sjjy1_B08_output": ["某年全年累计省间交易电量是多少?"],
     "sjjy1_B01_output": ["某年某月交易电量是多少?"],
+    "sjjy1_B02_output": ["某年省间交易电量月内交易电量是多少??"],
 }
 
 # 构造模板和key的映射反向表
@@ -31,7 +32,7 @@ for key, sentences in template_dict.items():
         templates.append(s)
         key_map.append(key)
 
-query = "当月省间交易完成的交易是多少?"
+query = "2024年12月交易电量是多少?"
 
 vectorizer = TfidfVectorizer(tokenizer=jieba_tokenizer)
 tfidf_matrix = vectorizer.fit_transform([query] + templates)

+ 68 - 187
final/ByRules/similarity_answer_json.py

@@ -3,9 +3,6 @@ from sklearn.metrics.pairwise import cosine_similarity
 import jieba
 import os
 import json
+# NOTE(review): these three imports must stay. extract_time_location is still
+# annotated with Tuple[List[Dict], List[str]], so dropping the typing import
+# raises NameError when the module is imported. datetime and re are presumably
+# used by that function's body as well (not visible in this diff) - confirm.
 from datetime import datetime
 from typing import Tuple, List, Dict
 import re
 
 from final.ByRules.util import calculate_sum_by_time_range
 
@@ -224,155 +221,26 @@ def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
 
     return time_results, locations
 
-# def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
-#     current_date = datetime.now()
-#     current_year = current_date.year
-#     current_month = current_date.month
-#
-#     # 匹配绝对时间
-#     absolute_patterns = [
-#         r'(?P<year>\d{4})年(?P<month>\d{1,2})月(?P<day>\d{1,2})日',
-#         r'(?P<year>\d{4})年(?P<month>\d{1,2})月',
-#         r'(?P<year>\d{4})年'
-#     ]
-#
-#     relative_year_mapping = {
-#         '明年': current_year + 1,
-#         '今年': current_year,
-#         '去年': current_year - 1,
-#         '前年': current_year - 2
-#     }
-#
-#     season_mapping = {
-#         '一季度': (1, 3),
-#         '二季度': (4, 6),
-#         '三季度': (7, 9),
-#         '四季度': (10, 12),
-#         '上半年': (1, 6),
-#         '下半年': (7, 12)
-#     }
-#
-#     provinces = [
-#         '北京', '天津', '上海', '重庆', '河北', '山西', '辽宁', '吉林', '黑龙江',
-#         '江苏', '浙江', '安徽', '福建', '江西', '山东', '河南', '湖北', '湖南',
-#         '广东', '海南', '四川', '贵州', '云南', '陕西', '甘肃', '青海', '台湾',
-#         '内蒙古', '广西', '西藏', '宁夏', '新疆', '香港', '澳门'
-#     ]
-#
-#     time_results = []
-#     used_keywords = set()
-#
-#     # 🆕 处理“起止时间段”
-#     range_pattern = r'(?P<start>(\d{4}|今|去|前|明)年(\d{1,2})?月?)到(?P<end>(\d{4}|今|去|前|明)年(\d{1,2})?月?)'
-#     for match in re.finditer(range_pattern, question):
-#         start_raw, end_raw = match.group('start'), match.group('end')
-#
-#         def parse_relative(text):
-#             year = current_year
-#             month = None
-#             if '明年' in text:
-#                 year = current_year + 1
-#             elif '今年' in text or '今' in text:
-#                 year = current_year
-#             elif '去年' in text or '去' in text:
-#                 year = current_year - 1
-#             elif '前年' in text or '前' in text:
-#                 year = current_year - 2
-#             # 提取月份
-#             m = re.search(r'(\d{1,2})月', text)
-#             if m:
-#                 month = int(m.group(1))
-#             return year, month
-#
-#         def parse_absolute(text):
-#             m = re.match(r'(?P<year>\d{4})年(?P<month>\d{1,2})?月?', text)
-#             if m:
-#                 year = int(m.group('year'))
-#                 month = int(m.group('month')) if m.group('month') else None
-#                 return year, month
-#             return None, None
-#
-#         # 统一处理为 year/month
-#         def parse_any(text):
-#             if any(key in text for key in relative_year_mapping.keys()) or text[:1] in ['今', '去', '前', '明']:
-#                 return parse_relative(text)
-#             else:
-#                 return parse_absolute(text)
-#
-#         start_y, start_m = parse_any(start_raw)
-#         end_y, end_m = parse_any(end_raw)
-#         time_results.append({
-#             'start_year': start_y, 'start_month': start_m,
-#             'end_year': end_y, 'end_month': end_m,
-#             'label': f'{start_raw}到{end_raw}',
-#             'raw': match.group()
-#         })
-#         used_keywords.add(match.group())
-#
-#     # 相对+具体月份
-#     relative_absolute_pattern = r'(?P<relative>今|去|前)年(?P<month>\d{1,2})月'
-#     for match in re.finditer(relative_absolute_pattern, question):
-#         if match.group() in used_keywords:
-#             continue
-#         rel = match.group('relative')
-#         month = int(match.group('month'))
-#         year = {'今': current_year, '去': current_year - 1, '前': current_year - 2}.get(rel, current_year)
-#         time_results.append({'year': year, 'month': month, 'raw': match.group()})
-#         used_keywords.add(match.group())
-#
-#     # 绝对时间
-#     for pattern in absolute_patterns:
-#         for match in re.finditer(pattern, question):
-#             if match.group() in used_keywords:
-#                 continue
-#             time_info = {'raw': match.group()}
-#             gd = match.groupdict()
-#             if gd.get('year'):
-#                 time_info['year'] = int(gd['year'])
-#             if gd.get('month'):
-#                 time_info['month'] = int(gd['month'])
-#             if gd.get('day'):
-#                 time_info['day'] = int(gd['day'])
-#             time_results.append(time_info)
-#             used_keywords.add(match.group())
-#
-#     # 单独的相对年份关键词
-#     for term, year in relative_year_mapping.items():
-#         if term in question and term not in used_keywords:
-#             time_results.append({'year': year, 'label': term, 'raw': term})
-#             used_keywords.add(term)
-#
-#     # 当前/上个月
-#     if '当前' in question and '当前' not in used_keywords:
-#         time_results.append({'year': current_year, 'month': current_month, 'label': '当前', 'raw': '当前'})
-#         used_keywords.add('当前')
-#     if '上个月' in question and '上个月' not in used_keywords:
-#         prev_year = current_year if current_month > 1 else current_year - 1
-#         prev_month = current_month - 1 if current_month > 1 else 12
-#         time_results.append({'year': prev_year, 'month': prev_month, 'label': '上个月', 'raw': '上个月'})
-#         used_keywords.add('上个月')
-#
-#     # 季度和半年
-#     for term, (start_month, end_month) in season_mapping.items():
-#         if term in question and term not in used_keywords:
-#             time_results.append({
-#                 'year': current_year,
-#                 'label': term,
-#                 'start_month': start_month,
-#                 'end_month': end_month,
-#                 'raw': term
-#             })
-#             used_keywords.add(term)
-#
-#     # 地点识别
-#     locations = [p for p in provinces if p in question]
-#
-#     return time_results, locations
 # 先用 extract_time_location 判断问句包含哪类时间信息,然后只对结构匹配的模板子集做余弦匹配。
+# def classify_by_time_type(query, time_info):
+#     if any('start_year' in t and 'end_year' in t for t in time_info):
+#         return ['3']  # 时间段
+#     return list(template_dict.keys())  # fallback 所有模板
 def classify_by_time_type(query, time_info):
-    if any('start_year' in t and 'end_year' in t for t in time_info):
-        return ['3']  # 时间段
-    return list(template_dict.keys())  # fallback 所有模板
+    """Choose the template keys that should be scored for a question.
+
+    Only the first entry of ``time_info`` is inspected.
+
+    Args:
+        query: The user question. Currently unused; kept for call compatibility.
+        time_info: List of time dicts (the first element of the tuple returned
+            by ``extract_time_location``). May be empty.
+
+    Returns:
+        A list of ``template_dict`` keys. Always a list, never ``None``.
+    """
+    if not time_info:
+        # No time expression in the question: restrict to templates 19-23.
+        return ['19', '20', '21', '22', '23']
+
+    first = time_info[0]
+
+    # Case 1: both a start and an end year are present, so this is a time range.
+    if 'start_year' in first and 'end_year' in first:
+        return ['3']  # cumulative traded energy from one month to another
+
+    # Case 2: a year and a month are present, so the time is exact to the month.
+    if 'year' in first and 'month' in first:
+        return ['2']  # traded energy for a given year and month
+
+    # Fallback (e.g. a year-only entry): score against every template, as the
+    # previous version did, instead of implicitly returning None.
+    return list(template_dict.keys())
+
 def match_template_with_time_filter(query, template_dict, tokenizer, extract_time_location_func):
     """
     先基于时间信息筛选候选模板,再进行TF-IDF匹配。
@@ -385,7 +253,6 @@ def match_template_with_time_filter(query, template_dict, tokenizer, extract_tim
 
     # 构造候选子模板字典
     filtered_template_dict = {k: template_dict[k] for k in candidate_keys}
-
     # 使用你原来的 TF-IDF 匹配函数
     return match_template(query, filtered_template_dict, tokenizer)
 # 找相似度最高的模板
@@ -455,10 +322,6 @@ def load_template_info(matched_key, json_folder):
 def process_query(query, template_dict, json_folder, tokenizer=jieba_tokenizer):
     # 提取条件
     time_info, location_info = extract_time_location(query)
-    print(time_info)
-    candidate_ids = classify_by_time_type(query, time_info)
-    # 构造候选模板
-    candidates = [template_dict[k][0] for k in candidate_ids]
 
     conditions = {}
     # 匹配模板
@@ -469,7 +332,7 @@ def process_query(query, template_dict, json_folder, tokenizer=jieba_tokenizer):
         extract_time_location_func=extract_time_location
     )
     # 定义阈值
-    similarity_threshold = 0.4
+    similarity_threshold = 0.3
     # ★ 判断相似度阈值
     if score < similarity_threshold:
         return {
@@ -487,11 +350,19 @@ def process_query(query, template_dict, json_folder, tokenizer=jieba_tokenizer):
         }
 
     if time_info:
-        year = time_info[0].get('year')
-        if year:
-            conditions['年'] = year
-        if 'month' in time_info[0]:
-            conditions['月'] = time_info[0]['month']
+        ti = time_info[0]
+        # 先判断是否是区间时间(有start_year/end_year等字段)
+        if 'start_year' in ti and 'end_year' in ti:
+            conditions['start_year'] = ti.get('start_year')
+            conditions['start_month'] = ti.get('start_month')
+            conditions['end_year'] = ti.get('end_year')
+            conditions['end_month'] = ti.get('end_month')
+        else:
+            # 单时间点
+            if 'year' in ti:
+                conditions['年'] = ti['year']
+            if 'month' in ti:
+                conditions['月'] = ti['month']
 
     if location_info:
         unit = map_location_to_unit(location_info[0])
@@ -592,32 +463,42 @@ def find_key_recursively(data, target_key):
     return results
 # query = "当月省间交易完成的交易是多少?"
 # query = "2024年1月到2月累计交易电量是多少?"
-# # query = "但同样阿贾克斯大口径的话我可合金外壳设计文件突然发?"
-# json_folder = "templatesJson"
-#
-#
-# result = process_query(query, template_dict, json_folder)
-#
-# print("匹配的模板 key:", result["matched_key"])
-# print("最相似的模板句:", result["matched_template"])
-# print("相似度分数:", result["similarity_score"])
-# print("类型:", result["type"])
-# print("关键词:", result["keywords"])
-# print("查询字段:", result["target"])
-# print("模型名字", result["name"])
-# print("条件", result["conditions"])
-# print("返回的内容是:", result["content"])
-# print("问句是:", result["query"])
-# print("动作是:", result["play"])
-
-# type = result["type"]
-# content = result["content"]
-#
-# json_data_folder = "..\Json\json_data"
-# if type == "query":
-#     result = smart_find_value(json_data_folder, result["name"],result["conditions"],result["target"] )
-# elif type == "calculate":
-#     result = calculate_sum_by_time_range(json_data_folder, result["name"], result["target"],)
+# Ad-hoc driver: runs one sample question through process_query and then looks
+# up or sums the value in the JSON data folder.
+# NOTE(review): these statements sit at module top level, so they execute
+# whenever this module is imported - consider an `if __name__ == "__main__":`
+# guard.
+query = "2024年12月交易电量是多少?"
+# query = "但同样阿贾克斯大口径的话我可合金外壳设计文件突然发?"
+json_folder = "templatesJson"
+
+
+result = process_query(query, template_dict, json_folder)
+
+print("匹配的模板 key:", result["matched_key"])
+print("最相似的模板句:", result["matched_template"])
+print("相似度分数:", result["similarity_score"])
+print("类型:", result["type"])
+print("关键词:", result["keywords"])
+print("查询字段:", result["target"])
+print("模型名字", result["name"])
+print("条件", result["conditions"])
+print("返回的内容是:", result["content"])
+print("问句是:", result["query"])
+print("动作是:", result["play"])
+
+# Named result_type rather than `type` so the builtin type() is not shadowed
+# for the rest of the module.
+result_type = result["type"]
+content = result["content"]
+
+# Raw string: "\J" and "\j" are not valid escape sequences, so the non-raw
+# literal triggers an invalid-escape warning. The resulting value is identical.
+json_data_folder = r"..\Json\json_data"
+if result_type == "query":
+    # NOTE(review): no ".json" suffix is appended here, unlike the "calculate"
+    # branch below - confirm what smart_find_value expects.
+    fileName = result["dataJsonName"]
+    result = smart_find_value(json_data_folder, fileName, result["conditions"], result["target"])
+    print(result)
+elif result_type == "calculate":
+    conditions = result["conditions"]
+    # Split the start_*/end_* range conditions into two dicts keyed by '年'/'月'.
+    start_conditions = {('年' if 'year' in k else '月'): v for k, v in conditions.items() if k.startswith('start_')}
+    end_conditions = {('年' if 'year' in k else '月'): v for k, v in conditions.items() if k.startswith('end_')}
+    print(start_conditions)
+    print(end_conditions)
+    fileName = result["dataJsonName"] + ".json"
+    result = calculate_sum_by_time_range(json_data_folder, fileName, result["target"], start_conditions, end_conditions)
+    print(result)
 #
 # # 最终回答的文本
 # final_content = content.replace("?", str(result))

+ 1 - 1
final/ByRules/templatesJson/19.json

@@ -5,5 +5,5 @@
   "target": "正在组织",
   "content": "省间交易正在组织的交易为&",
   "play": "讲述文本",
-  "name": "省间交易"
+  "name": "省间交易组织"
 }

+ 1 - 1
final/ByRules/templatesJson/20.json

@@ -5,5 +5,5 @@
   "target": "当月完成",
   "content": "省间交易当月完成的交易为&",
   "play": "讲述文本",
-  "name": "省间交易"
+  "name": "省间交易组织"
 }

+ 1 - 1
final/ByRules/templatesJson/21.json

@@ -5,5 +5,5 @@
   "target": "当年完成",
   "content": "省间交易当年完成的交易为&",
   "play": "讲述文本",
-  "name": "省间交易"
+  "name": "省间交易组织"
 }

+ 1 - 1
final/ByRules/templatesJson/22.json

@@ -5,5 +5,5 @@
   "target": "当年达成电量",
   "content": "省间交易当年达成的电量为&",
   "play": "讲述文本",
-  "name": "省间交易"
+  "name": "省间交易组织"
 }

+ 1 - 1
final/ByRules/templatesJson/23.json

@@ -5,5 +5,5 @@
   "target": "参与交易家次",
   "content": "省间交易当年参与交易的家次为&",
   "play": "讲述文本",
-  "name": "省间交易"
+  "name": "省间交易组织"
 }

+ 1 - 1
final/ByRules/templatesJson/8.1.json

@@ -5,5 +5,5 @@
   "target": "按交易周期划分",
   "content": "省间交易电量按交易周期划分的电量是&",
   "play": "讲述文本",
-  "name": "按 交易周期/交易类型/发电类型/交易方式 划分"
+  "name": "按 交易周期 划分"
 }

+ 1 - 1
final/ByRules/templatesJson/8.2.json

@@ -5,5 +5,5 @@
   "target": "按交易类型划分",
   "content": "省间交易电量按交易类型划分的电量是&",
   "play": "讲述文本",
-  "name": "按 交易周期/交易类型/发电类型/交易方式 划分"
+  "name": "按 交易类型 划分"
 }

+ 1 - 1
final/ByRules/templatesJson/8.3.json

@@ -5,5 +5,5 @@
   "target": "按发电类型划分",
   "content": "省间交易电量按发电类型划分的电量是&",
   "play": "讲述文本",
-  "name": "按 交易周期/交易类型/发电类型/交易方式 划分"
+  "name": "按 发电类型 划分"
 }

+ 1 - 1
final/ByRules/templatesJson/8.4.json

@@ -5,5 +5,5 @@
   "target": "按交易方式划分",
   "content": "省间交易电量按交易方式划分的电量是&",
   "play": "讲述文本",
-  "name": "按 交易周期/交易类型/发电类型/交易方式 划分"
+  "name": "按 交易方式 划分"
 }