zfrr 2 months ago
parent
commit
12ce3986d5

BIN
final/ByRules/__pycache__/app.cpython-39.pyc


BIN
final/ByRules/__pycache__/similarity_answer_json.cpython-39.pyc


BIN
final/ByRules/__pycache__/util.cpython-39.pyc


+ 31 - 30
final/ByRules/similarity_answer_json.py

@@ -34,6 +34,8 @@ template_dict = {
     "9.15": ["某年省间交易电量双边交易电量是多少?"],
     "9.16": ["某年省间交易电量集中交易电量是多少?"],
     "9.17": ["某年省间交易电量挂牌交易电量是多少?"],
+    "16.1": ["当月送出均价最高的是哪个省?","当年送出均价最高的是哪个省?"],
+    "16.2": ["当月送出均价最低的是哪个省?","当年送出均价最低的是哪个省?"],
     "17.1": ["那个省送出电量最高?是多少?"],
     "17.2": ["那个省受入电量最高?是多少?"],
     "17.3": ["那个省送出电量最低?是多少?"],
@@ -209,6 +211,15 @@ def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
         prev_month = current_month - 1 if current_month > 1 else 12
         time_results.append({'year': prev_year, 'month': prev_month, 'label': '上个月', 'raw': '上个月'})
         used_keywords.add('上个月')
+    # ✅ 添加:当年
+    if '当年' in question and '当年' not in used_keywords:
+        time_results.append({'year': current_year, 'label': '当年', 'raw': '当年'})
+        used_keywords.add('当年')
+
+    # ✅ 添加:当月
+    if '当月' in question and '当月' not in used_keywords:
+        time_results.append({'year': current_year, 'month': current_month, 'label': '当月', 'raw': '当月'})
+        used_keywords.add('当月')
 
     # 季度和半年
     for term, (start_month, end_month) in season_mapping.items():
@@ -235,7 +246,7 @@ def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
 def classify_by_time_type(query, time_info):
     if not time_info:
         # 无时间信息时,返回指定模板 19-23
-        return ['19', '20', '21', '22', '23', '17.1', '17.2', '17.3', '17.4']
+        return ['19', '20', '21', '22', '23', '17.1', '17.2', '17.3', '17.4', '18.1', '18.2', '18.3', '18.4']
 
     time = time_info[0]
 
@@ -245,21 +256,21 @@ def classify_by_time_type(query, time_info):
 
     # 情况 2:有 year 和 month,精确到月
     if 'year' in time and 'month' in time:
-        return ['2']  # 某年某月交易电量
+        return ['2','16.1','16.2']  # 某年某月交易电量
 
     # 情况 3:仅 year,全年
     if 'year' in time and 'month' not in time:
-        return ['1','8.1','8.2','8.3','8.4','9.1','9.2','9.3','9.4','9.5','9.6','9.7','9.8','9.9','9.10','9.11','9.12','9.13','9.14','9.15','9.16','9.17']  # 某年全年累计交易电量
+        return ['1','8.1','8.2','8.3','8.4','9.1','9.2','9.3','9.4','9.5','9.6','9.7','9.8','9.9','9.10','9.11','9.12','9.13','9.14','9.15','9.16','9.17','16.1','16.2']  # 某年全年累计交易电量
 def match_template_with_time_filter(query, template_dict, tokenizer, extract_time_location_func):
     """
     先基于时间信息筛选候选模板,再进行TF-IDF匹配。
     """
     # 提取时间
     time_info, _ = extract_time_location_func(query)
-
+    print(time_info)
     # 通过时间判断候选模板 key
     candidate_keys = classify_by_time_type(query, time_info)
-
+    print(candidate_keys)
     # 构造候选子模板字典
     filtered_template_dict = {k: template_dict[k] for k in candidate_keys}
     # 使用你原来的 TF-IDF 匹配函数
@@ -473,36 +484,26 @@ def find_key_recursively(data, target_key):
 # query = "当月省间交易完成的交易是多少?"
 # query = "2024年1月到2月累计交易电量是多少?"
 # query = "2023年省间交易电量新能源交易电量是多少??"
-# # query = "但同样阿贾克斯大口径的话我可合金外壳设计文件突然发?"
+# query = "但同样阿贾克斯大口径的话我可合金外壳设计文件突然发?"
 
 
-# query = "2023年省间交易电量新能源交易电量是多少??"
-# # query = "但同样阿贾克斯大口径的话我可合金外壳设计文件突然发?"
+query = "当月送出均价最高的是哪个省??"
 
-# json_folder = "templatesJson"
+json_folder = "templatesJson"
 
-#
-# json_folder = "templatesJson"
-# #
-# #
-# result = process_query(query, template_dict, json_folder)
-#
-
-
-# result = process_query(query, template_dict, json_folder)
-#
+result = process_query(query, template_dict, json_folder)
 
-# print("匹配的模板 key:", result["matched_key"])
-# print("最相似的模板句:", result["matched_template"])
-# print("相似度分数:", result["similarity_score"])
-# print("类型:", result["type"])
-# print("关键词:", result["keywords"])
-# print("查询字段:", result["target"])
-# print("模型名字", result["name"])
-# print("条件", result["conditions"])
-# print("返回的内容是:", result["content"])
-# print("问句是:", result["query"])
-# print("动作是:", result["play"])
+print("匹配的模板 key:", result["matched_key"])
+print("最相似的模板句:", result["matched_template"])
+print("相似度分数:", result["similarity_score"])
+print("类型:", result["type"])
+print("关键词:", result["keywords"])
+print("查询字段:", result["target"])
+print("模型名字", result["name"])
+print("条件", result["conditions"])
+print("返回的内容是:", result["content"])
+print("问句是:", result["query"])
+print("动作是:", result["play"])
 #
 # type = result["type"]
 # content = result["content"]

+ 11 - 0
final/ByRules/templatesJson/16.1.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B04_output",
+  "type": "compare_max_min",
+  "value_key": "送出均价",
+  "name_key": "单位",
+  "find_max": "True",
+  "content": "送出均价最高的省份是&,送出均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "16.1"
+}

+ 11 - 0
final/ByRules/templatesJson/16.2.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B04_output",
+  "type": "compare_max_min",
+  "value_key": "送出均价",
+  "name_key": "单位",
+  "find_max": "False",
+  "content": "送出均价最低的省份是&,送出均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "16.2"
+}

+ 27 - 13
final/ByRules/util.py

@@ -71,6 +71,7 @@ def find_max_or_min_value(folder_path, file_name, value_key: str, name_key: str,
         print(f"文件 {file_path} 不存在")
         return None
 
+    # 读取主数据文件
     with open(file_path, 'r', encoding='utf-8') as f:
         try:
             data = json.load(f)
@@ -78,30 +79,41 @@ def find_max_or_min_value(folder_path, file_name, value_key: str, name_key: str,
             print(f"JSON 解析失败:{e}")
             return None
 
-    # 加载映射文件
+    # 读取名称映射文件(可选)
     name_mapping = {}
     if mapping_file:
-        with open(mapping_file, 'r', encoding='utf-8') as mf:
-            try:
-                name_mapping = json.load(mf)
-            except json.JSONDecodeError as e:
-                print(f"映射文件 JSON 解析失败:{e}")
+        if os.path.exists(mapping_file):
+            with open(mapping_file, 'r', encoding='utf-8') as mf:
+                try:
+                    name_mapping = json.load(mf)
+                except json.JSONDecodeError as e:
+                    print(f"映射文件 JSON 解析失败:{e}")
+        else:
+            print(f"映射文件 {mapping_file} 不存在")
 
-    # 比较逻辑
+    # 初始化目标记录和值
     target_record = None
     target_value = None
 
     for record in data:
         if not isinstance(record, dict):
             continue
-        if conditions and not all(record.get(k) == v for k, v in conditions.items()):
-            continue
 
+        # 检查条件是否匹配
+        if conditions:
+            if not all(k in record and record[k] == v for k, v in conditions.items()):
+                continue
+
+        # 安全地提取数值字段
+        value_raw = record.get(value_key)
+        if value_raw is None:
+            continue
         try:
-            value = float(record.get(value_key, 0))
-        except ValueError:
+            value = float(value_raw)
+        except (ValueError, TypeError):
             continue
 
+        # 判断是否为新的最大/最小值
         if target_value is None or (find_max and value > target_value) or (not find_max and value < target_value):
             target_value = value
             target_record = record
@@ -110,12 +122,14 @@ def find_max_or_min_value(folder_path, file_name, value_key: str, name_key: str,
         return None
 
     unit_code = target_record.get(name_key)
-    province = name_mapping.get(unit_code, unit_code)  # 优先用映射名
+    province = name_mapping.get(unit_code, unit_code)  # 映射优先,找不到用原值
+
     return {
-        # "单位编码": unit_code,
         name_key: province,
         value_key: target_value
     }
+        # 如果需要,也可以加上原始 unit_code:
+        # "单位
 # 找TopN
 def find_top_n_by_value(folder_path, file_name, value_key, name_key, mapping_file=None, conditions=None, top_n=3, descending=True):
     file_path = os.path.join(folder_path, file_name)