Browse Source

合并冲突

dujunlong 2 months ago
parent
commit
b486b20285

BIN
final/ByRules/__pycache__/app.cpython-39.pyc


BIN
final/ByRules/__pycache__/similarity_answer_json.cpython-39.pyc


BIN
final/ByRules/__pycache__/util.cpython-39.pyc


+ 40 - 6
final/ByRules/similarity_answer_json.py

@@ -34,10 +34,16 @@ template_dict = {
     "9.15": ["某年省间交易电量双边交易电量是多少?"],
     "9.16": ["某年省间交易电量集中交易电量是多少?"],
     "9.17": ["某年省间交易电量挂牌交易电量是多少?"],
+    "16.1": ["当月送出均价最高的是哪个省?","当年送出均价最高的是哪个省?"],
+    "16.2": ["当月送出均价最低的是哪个省?","当年送出均价最低的是哪个省?"],
     "17.1": ["那个省送出电量最高?是多少?"],
     "17.2": ["那个省受入电量最高?是多少?"],
     "17.3": ["那个省送出电量最低?是多少?"],
     "17.4": ["那个省受入电量最低?是多少?"],
+    "18.1": ["那个省送出均价最高?是多少?"],
+    "18.2": ["那个省受入均价最高?是多少?"],
+    "18.3": ["那个省送出均价最低?是多少?"],
+    "18.4": ["那个省受入均价最低?是多少?"],
     "19": ["省间交易正在组织的交易有多少?"],
     "20": ["省间交易当月完成的交易有多少?"],
     "21": ["省间交易当年完成的交易有多少?"],
@@ -205,6 +211,15 @@ def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
         prev_month = current_month - 1 if current_month > 1 else 12
         time_results.append({'year': prev_year, 'month': prev_month, 'label': '上个月', 'raw': '上个月'})
         used_keywords.add('上个月')
+    # ✅ 添加:当年
+    if '当年' in question and '当年' not in used_keywords:
+        time_results.append({'year': current_year, 'label': '当年', 'raw': '当年'})
+        used_keywords.add('当年')
+
+    # ✅ 添加:当月
+    if '当月' in question and '当月' not in used_keywords:
+        time_results.append({'year': current_year, 'month': current_month, 'label': '当月', 'raw': '当月'})
+        used_keywords.add('当月')
 
     # 季度和半年
     for term, (start_month, end_month) in season_mapping.items():
@@ -231,7 +246,7 @@ def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
 def classify_by_time_type(query, time_info):
     if not time_info:
         # 无时间信息时,返回指定模板 19-23 以及 17.1-17.4、18.1-18.4
-        return ['19', '20', '21', '22', '23', '17.1', '17.2', '17.3', '17.4']
+        return ['19', '20', '21', '22', '23', '17.1', '17.2', '17.3', '17.4', '18.1', '18.2', '18.3', '18.4']
 
     time = time_info[0]
 
@@ -241,21 +256,21 @@ def classify_by_time_type(query, time_info):
 
     # 情况 2:有 year 和 month,精确到月
     if 'year' in time and 'month' in time:
-        return ['2']  # 某年某月交易电量
+        return ['2','16.1','16.2']  # 某年某月交易电量
 
     # 情况 3:仅 year,全年
     if 'year' in time and 'month' not in time:
-        return ['1','8.1','8.2','8.3','8.4','9.1','9.2','9.3','9.4','9.5','9.6','9.7','9.8','9.9','9.10','9.11','9.12','9.13','9.14','9.15','9.16','9.17']  # 某年全年累计交易电量
+        return ['1','8.1','8.2','8.3','8.4','9.1','9.2','9.3','9.4','9.5','9.6','9.7','9.8','9.9','9.10','9.11','9.12','9.13','9.14','9.15','9.16','9.17','16.1','16.2']  # 某年全年累计交易电量
 def match_template_with_time_filter(query, template_dict, tokenizer, extract_time_location_func):
     """
     先基于时间信息筛选候选模板,再进行TF-IDF匹配。
     """
     # 提取时间
     time_info, _ = extract_time_location_func(query)
-
+    print(time_info)
     # 通过时间判断候选模板 key
     candidate_keys = classify_by_time_type(query, time_info)
-
+    print(candidate_keys)
     # 构造候选子模板字典
     filtered_template_dict = {k: template_dict[k] for k in candidate_keys}
     # 使用你原来的 TF-IDF 匹配函数
@@ -469,7 +484,7 @@ def find_key_recursively(data, target_key):
 # query = "当月省间交易完成的交易是多少?"
 # query = "2024年1月到2月累计交易电量是多少?"
 # query = "2023年省间交易电量新能源交易电量是多少??"
-# # query = "但同样阿贾克斯大口径的话我可合金外壳设计文件突然发?"
+# query = "但同样阿贾克斯大口径的话我可合金外壳设计文件突然发?"
 
 
 # query = "2023年省间交易电量新能源交易电量是多少??"
@@ -499,6 +514,25 @@ def find_key_recursively(data, target_key):
 # print("返回的内容是:", result["content"])
 # print("问句是:", result["query"])
 # print("动作是:", result["play"])
+
+# query = "当月送出均价最高的是哪个省??"
+#
+# json_folder = "templatesJson"
+#
+# result = process_query(query, template_dict, json_folder)
+#
+# print("匹配的模板 key:", result["matched_key"])
+# print("最相似的模板句:", result["matched_template"])
+# print("相似度分数:", result["similarity_score"])
+# print("类型:", result["type"])
+# print("关键词:", result["keywords"])
+# print("查询字段:", result["target"])
+# print("模型名字", result["name"])
+# print("条件", result["conditions"])
+# print("返回的内容是:", result["content"])
+# print("问句是:", result["query"])
+# print("动作是:", result["play"])
+
 #
 # type = result["type"]
 # content = result["content"]

+ 11 - 0
final/ByRules/templatesJson/16.1.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B04_output",
+  "type": "compare_max_min",
+  "value_key": "送出均价",
+  "name_key": "单位",
+  "find_max": "True",
+  "content": "送出均价最高的省份是&,送出均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "16.1"
+}

+ 11 - 0
final/ByRules/templatesJson/16.2.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B04_output",
+  "type": "compare_max_min",
+  "value_key": "送出均价",
+  "name_key": "单位",
+  "find_max": "False",
+  "content": "送出均价最低的省份是&,送出均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "16.2"
+}

+ 11 - 0
final/ByRules/templatesJson/18.1.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B04_output",
+  "type": "compare_max_min",
+  "value_key": "送出均价",
+  "name_key": "单位",
+  "find_max": "True",
+  "content": "送出均价最高的省份是&,送出均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "18.1"
+}

+ 11 - 0
final/ByRules/templatesJson/18.2.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B05_output",
+  "type": "compare_max_min",
+  "value_key": "受入均价",
+  "name_key": "单位",
+  "find_max": "True",
+  "content": "受入均价最高的省份是&,受入均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "18.2"
+}

+ 11 - 0
final/ByRules/templatesJson/18.3.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B04_output",
+  "type": "compare_max_min",
+  "value_key": "送出均价",
+  "name_key": "单位",
+  "find_max": "False",
+  "content": "送出均价最低的省份是&,送出均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "18.3"
+}

+ 11 - 0
final/ByRules/templatesJson/18.4.json

@@ -0,0 +1,11 @@
+{
+  "dataJsonName": "sjjy1_B05_output",
+  "type": "compare_max_min",
+  "value_key": "受入均价",
+  "name_key": "单位",
+  "find_max": "False",
+  "content": "受入均价最低的省份是&,受入均价是&",
+  "play": "讲述文本",
+  "name": "省间交易",
+  "qcode": "18.4"
+}

+ 27 - 13
final/ByRules/util.py

@@ -71,6 +71,7 @@ def find_max_or_min_value(folder_path, file_name, value_key: str, name_key: str,
         print(f"文件 {file_path} 不存在")
         return None
 
+    # 读取主数据文件
     with open(file_path, 'r', encoding='utf-8') as f:
         try:
             data = json.load(f)
@@ -78,30 +79,41 @@ def find_max_or_min_value(folder_path, file_name, value_key: str, name_key: str,
             print(f"JSON 解析失败:{e}")
             return None
 
-    # 加载映射文件
+    # 读取名称映射文件(可选)
     name_mapping = {}
     if mapping_file:
-        with open(mapping_file, 'r', encoding='utf-8') as mf:
-            try:
-                name_mapping = json.load(mf)
-            except json.JSONDecodeError as e:
-                print(f"映射文件 JSON 解析失败:{e}")
+        if os.path.exists(mapping_file):
+            with open(mapping_file, 'r', encoding='utf-8') as mf:
+                try:
+                    name_mapping = json.load(mf)
+                except json.JSONDecodeError as e:
+                    print(f"映射文件 JSON 解析失败:{e}")
+        else:
+            print(f"映射文件 {mapping_file} 不存在")
 
-    # 比较逻辑
+    # 初始化目标记录和值
     target_record = None
     target_value = None
 
     for record in data:
         if not isinstance(record, dict):
             continue
-        if conditions and not all(record.get(k) == v for k, v in conditions.items()):
-            continue
 
+        # 检查条件是否匹配
+        if conditions:
+            if not all(k in record and record[k] == v for k, v in conditions.items()):
+                continue
+
+        # 安全地提取数值字段
+        value_raw = record.get(value_key)
+        if value_raw is None:
+            continue
         try:
-            value = float(record.get(value_key, 0))
-        except ValueError:
+            value = float(value_raw)
+        except (ValueError, TypeError):
             continue
 
+        # 判断是否为新的最大/最小值
         if target_value is None or (find_max and value > target_value) or (not find_max and value < target_value):
             target_value = value
             target_record = record
@@ -110,12 +122,14 @@ def find_max_or_min_value(folder_path, file_name, value_key: str, name_key: str,
         return None
 
     unit_code = target_record.get(name_key)
-    province = name_mapping.get(unit_code, unit_code)  # 优先用映射名
+    province = name_mapping.get(unit_code, unit_code)  # 映射优先,找不到用原值
+
     return {
-        # "单位编码": unit_code,
         name_key: province,
         value_key: target_value
     }
+        # 如果需要,也可以加上原始 unit_code:
+        # "单位编码": unit_code,
 # 找TopN
 def find_top_n_by_value(folder_path, file_name, value_key, name_key, mapping_file=None, conditions=None, top_n=3, descending=True):
     file_path = os.path.join(folder_path, file_name)