2 months ago · aaf2e04b67
--- a/final/ByRules/__pycache__/app.cpython-38.pyc
+++ b/final/ByRules/__pycache__/app.cpython-38.pyc
--- a/final/ByRules/__pycache__/similarity_answer_json.cpython-38.pyc
+++ b/final/ByRules/__pycache__/similarity_answer_json.cpython-38.pyc
--- a/final/ByRules/app.py
+++ b/final/ByRules/app.py
@@ -26,7 +26,6 @@ app = Flask(__name__)
 
				 # DATA_FOLDER = resource_path("final/Json/json_data")
			
 
				 
			
 
				 
			
 
				-
			
 
				 BASE_DIR = os.path.dirname(os.path.abspath(__file__))
			
 
				 TEMPLATE_FOLDER = os.path.join(BASE_DIR, "templatesJson")
			
 
				 DATA_FOLDER = os.path.join(BASE_DIR, "..", "Json", "json_data")
			
@@ -36,7 +35,6 @@ MAPPING_FOLDER = os.path.join(BASE_DIR, "..", "Json", "sjgxys")
 
				 MAPPING_FILE = os.path.join(MAPPING_FOLDER, "sjgxys.json")
			
 
				 
			
 
				 
			
 
				-
			
 
				 @app.route('/process_query', methods=['POST'])
			
 
				 def process_query_route():
			
 
				     data = request.get_json()
			
@@ -116,7 +114,7 @@ def process_query_route():
 
				         elif result['type'] == 'topN':
			
 
				             topN = result["conditions"]['rank']
			
 
				             del result["conditions"]['rank']
			
 
				-            print(topN)
			
 
				+            # print(topN)
			
 
				             final_value = find_top_n_by_value(folder_path=DATA_FOLDER,
			
 
				                                               file_name=result["dataJsonName"],
			
 
				                                               value_key=result['value_key'],
			
@@ -137,6 +135,36 @@ def process_query_route():
 
				                 "play": result["play"]
			
 
				             }
			
 
				             return jsonify(response)
			
 
				+        # Rank: return the single entry at the requested position (conditions['rank2'])
			
 
				+        elif result['type'] == 'rank':
			
 
				+            rank = result["conditions"]['rank2']
			
 
				+            del result["conditions"]['rank2']
			
 
				+            # print(topN)
			
 
				+            final_value = find_top_n_by_value(folder_path=DATA_FOLDER,
			
 
				+                                              file_name=result["dataJsonName"],
			
 
				+                                              value_key=result['value_key'],
			
 
				+                                              name_key=result['name_key'],
			
 
				+                                              mapping_file=MAPPING_FILE,
			
 
				+                                              conditions=result["conditions"],
			
 
				+                                              top_n=rank,  # 查前3高
			
 
				+                                              descending=True
			
 
				+                                              )
			
 
				+            # keys = [result['name_key'], result['value_key']]
			
 
				+            contentResult = final_value[rank - 1]
			
 
				+            # 构造需要填充的列表
			
 
				+            fillResult = [rank, contentResult[result['name_key']]]
			
 
				+            final_content = fill_template_auto(result['content'], fillResult)
			
 
				+            response = {
			
 
				+                "rank": rank,
			
 
				+                "content": final_content,
			
 
				+                "content_text": result["content"],
			
 
				+                "raw_result": final_value,
			
 
				+                "conditions": result["conditions"],
			
 
				+                "name": result["name"],
			
 
				+                "play": result["play"],
			
 
				+                "qcode": result["qcode"]
			
 
				+            }
			
 
				+            return jsonify(response)
			
 
				     except Exception as e:
			
 
				         return jsonify({"error": str(e)}), 500
			
 
				 
			
--- a/final/ByRules/similarity_answer_json.py
+++ b/final/ByRules/similarity_answer_json.py
@@ -52,7 +52,8 @@ template_dict = {
 
				     "23": ["省间交易当年参与交易的家次有多少？"],
			
 
				     "24": ["某年送出电量前五名是谁？"],
			
 
				     "25": ["某年受入电量前五名是谁？"],
			
 
				-    "26": ["某年送出电量第五名是谁？"],
			
 
				+    "26": ["某年受入电量第五名是谁？"],
			
 
				+    "27": ["某年送出电量第五名是谁？"],
			
 
				 }
			
 
				 # 将地点映射成相应的代码
			
 
				 def map_location_to_unit(location: str) -> str:
			
@@ -269,9 +270,32 @@ def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
 
				         # print(f"匹配到的排名为：{rank}")
			
 
				     else:
			
 
				         rank = None
			
 
				-        print("未匹配到排名")
			
 
				+        # print("未匹配到排名")
			
 
				 
			
 
				-    return time_results, locations, rank
			
 
				+
			
 
				+
			
 
				+    # Match a single ordinal rank written as "第<N>名" (N is digits or a Chinese numeral), e.g. "第五名" or "第 5 名"
			
 
				+    rank_match2 = re.search(r'(第\s*)(\d+|[一二两三四五六七八九十])\s*(名)', question, re.IGNORECASE)
			
 
				+
			
 
				+    # rank_match = re.search(
			
 
				+    #     r'(前|top\s*|第\s*)(\d+|[一二三四五六七八九十])\s*(名)?',
			
 
				+    #     question,
			
 
				+    #     re.IGNORECASE
			
 
				+    # )
			
 
				+
			
 
				+    if rank_match2:
			
 
				+        rank_str = rank_match2.group(2)
			
 
				+        if rank_str.isdigit():
			
 
				+            rank2 = int(rank_str)
			
 
				+        else:
			
 
				+            rank2 = chinese_digit_map.get(rank_str, None)
			
 
				+
			
 
				+        # print(f"匹配到的排名为：{rank}")
			
 
				+    else:
			
 
				+        rank2 = None
			
 
				+        # print("未匹配到排名")
			
 
				+
			
 
				+    return time_results, locations, rank, rank2
			
 
				 
			
 
				 # 先用 extract_time_location 判断问句包含哪类时间信息，然后只对结构匹配的模板子集做余弦匹配。
			
 
				 # def classify_by_time_type(query, time_info):
			
@@ -282,7 +306,7 @@ def extract_time_location(question: str) -> Tuple[List[Dict], List[str]]:
 
				 def classify_by_time_type(query, time_info):
			
 
				     if not time_info:
			
 
				         # 无时间信息时，返回指定模板 19-23
			
 
				-        return ['19', '20', '21', '22', '23', '17.1', '17.2', '17.3', '17.4', '18.1', '18.2', '18.3', '18.4','24','25']
			
 
				+        return ['19', '20', '21', '22', '23', '17.1', '17.2', '17.3', '17.4', '18.1', '18.2', '18.3', '18.4','24','25','26','27']
			
 
				 
			
 
				     time = time_info[0]
			
 
				 
			
@@ -296,17 +320,17 @@ def classify_by_time_type(query, time_info):
 
				 
			
 
				     # 情况 3：仅 year，全年
			
 
				     if 'year' in time and 'month' not in time:
			
 
				-        return ['1','8.1','8.2','8.3','8.4','9.1','9.2','9.3','9.4','9.5','9.6','9.7','9.8','9.9','9.10','9.11','9.12','9.13','9.14','9.15','9.16','9.17','16.1','16.2','21','22','23','24','25']  # 某年全年累计交易电量
			
 
				+        return ['1','8.1','8.2','8.3','8.4','9.1','9.2','9.3','9.4','9.5','9.6','9.7','9.8','9.9','9.10','9.11','9.12','9.13','9.14','9.15','9.16','9.17','16.1','16.2','21','22','23','24','25','26','27']  # 某年全年累计交易电量
			
 
				 def match_template_with_time_filter(query, template_dict, tokenizer, extract_time_location_func):
			
 
				     """
			
 
				     先基于时间信息筛选候选模板，再进行TF-IDF匹配。
			
 
				     """
			
 
				     # 提取时间
			
 
				-    time_info, _, _ = extract_time_location_func(query)
			
 
				-    print(time_info)
			
 
				+    time_info, _, _, _ = extract_time_location_func(query)
			
 
				+    # print(time_info)
			
 
				     # 通过时间判断候选模板 key
			
 
				     candidate_keys = classify_by_time_type(query, time_info)
			
 
				-    print(candidate_keys)
			
 
				+    # print(candidate_keys)
			
 
				     # 构造候选子模板字典
			
 
				     filtered_template_dict = {k: template_dict[k] for k in candidate_keys}
			
 
				     # 使用你原来的 TF-IDF 匹配函数
			
@@ -377,7 +401,7 @@ def load_template_info(matched_key, json_folder):
 
				     return data
			
 
				 def process_query(query, template_dict, json_folder, tokenizer=jieba_tokenizer):
			
 
				     # 提取条件
			
 
				-    time_info, location_info, rank_info = extract_time_location(query)
			
 
				+    time_info, location_info, rank_info, rank_info2 = extract_time_location(query)
			
 
				 
			
 
				     conditions = {}
			
 
				     # 匹配模板
			
@@ -427,6 +451,10 @@ def process_query(query, template_dict, json_folder, tokenizer=jieba_tokenizer):
 
				 
			
 
				     if rank_info:
			
 
				         conditions['rank'] = rank_info
			
 
				+
			
 
				+    if rank_info2:
			
 
				+        conditions['rank2'] = rank_info2
			
 
				+
			
 
				     # 查询模板json
			
 
				     template_info = load_template_info(matched_key, json_folder)
			
 
				     # 模板的关键词
			
@@ -447,6 +475,8 @@ def process_query(query, template_dict, json_folder, tokenizer=jieba_tokenizer):
 
				     content = template_info.get("content", "")
			
 
				     # 动作类型
			
 
				     play = template_info.get("play", "")
			
 
				+    # 问题序号
			
 
				+    qcode = template_info.get("qcode", "")
			
 
				     return {
			
 
				         "matched_key": matched_key,
			
 
				         "matched_template": best_sentence,
			
@@ -462,8 +492,8 @@ def process_query(query, template_dict, json_folder, tokenizer=jieba_tokenizer):
 
				         "play": play,
			
 
				         "find_max": find_max,
			
 
				         "value_key": value_key,
			
 
				-        "name_key": name_key
			
 
				-
			
 
				+        "name_key": name_key,
			
 
				+        "qcode": qcode
			
 
				     }
			
 
				 # 查询类
			
 
				 def smart_find_value(folder_path, file_name, conditions: dict, target_key: str):
			
@@ -528,26 +558,28 @@ def find_key_recursively(data, target_key):
 
				 
			
 
				 # query = "2023年省间交易电量新能源交易电量是多少？？"
			
 
				 # query = "今年1月到2023年2月累计交易电量是多少?"
			
 
				+# query = "2024年送出电量第二名是谁?"
			
 
				+query = "2024年7月、8月、12月交易电量的平均值是多少?"
			
 
				+
			
 
				+json_folder = "templatesJson"
			
 
				+
			
 
				 #
			
 
				-# json_folder = "templatesJson"
			
 
				 #
			
 
				-# #
			
 
				-# #
			
 
				-# result = process_query(query, template_dict, json_folder)
			
 
				+result = process_query(query, template_dict, json_folder)
			
 
				 #
			
 
				-# # print(result)
			
 
				-# print(result['content'])
			
 
				-# print("匹配的模板 key：", result["matched_key"])
			
 
				-# print("最相似的模板句：", result["matched_template"])
			
 
				-# print("相似度分数：", result["similarity_score"])
			
 
				-# print("类型：", result["type"])
			
 
				-# print("关键词：", result["keywords"])
			
 
				-# print("查询字段：", result["target"])
			
 
				-# print("模型名字", result["name"])
			
 
				-# print("条件", result["conditions"])
			
 
				-# print("返回的内容是：", result["content"])
			
 
				-# print("问句是：", result["query"])
			
 
				-# print("动作是：", result["play"])
			
 
				+print(result)
			
 
				+print(result['content'])
			
 
				+print("匹配的模板 key：", result["matched_key"])
			
 
				+print("最相似的模板句：", result["matched_template"])
			
 
				+print("相似度分数：", result["similarity_score"])
			
 
				+print("类型：", result["type"])
			
 
				+print("关键词：", result["keywords"])
			
 
				+print("查询字段：", result["target"])
			
 
				+print("模型名字", result["name"])
			
 
				+print("条件", result["conditions"])
			
 
				+print("返回的内容是：", result["content"])
			
 
				+print("问句是：", result["query"])
			
 
				+print("动作是：", result["play"])
			
 
				 
			
 
				 # query = "当月送出均价最高的是哪个省？？"
			
 
				 # query = ("2025年送出电量前五名是谁？？")
			
--- a/final/ByRules/templatesJson/26.json
+++ b/final/ByRules/templatesJson/26.json
@@ -1,11 +1,11 @@
 
				 {
			
 
				   "dataJsonName": "sjjy1_B05_output",
			
 
				-  "type": "topN",
			
 
				+  "type": "rank",
			
 
				   "value_key": "受入电量",
			
 
				   "name_key": "单位",
			
 
				-  "top_n": 5,
			
 
				-  "content": "送出电量排名分别为：&",
			
 
				+  "rank": 1,
			
 
				+  "content": "第&名是：&",
			
 
				   "play": "讲述文本",
			
 
				   "name": "省间交易",
			
 
				-  "qcode": "25"
			
 
				+  "qcode": "26"
			
 
				 }
			
--- a/final/ByRules/templatesJson/27.json
+++ b/final/ByRules/templatesJson/27.json
@@ -0,0 +1,11 @@
 
				+{
			
 
				+  "dataJsonName": "sjjy1_B04_output",
			
 
				+  "type": "rank",
			
 
				+  "value_key": "送出电量",
			
 
				+  "name_key": "单位",
			
 
				+  "rank": 1,
			
 
				+  "content": "第&名是：&",
			
 
				+  "play": "讲述文本",
			
 
				+  "name": "省间交易",
			
 
				+  "qcode": "27"
			
 
				+}