Se refactoriza la función extract_answers para que funcione con todos los esquemas de datos de las respuestas estructuradas

2025-11-14 14:52:03 -05:00
parent 3b0ba91ecc
commit 1fd99cc536
31 changed files with 147 additions and 7 deletions
--- a/action_flow/fase1_extract.py
+++ b/action_flow/fase1_extract.py
@@ -2,6 +2,19 @@ import pandas as pd
 import os
 import sys
 from extraccion import agentes_entidades
+from extraccion.schemas_entidades import (
+    Acciones,
+    Temas,
+    Estrategias,
+    Factores,
+    Entidades,
+    Competencias,
+    Herramientas,
+    Mecanismos,
+    Opiniones,
+    Expectativas,
+)
+

 module_path = os.path.abspath(os.path.join(".."))

@@ -11,8 +24,8 @@ if module_path not in sys.path:
    print(module_path)


-INPUT_FOLDER = f"{module_path}/input/Preguntas Categoricas/"
-OUTPUT_FOLDER = f"{module_path}/output/fase1"
+INPUT_FOLDER = "input/Preguntas Categoricas"
+OUTPUT_FOLDER = "output/fase1"
 FILES_TO_PROCESS = os.listdir(INPUT_FOLDER)
 DELIMITER = "|^"
 DIC_QUESTIONS = {
@@ -34,11 +47,79 @@ DIC_QUESTIONS = {
 }


-def extract_answers(answers):
+# 2. Refactor 'extract_answers' to handle every schema type
+def extract_answers(answers_obj):
+    """
+    Take the Pydantic object returned by an extractor and format it
+    as a single string in which every item is followed by |^.
+    """
     answer_formated = ""
-    iterator_answers = answers.acciones
-    for item in iterator_answers:
-        answer_formated += f"{item.accion}{DELIMITER}"
+    list_items = []  # Generic list of items (e.g. [Accion, Accion])
+    item_attr_name = ""  # Attribute to read from each item (e.g. "accion")
+
+    # --- Handle every schema that is a list of objects ---
+
+    if isinstance(answers_obj, Acciones):
+        list_items = answers_obj.acciones
+        item_attr_name = "accion"
+    elif isinstance(answers_obj, Temas):
+        list_items = answers_obj.temas
+        item_attr_name = "tema"
+    elif isinstance(answers_obj, Estrategias):
+        list_items = answers_obj.estrategias
+        item_attr_name = "estrategia"
+    elif isinstance(answers_obj, Factores):
+        list_items = answers_obj.factores
+        item_attr_name = "factor"
+    elif isinstance(answers_obj, Entidades):
+        list_items = answers_obj.entidades
+        item_attr_name = "entidad"
+    elif isinstance(answers_obj, Herramientas):
+        list_items = answers_obj.herramientas
+        item_attr_name = "herramienta"
+    elif isinstance(answers_obj, Mecanismos):
+        list_items = answers_obj.mecanismos
+        item_attr_name = "mecanismo"
+    elif isinstance(answers_obj, Opiniones):
+        list_items = answers_obj.opiniones
+        item_attr_name = "opinion"
+    elif isinstance(answers_obj, Expectativas):
+        list_items = answers_obj.expectativas
+        item_attr_name = "expectativa"
+
+    # If a standard list type was matched and its list is non-empty, process it
+    if list_items and item_attr_name:
+        for item in list_items:
+            # Read the text (e.g. item.accion, item.tema); a missing attribute yields ""
+            item_text = getattr(item, item_attr_name, "")
+            if item_text:
+                answer_formated += f"{item_text}{DELIMITER}"
+        return answer_formated
+
+    # --- Special case: Competencias ---
+
+    if isinstance(answers_obj, Competencias):
+        comp_texts = []
+        # The Competencias schema uses lists of strings, not lists of objects
+        if answers_obj.basicas:
+            comp_texts.extend([f"Básica: {c}" for c in answers_obj.basicas])
+        if answers_obj.socioemocionales:
+            comp_texts.extend(
+                [f"Socioemocional: {c}" for c in answers_obj.socioemocionales]
+            )
+        if answers_obj.ciudadanas:
+            comp_texts.extend([f"Ciudadana: {c}" for c in answers_obj.ciudadanas])
+        if answers_obj.siglo_xxi:
+            comp_texts.extend([f"Siglo XXI: {c}" for c in answers_obj.siglo_xxi])
+
+        if not comp_texts:
+            return ""  # Return empty when the Competencias object has no entries
+
+        # Join all competency texts, keeping a trailing delimiter after the last one
+        answer_formated = DELIMITER.join(comp_texts) + DELIMITER
+        return answer_formated
+
+    # If the type matched nothing (or the list was empty), return an empty string
     return answer_formated