{"round": 1, "policy_id": "greedy", "action": "modelo_rapido", "reason": "initial_exploration", "reward": 0.62, "cumulative_reward": 0.62, "regret": 0.154}
{"round": 2, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "initial_exploration", "reward": 0.78, "cumulative_reward": 1.4, "regret": 0.148}
{"round": 3, "policy_id": "greedy", "action": "revision_humana", "reason": "initial_exploration", "reward": 0.7, "cumulative_reward": 2.1, "regret": 0.222}
{"round": 4, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.74, "cumulative_reward": 2.84, "regret": 0.256}
{"round": 5, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.8, "cumulative_reward": 3.64, "regret": 0.23}
{"round": 6, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 4.4, "regret": 0.244}
{"round": 7, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.79, "cumulative_reward": 5.19, "regret": 0.228}
{"round": 8, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.77, "cumulative_reward": 5.96, "regret": 0.232}
{"round": 9, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.81, "cumulative_reward": 6.77, "regret": 0.196}
{"round": 10, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.75, "cumulative_reward": 7.52, "regret": 0.22}
{"round": 11, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 8.3, "regret": 0.214}
{"round": 12, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 9.06, "regret": 0.228}
{"round": 13, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 9.84, "regret": 0.222}
{"round": 14, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.74, "cumulative_reward": 10.58, "regret": 0.256}
{"round": 15, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.8, "cumulative_reward": 11.38, "regret": 0.23}
{"round": 16, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 12.14, "regret": 0.244}
{"round": 17, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.79, "cumulative_reward": 12.93, "regret": 0.228}
{"round": 18, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.77, "cumulative_reward": 13.7, "regret": 0.232}
{"round": 19, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.81, "cumulative_reward": 14.51, "regret": 0.196}
{"round": 20, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.75, "cumulative_reward": 15.26, "regret": 0.22}
{"round": 21, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 16.04, "regret": 0.214}
{"round": 22, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 16.8, "regret": 0.228}
{"round": 23, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 17.58, "regret": 0.222}
{"round": 24, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.74, "cumulative_reward": 18.32, "regret": 0.256}
{"round": 25, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.8, "cumulative_reward": 19.12, "regret": 0.23}
{"round": 26, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 19.88, "regret": 0.244}
{"round": 27, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.79, "cumulative_reward": 20.67, "regret": 0.228}
{"round": 28, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.77, "cumulative_reward": 21.44, "regret": 0.232}
{"round": 29, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.81, "cumulative_reward": 22.25, "regret": 0.196}
{"round": 30, "policy_id": "greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.75, "cumulative_reward": 23.0, "regret": 0.22}
{"round": 1, "policy_id": "epsilon_greedy", "action": "modelo_rapido", "reason": "initial_exploration", "reward": 0.62, "cumulative_reward": 0.62, "regret": 0.154}
{"round": 2, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "initial_exploration", "reward": 0.78, "cumulative_reward": 1.4, "regret": 0.148}
{"round": 3, "policy_id": "epsilon_greedy", "action": "revision_humana", "reason": "initial_exploration", "reward": 0.7, "cumulative_reward": 2.1, "regret": 0.222}
{"round": 4, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.74, "cumulative_reward": 2.84, "regret": 0.256}
{"round": 5, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.8, "cumulative_reward": 3.64, "regret": 0.23}
{"round": 6, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 4.4, "regret": 0.244}
{"round": 7, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.79, "cumulative_reward": 5.19, "regret": 0.228}
{"round": 8, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.77, "cumulative_reward": 5.96, "regret": 0.232}
{"round": 9, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.81, "cumulative_reward": 6.77, "regret": 0.196}
{"round": 10, "policy_id": "epsilon_greedy", "action": "modelo_rapido", "reason": "scheduled_exploration", "reward": 0.58, "cumulative_reward": 7.35, "regret": 0.39}
{"round": 11, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.75, "cumulative_reward": 8.1, "regret": 0.414}
{"round": 12, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 8.88, "regret": 0.408}
{"round": 13, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 9.64, "regret": 0.422}
{"round": 14, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 10.42, "regret": 0.416}
{"round": 15, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.74, "cumulative_reward": 11.16, "regret": 0.45}
{"round": 16, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.8, "cumulative_reward": 11.96, "regret": 0.424}
{"round": 17, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 12.72, "regret": 0.438}
{"round": 18, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.79, "cumulative_reward": 13.51, "regret": 0.422}
{"round": 19, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.77, "cumulative_reward": 14.28, "regret": 0.426}
{"round": 20, "policy_id": "epsilon_greedy", "action": "revision_humana", "reason": "scheduled_exploration", "reward": 0.68, "cumulative_reward": 14.96, "regret": 0.52}
{"round": 21, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.81, "cumulative_reward": 15.77, "regret": 0.484}
{"round": 22, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.75, "cumulative_reward": 16.52, "regret": 0.508}
{"round": 23, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 17.3, "regret": 0.502}
{"round": 24, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 18.06, "regret": 0.516}
{"round": 25, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.78, "cumulative_reward": 18.84, "regret": 0.51}
{"round": 26, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.74, "cumulative_reward": 19.58, "regret": 0.544}
{"round": 27, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.8, "cumulative_reward": 20.38, "regret": 0.518}
{"round": 28, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.76, "cumulative_reward": 21.14, "regret": 0.532}
{"round": 29, "policy_id": "epsilon_greedy", "action": "modelo_fuerte", "reason": "best_observed_mean", "reward": 0.79, "cumulative_reward": 21.93, "regret": 0.516}
{"round": 30, "policy_id": "epsilon_greedy", "action": "modelo_rapido", "reason": "scheduled_exploration", "reward": 0.61, "cumulative_reward": 22.54, "regret": 0.68}
{"round": 1, "policy_id": "ucb", "action": "modelo_rapido", "reason": "initial_exploration", "reward": 0.62, "cumulative_reward": 0.62, "regret": 0.154}
{"round": 2, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "initial_exploration", "reward": 0.78, "cumulative_reward": 1.4, "regret": 0.148}
{"round": 3, "policy_id": "ucb", "action": "revision_humana", "reason": "initial_exploration", "reward": 0.7, "cumulative_reward": 2.1, "regret": 0.222}
{"round": 4, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.74, "cumulative_reward": 2.84, "regret": 0.256}
{"round": 5, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.68, "cumulative_reward": 3.52, "regret": 0.35}
{"round": 6, "policy_id": "ucb", "action": "modelo_rapido", "reason": "ucb_score", "reward": 0.58, "cumulative_reward": 4.1, "regret": 0.544}
{"round": 7, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.8, "cumulative_reward": 4.9, "regret": 0.518}
{"round": 8, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.72, "cumulative_reward": 5.62, "regret": 0.572}
{"round": 9, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.76, "cumulative_reward": 6.38, "regret": 0.586}
{"round": 10, "policy_id": "ucb", "action": "modelo_rapido", "reason": "ucb_score", "reward": 0.61, "cumulative_reward": 6.99, "regret": 0.75}
{"round": 11, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.69, "cumulative_reward": 7.68, "regret": 0.834}
{"round": 12, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.79, "cumulative_reward": 8.47, "regret": 0.818}
{"round": 13, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.77, "cumulative_reward": 9.24, "regret": 0.822}
{"round": 14, "policy_id": "ucb", "action": "modelo_rapido", "reason": "ucb_score", "reward": 0.57, "cumulative_reward": 9.81, "regret": 1.026}
{"round": 15, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.71, "cumulative_reward": 10.52, "regret": 1.09}
{"round": 16, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.81, "cumulative_reward": 11.33, "regret": 1.054}
{"round": 17, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.67, "cumulative_reward": 12.0, "regret": 1.158}
{"round": 18, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.75, "cumulative_reward": 12.75, "regret": 1.182}
{"round": 19, "policy_id": "ucb", "action": "modelo_rapido", "reason": "ucb_score", "reward": 0.6, "cumulative_reward": 13.35, "regret": 1.356}
{"round": 20, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.78, "cumulative_reward": 14.13, "regret": 1.35}
{"round": 21, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.7, "cumulative_reward": 14.83, "regret": 1.424}
{"round": 22, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.76, "cumulative_reward": 15.59, "regret": 1.438}
{"round": 23, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.69, "cumulative_reward": 16.28, "regret": 1.522}
{"round": 24, "policy_id": "ucb", "action": "modelo_rapido", "reason": "ucb_score", "reward": 0.59, "cumulative_reward": 16.87, "regret": 1.706}
{"round": 25, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.78, "cumulative_reward": 17.65, "regret": 1.7}
{"round": 26, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.74, "cumulative_reward": 18.39, "regret": 1.734}
{"round": 27, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.71, "cumulative_reward": 19.1, "regret": 1.798}
{"round": 28, "policy_id": "ucb", "action": "modelo_fuerte", "reason": "ucb_score", "reward": 0.8, "cumulative_reward": 19.9, "regret": 1.772}
{"round": 29, "policy_id": "ucb", "action": "modelo_rapido", "reason": "ucb_score", "reward": 0.63, "cumulative_reward": 20.53, "regret": 1.916}
{"round": 30, "policy_id": "ucb", "action": "revision_humana", "reason": "ucb_score", "reward": 0.68, "cumulative_reward": 21.21, "regret": 2.01}
