How to fix list index out of range - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: Homework (https://python-forum.io/forum-9.html) +--- Thread: How to fix list index out of range (/thread-36931.html) |
RE: How to fix list index out of range - longmen - Apr-24-2022 (Apr-23-2022, 10:32 PM)deanhystad Wrote: I still think your building the board incorrectly. You should not be using insert() to set moves or values. Can you elaborate on insert() to set move or values? Also why you think that the Q_learning function is not working? Thanks Here is update on the code import random import numpy as np import enum EACH_STEP_REWARD = -0.1 GOAL_SQUARE_REWARD = 100 FORBIDDEN_SQUARE_REWARD = -100 DISCOUNT_RATE_GAMMA = 0.1 # Discount Rate LEARNING_RATE_ALPHA = 0.3 # Learning Rate GREEDY_PROBABILITY_EPSILON = 0.5 # Greedy Probability ITERATION_MAX_NUM = 10000 # Will be 10,000 START_STATE = 2 LEVEL = 4 HEIGHT = 4 WEIGHT = 4 class Direction(enum.Enum): up = 0 right = 1 down = 2 left = 3 class Node: def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None): self.title = title self.next = next self.qValues = [qValues] * 4 self.move = [actions] * 4 self.goal = Goal self.forbidden = Forbidden self.wall = Wall def max_Q_value(self): if self.wall: return False max_q = [] for q in self.qValues: if q is not None: max_q.append(q) return max(max_q) def find_best_move(self): max_q = self.max_Q_value() q_index = self.qValues.index(max_q) return Direction(q_index) class create_env: def __init__(self, input_list, wall=None): self.wall = wall self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]] S = 2 Node_1 = Node(1, [5, S, self.wall, self.wall]) Node_Start = Node(S, [6, 3, self.wall, 1]) Node_3 = Node(3, [S, 7, 4, self.wall]) Node_4 = Node(4, [8, self.wall, self.wall, 3]) Node_5 = Node(5, [9, 6, 1, self.wall]) Node_6 = Node(6, [10, 7, S, 5]) Node_7 = Node(7, [11, 8, 3, 6]) Node_8 = Node(8, [12, self.wall, 4, 7]) Node_9 = Node(9, [13, 10, 5, self.wall]) Node_10 = Node(10, [14, 11, 6, 9]) Node_11 = Node(11, [15, 12, 7, 10]) Node_12 = Node(12, [16, self.wall, 8, 11]) Node_13 = Node(13, [self.wall, 14, 9, self.wall]) Node_14 = Node(14, [self.wall, 15, 10, 13]) Node_15 = Node(15, [self.wall, 16, 11, 14]) Node_16 = Node(16, [self.wall, self.wall, 12, 15]) self.episode[0][0] = Node_1 self.episode[0][1] = Node_Start self.episode[0][S] = Node_3 self.episode[0][3] = Node_4 self.episode[1][0] = Node_5 self.episode[1][1] = Node_6 self.episode[1][S] = Node_7 self.episode[1][3] = Node_8 self.episode[S][0] = Node_9 self.episode[S][1] = Node_10 self.episode[S][S] = Node_11 self.episode[S][3] = Node_12 self.episode[3][0] = Node_13 self.episode[3][1] = Node_14 self.episode[3][S] = Node_15 self.episode[3][3] = Node_16 self.goal_labels = [int(input_list[0]), int(input_list[1])] self.forbidden_label = int(input_list[2]) self.wall_label = int(input_list[3]) x = 0 while x < LEVEL: y = 0 while y < LEVEL: current_episode = self.episode[x][y] if current_episode.title in self.goal_labels: current_episode.goal = 1 current_episode.move.append(4) current_episode.qValues.append(4) elif current_episode.title == self.forbidden_label: current_episode.forbidden = 1 current_episode.move.append(4) current_episode.qValues.append(4) elif current_episode.title == self.wall_label: current_episode.wall = 1 else: position = 0 while position < LEVEL: if current_episode.next[position] is not None: current_episode.move.append(Direction(position)), current_episode.qValues.insert( position, False) position += 1 y += 1 x += 1 def get_episode(self, name): for x in self.episode: for episode in x: if episode.title == name: # print(episode) return episode def print_best_actions(self): for row in self.episode: for episode in row: if episode.goal: best_action_str = 'Direction.goal' elif episode.forbidden: best_action_str = "Direction.forbid" elif episode.wall: best_action_str = 'Direction.wall-square' else: best_action_str = str(episode.find_best_move()) print(str(episode.title) + " " + best_action_str[10:]) def print_four_Q_value(self, index): episode = self.get_episode(index) print("up" + ' ' + str(round(episode.qValues[1], 2))) print("right" + ' ' + str(round(episode.qValues[2], 2))) print("down" + ' ' + str(round(episode.qValues[3], 2))) print("left" + ' ' + str(round(episode.qValues[0], 2))) def Q_learning(environment, print_best_actions, index): for iteration in range(ITERATION_MAX_NUM): current_episode = environment.get_episode(START_STATE) total_episode_reward = 0 for episode in range(ITERATION_MAX_NUM): if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON: next_move = [] for score in current_episode.move: if score is not None: next_move.append(score) # print(score) next_move = random.choice(next_move) else: next_move = current_episode.find_best_move() next_episode = environment.get_episode(current_episode.next[next_move.value]) if next_episode.goal: reward = GOAL_SQUARE_REWARD elif next_episode.forbidden: reward = FORBIDDEN_SQUARE_REWARD else: reward = EACH_STEP_REWARD total_episode_reward += reward old_q = current_episode.qValues[next_move.value] new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q) # print(new_q) current_episode.qValues[next_move.value] = new_q # print(current_episode.qValues[next_move.value]) if next_episode.goal: break elif next_episode.forbidden: break else: if next_episode.wall: break else: current_episode = next_episode def user_input(): try: input_list = [] input_str = input() input_list = input_str.split() except: print("The input should be like: 15 12 8 6 p") environment = create_env(input_list) if (len(input_list) == 5) and (input_list[-1] == 'p'): Q_learning(environment, 1, 0) environment.print_best_actions() elif (len(input_list) == 6) and (input_list[-2] == 'q'): Q_learning(environment, 0, int(input_list[5])) environment.print_four_Q_value(int(input_list[5])) user_input() RE: How to fix list index out of range - deanhystad - Apr-24-2022 As far as this: Quote:Can you elaborate on insert() to set move or values?What do you think list.insert(value) does? It is not the correct way to SET values in a list. I would like you to describe your Q learning algorithm. I cannot tell if it contains errors in logic or execution or my not understanding your requirements without knowing what you think it is supposed to do. For example, you always start at the same square but it appears that you want a solution for the entire map. I can design a map where that is not possible. It also seems a very inefficient way to generate a map, and even with 10000 iterations there remains a possibility that you do not try all routes. You calculate a total episode reward that is never used. You pass arguments to Q_learning() that are not used. I think I implemented your algorithm and I get this result for inputs: 15 12 8 6 p I think this is correct. 16 cannot be up as you indicate in your expected results. 16 does not have an "Up" neighbor.
RE: How to fix list index out of range - longmen - Apr-24-2022 (Apr-24-2022, 02:59 AM)deanhystad Wrote: As far as this: But my assignment requires it to go up at 16 index :/ RE: How to fix list index out of range - deanhystad - Apr-24-2022 If 16 is supposed to be up that means your board is wrong and 16 up cannot be a wall. Though your solution is inefficient I think it works. Any problems you have are probably errors setting up the board. RE: How to fix list index out of range - longmen - Apr-24-2022 (Apr-24-2022, 03:31 PM)deanhystad Wrote: If 16 is supposed to be up that means your board is wrong and 16 up cannot be a wall. Hi, I figured out the issue. There is one more thing I hope you could advise me. With this input 15 12 8 6 q 11 it prints out Action.UP 99.99999999999999 Action.UP 99.99999999999999 Action.DOWN 0.8899999999999998 Action.DOWN 0.8899999999999998However, I want it to update from UP to RIGHT if it prints UP for the second time and print out LEFT at the fourth output if it shows up as DOWN. Its output should always print out as UP RIGHT DOWN LEFT. Could you please advise? Thanks for all your responses! RE: How to fix list index out of range - deanhystad - Apr-24-2022 You have not posted any code that can produce that output. I cannot explain why code I have not seen does not work as expected. Did you change your search order from LURD to URDL? Is that what "fixed" your output? If so, that just means that 16 was never evaluated. This makes sense because it is not a neighbor of any open (not goal, forbidden or wall) tile. RE: How to fix list index out of range - longmen - Apr-24-2022 (Apr-24-2022, 03:58 PM)deanhystad Wrote: You have not posted any code that can produce that output. I cannot explain why code I have not seen does not work as expected. I figured out that also. However, there is one more issue. As it shown below, UP value is wrong sometimes and DOWN value is always wrong. I wonder if you could help? I also provide the code below. Thanks 10 8 9 6 q 2 My output up -0.1 right 0.89 down -0.1 left -0.1 Expected Output up -0.01 right 0.89 down -0.01 left -0.1 12 7 5 6 q 3 My output up 100.0 right 0.89 down -0.01 left 0.89 Expected Output up 100.0 right 0.89 down 9.9 left 0.89 import random import numpy as np import enum EACH_STEP_REWARD = -0.1 GOAL_SQUARE_REWARD = 100 FORBIDDEN_SQUARE_REWARD = -100 DISCOUNT_RATE_GAMMA = 0.1 # Discount Rate LEARNING_RATE_ALPHA = 0.3 # Learning Rate GREEDY_PROBABILITY_EPSILON = 0.5 # Greedy Probability ITERATION_MAX_NUM = 1000000 # Will be 10,000 START_LABEL = 2 LEVEL = 4 HEIGHT = 4 WEIGHT = 4 class Direction(enum.Enum): up = 0 right = 1 down = 2 left = 3 class Node: def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None): self.title = title self.next = next self.qValues = [qValues] * 5 self.move = [actions] * 5 self.goal = Goal self.forbidden = Forbidden self.wall = Wall def max_Q_value(self): if self.wall: return False max_q = [] for q in self.qValues: if q is not None: max_q.append(q) return max(max_q) def find_best_move(self): max_q = self.max_Q_value() q_index = self.qValues.index(max_q) return Direction(q_index) class create_env: def __init__(self, input_list, wall=None): self.wall = wall self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]] S = 2 Node_1 = Node(1, [5, S, 1, 1]) Node_Start = Node(S, [6, 3, 1, 1]) Node_3 = Node(3, [7, 4, 1, S]) Node_4 = Node(4, [8, 1, 1, 3]) Node_5 = Node(5, [9, self.wall, 1, 1]) Node_6 = Node(6, [10, 7, S, 5]) Node_7 = Node(7, [11, 8, 3, 6]) Node_8 = Node(8, [12, self.wall, 4, 7]) Node_9 = Node(9, [13, 10, 5, 1]) Node_10 = Node(10, [14, 11, 6, 9]) Node_11 = Node(11, [15, 12, 7, 10]) Node_12 = Node(12, [16, self.wall, 8, 11]) Node_13 = Node(13, [1, 14, 9, self.wall]) Node_14 = Node(14, [1, 15, 10, 13]) Node_15 = Node(15, [1, 16, 11, 14, ]) Node_16 = Node(16, [1, self.wall, 12, 1]) self.episode[0][0] = Node_1 self.episode[0][1] = Node_Start self.episode[0][S] = Node_3 self.episode[0][3] = Node_4 self.episode[1][0] = Node_5 self.episode[1][1] = Node_6 self.episode[1][S] = Node_7 self.episode[1][3] = Node_8 self.episode[S][0] = Node_9 self.episode[S][1] = Node_10 self.episode[S][S] = Node_11 self.episode[S][3] = Node_12 self.episode[3][0] = Node_13 self.episode[3][1] = Node_14 self.episode[3][S] = Node_15 self.episode[3][3] = Node_16 self.goal_labels = [int(input_list[0]), int(input_list[1])] self.forbidden_label = int(input_list[2]) self.wall_label = int(input_list[3]) x = 0 while x < LEVEL: y = 0 while y < LEVEL: current_episode = self.episode[x][y] if current_episode.title in self.goal_labels: current_episode.goal = 1 current_episode.move.insert(4, 0) current_episode.qValues.insert(4, 0) elif current_episode.title == self.forbidden_label: current_episode.forbidden = 1 current_episode.move.insert(4, 0) current_episode.qValues.insert(4, 0) elif current_episode.title == self.wall_label: current_episode.wall = 1 else: position = 0 while position < LEVEL: if current_episode.next[position] is not None: current_episode.move.insert(position, Direction(position)), current_episode.qValues.insert( position, False) position += 1 y += 1 x += 1 def get_episode(self, name): for x in self.episode: for episode in x: if episode.title == name: # print(episode) return episode def print_best_actions(self): for row in self.episode: for episode in row: if episode.goal: best_action_str = 'Direction.goal' elif episode.forbidden: best_action_str = "Direction.forbid" elif episode.wall: best_action_str = 'Direction.wall-square' else: best_action_str = str(episode.find_best_move()) print(str(episode.title) + " " + best_action_str[10:]) def print_four_Q_value(self, index): episode = self.get_episode(index) print("up" + ' ' + str(round(episode.qValues[0], 2))) print("right" + ' ' + str(round(episode.qValues[1], 2))) print("down" + ' ' + str(round(episode.qValues[2], 2))) print("left" + ' ' + str(round(episode.qValues[3], 2))) def Q_learning(environment, print_best_actions, index): for iteration in range(ITERATION_MAX_NUM): current_episode = environment.get_episode(START_LABEL) total_episode_reward = 0 for episode in range(10000): if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON: next_move = [] for score in current_episode.move: if score is not None: next_move.append(score) next_move = random.choice(next_move) else: next_move = current_episode.find_best_move() next_episode = environment.get_episode(current_episode.next[next_move.value]) if next_episode.goal: reward = GOAL_SQUARE_REWARD elif next_episode.forbidden: reward = FORBIDDEN_SQUARE_REWARD else: reward = EACH_STEP_REWARD total_episode_reward += reward old_q = current_episode.qValues[next_move.value] new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q) current_episode.qValues[next_move.value] = new_q if next_episode.goal: break elif next_episode.forbidden: break else: if next_episode.wall: break else: current_episode = next_episode def user_input(): try: input_list = [] input_str = input() input_list = input_str.split() except: print("The input should be like: 15 12 8 6 p") environment = create_env(input_list) if (len(input_list) == 5) and (input_list[-1] == 'p'): Q_learning(environment, 1, 0) environment.print_best_actions() elif (len(input_list) == 6) and (input_list[-2] == 'q'): Q_learning(environment, 0, int(input_list[5])) environment.print_four_Q_value(int(input_list[5])) user_input() RE: How to fix list index out of range - deanhystad - Apr-24-2022 Is this correct? Node_Start = Node(S, [6, 3, 1, 1]) Node_3 = Node(3, [7, 4, 1, S])It think your board is still riddled with errors. You would be better off making the wall programmatically. You make too many mistakes when doing it by hand. This is how I make my board. I use dictionaries instead of arrays class Action(Enum): """Enumeration for movements (actions). Value is (row, column) steps to make move""" UP = (1, 0) RIGHT = (0, 1) DOWN = (-1, 0) LEFT = (0, -1) def __str__(self): """Return arrow associated with action"""" if self is self.LEFT: return '\u2190' elif self is self.UP: return '\u2191' elif self is self.RIGHT: return '\u2192' elif self is self.DOWN: return '\u2193' return " " class Board: def __init__(self, goals, traps, walls, rows=4, columns=4): """Initialize the board""" # Make all the tiles self.rows = rows self.columns = columns self.tiles = {x:Tile(x, x in goals, x in traps, x in walls) for x in range(1, rows*columns+1)} # Tell open tiles about their neighbors for label in set(self.tiles) - set(goals + traps + walls): tile = self.tiles[label] for action in Action: r, c = action.value neighbor = self.tile(r + (label-1) // columns, c + (label-1) % columns) # Make a list of neighboring tiles tile.neighbor[action] = neighbor if neighbor and not neighbor.wall tile.values[action] = 1 if neighbor and neighbor.wall else 0 def tile(self, label, column): """Return tile at intersection of row and column. Return None if row or column out of range""" if 0 <= label < self.rows and 0 <= column < self.columns: return self.tiles[label * self.columns + column + 1] return None RE: How to fix list index out of range - longmen - Apr-24-2022 (Apr-24-2022, 07:14 PM)deanhystad Wrote: Is this correct? Node_Start = Node(S, [6, 3, 1, 1]) Node_3 = Node(3, [7, 4, 1, S]) This is correct. However, does the new Board print out according to the outputs? Thank you! RE: How to fix list index out of range - deanhystad - Apr-24-2022 I'm pretty sure that is wrong. Going down from node 2 or node 3 should not take you to node 1. |