How to fix list index out of range - Printable Version +- Python Forum (https://python-forum.io) +-- Forum: Python Coding (https://python-forum.io/forum-7.html) +--- Forum: Homework (https://python-forum.io/forum-9.html) +--- Thread: How to fix list index out of range (/thread-36931.html) |
How to fix list index out of range - longmen - Apr-13-2022 Hi everyone, so when I ran the below code I ran into this problem and I wonder if anyone has advice on how to fix it? Any advice would be very appreciated.Thanks Traceback (most recent call last): File "C:/", line 281, in <module> board = Board(input_list) File "C:", line 144, in __init__ self.goal_labels = [int(input_list[0]), int(input_list[1])] IndexError: list index out of range import math import sys import random from enum import Enum ''' Uses Q-learning algorithm to determine the best path to a goal state ''' # Constants LIVING_REWARD = -0.1 GOAL_REWARD = 100 FORBIDDEN_REWARD = -100 GAMMA = 0.2 # Discount Rate ALPHA = 0.1 # Learning Rate EPSILON = 0.1 # Greedy Probability MAX_ITERATIONS = 10000 # Will be 10,000 PRECISION_LIMIT = 0.009 # Limit of iterations START_LABEL = 2 ''' NOTE: All representations of directions in arrays (such as in actions, neighbors, q_values) will be represented numerically in the same order, clockwise, starting with LEFT This is shown in the ENUM class - can remember using acronym "L.U.R.D." s alpha s_prime alpha_prime gamma Q(s,alpha) <-- (1-alpha)*Q(s,alpha) + alpha*(R(s,alpha,s_prime) + [gamma * max_over_a_prime(Qk(s_prime,alpha_prime)))] 1. two-digit precision for convergence 2. 
set epsilon to 0 after convergence Active reinforcement learning S = set of states A = set of actions per state T(s,a,s') = model of transitions R(s,a,s') = reward function pi(s) = policy don't know T or R Q0(s,a,) = 0 state - current loc, prev loc, visual scope ''' class Action(Enum): LEFT = 0 UP = 1 RIGHT = 2 DOWN = 3 EXIT = 4 def __str__(self): if self.value == 0: return '\u2190' elif self.value == 1: return '\u2191' elif self.value == 2: return '\u2192' elif self.value == 3: return '\u2193' else: return 'X' class Tile: def __init__(self, label, neighbors): ''' Attributes: label (int): index of tile in visual representation of grid (1-16) neighbors (list[int]): labels of neighboring tiles in grid, in order [left,up,right,down] ''' self.label = label self.neighbors = neighbors # Default Tile creation assumes tile is not special and has no possible actions # This will be changed during board creation self.q_values = [None]*5 self.actions = [None]*5 self.isGoal = False self.isForbidden = False self.isWall = False def get_max_q(self): if self.isWall: return 0 else: return max(q for q in self.q_values if q is not None) def get_best_action(self): return Action(self.q_values.index(self.get_max_q())) def update_q(self, q, direction): self.q_values[direction.value] = q def __str__(self): tile_string = '[' + str(self.label) + ']:' for i in range(4): cur_q = self.q_values[i] if cur_q is not None: tile_string += str(self.q_values[i]) + '|' else: tile_string += 'n|' tile_string += '___' for action in self.actions: if action is not None: tile_string += str(action) + ',' return tile_string class Board: def __init__(self, input_list): self.tiles = [[1]*4 for n in range(4)] # tiles[row][col] # max_row = height - 1 # max_col = width - 1 # Neighbors are in order left, up, right, down self.tiles[0][0] = Tile(1,[None,5,2,None]) self.tiles[0][1] = Tile(2,[1,6,3,None]) self.tiles[0][2] = Tile(3,[2,7,4,None]) self.tiles[0][3] = Tile(4,[3,8,None,None]) self.tiles[1][0] = 
Tile(5,[None,9,6,1]) self.tiles[1][1] = Tile(6,[5,10,7,2]) self.tiles[1][2] = Tile(7,[6,11,8,3]) self.tiles[1][3] = Tile(8,[7,12,None,4]) self.tiles[2][0] = Tile(9,[None,13,10,5]) self.tiles[2][1] = Tile(10,[9,14,11,6]) self.tiles[2][2] = Tile(11,[10,15,12,7]) self.tiles[2][3] = Tile(12,[11,16,None,8]) self.tiles[3][0] = Tile(13,[None,None,14,9]) self.tiles[3][1] = Tile(14,[13,None,15,10]) self.tiles[3][2] = Tile(15,[14,None,16,11]) self.tiles[3][3] = Tile(16,[15,None,None,12]) self.goal_labels = [int(input_list[0]), int(input_list[1])] self.forbidden_label = int(input_list[2]) self.wall_label = int(input_list[3]) # Changes default tiles to special ones specified by user # Updates actions for all tiles according to available neighbors and special status for row in range(4): for col in range(4): cur_tile = self.tiles[row][col] if (cur_tile.label in self.goal_labels): cur_tile.isGoal = True cur_tile.actions.insert(4,Action.EXIT) cur_tile.q_values.insert(4,0) elif (cur_tile.label == self.forbidden_label): cur_tile.isForbidden = True cur_tile.actions.insert(4,Action.EXIT) cur_tile.q_values.insert(4,0) elif (cur_tile.label == self.wall_label): cur_tile.isWall = True else: for direction_num in range(4): if cur_tile.neighbors[direction_num] is not None: cur_tile.actions.insert(direction_num, Action(direction_num)) cur_tile.q_values.insert(direction_num, 0) def get_tile(self, label): for row in self.tiles: for tile in row: if (tile.label == label): return tile print("ERROR IF TILE NOT FOUND") def update_q(self, q, direction, tile_label): for row in range(4): for col in range(4): cur_tile = self.tiles[row][col] if (cur_tile.label == tile_label): cur_tile.update_q(q, direction) def print_to_file(self): outF = open("board_output.txt","w") outF.writelines(str(self)) outF.close() def print_all_states(self): for row in self.tiles: for tile in row: best_action_str = None if tile.isGoal or tile.isForbidden: best_action_str = 'EXIT' elif tile.isWall: best_action_str = 'WALL' else: 
best_action_str = str(tile.get_best_action()) print(str(tile.label) + best_action_str) def print_tile_values(self, index): tile = self.get_tile(index) if tile.isGoal: print('EXIT +100') elif tile.isForbidden: print('EXIT -100') elif tile.isWall: print('WALL 0') else: for q in tile.q_values: if q is not None: print(str(Action(tile.q_values.index(q))) + ' ' + str(q)) def __str__(self): tile_string = '' for row in range(3,-1,-1): for col in range(4): tile_string += str(self.tiles[row][col]) + ' ' tile_string += '\n\n' return tile_string def q_learn(board, print_all_states, index): iterations = 0 while (iterations < MAX_ITERATIONS): # Reset starting variables cur_tile = board.get_tile(START_LABEL) action = None exited = False small_iter = 0 reward_sum = 0 while (small_iter < 100): # TODO: Choose random (0.1) or calculated (0.9) action # Currently RANDOM if random.uniform(0, 1) < EPSILON: action = random.choice([x for x in cur_tile.actions if x is not None]) else: action = cur_tile.get_best_action() # Set reward next_tile = board.get_tile(cur_tile.neighbors[action.value]) if next_tile.isGoal: reward = GOAL_REWARD elif next_tile.isForbidden: reward = FORBIDDEN_REWARD else: reward = LIVING_REWARD reward_sum += reward old_q = cur_tile.q_values[action.value] # Calculate and update q value new_q = old_q + ALPHA*(reward + GAMMA*next_tile.get_max_q() - old_q) cur_tile.q_values[action.value] = new_q if next_tile.isGoal: # print("GOOOOOOAAAAAAAL") break elif next_tile.isForbidden: # print("DEATH") break else: if next_tile.isWall: yeet = 0 # print("YOU HIT A WALL") else: cur_tile = next_tile small_iter += 1 if (small_iter > 98): print("YOU ALMOST HAD AN INFINITE LOOP HUNNNY") iterations += 1 if (print_all_states): board.print_all_states() else: board.print_tile_values(index) if __name__ == "__main__": input_list = sys.argv input_list.pop(0) board = Board(input_list) board.print_to_file() if (len(input_list) == 5) and (input_list[4] == 'p'): q_learn(board, True, 0) elif 
(len(input_list) == 6) and (input_list[4] == 'q'): q_learn(board, False, int(input_list[5])) else: print('Invalid input, please run again.') RE: How to fix list index out of range - deanhystad - Apr-13-2022 What arguments did you supply? None? When you run this program you need to provide: 2 goal labels 1 forbidden label 1 wall label self.goal_labels = [int(input_list[0]), int(input_list[1])] self.forbidden_label = int(input_list[2]) self.wall_label = int(input_list[3]) I have no idea what any of that means. I think it is a bad idea forcing the user to provide command line arguments. Command line arguments should be optional, and your program should provide appropriate default values when arguments are not supplied. RE: How to fix list index out of range - longmen - Apr-15-2022 Hi, thanks for your response. Here is the input that I am supposed to feed to the program: 14 12 7 6 p. However, I cannot seem to figure out where to feed the input into this code. Also, I am trying to edit the code to ask for user input every time it runs; however, I have not been able to figure it out over the last two days. I wonder if you could help? (Apr-13-2022, 02:45 AM)deanhystad Wrote: What arguments did you supply? None? When you run this program you need to provide: RE: How to fix list index out of range - deanhystad - Apr-15-2022 The program takes input as command line arguments. 
For example: python program.py 14 12 7 6 p Replace "program" with the name of your program. If you want to modify the program so it takes user input instead of using command line arguments you need to replace these lines: # change these to get user input instead of using command line args in input_list self.goal_labels = [int(input_list[0]), int(input_list[1])] self.forbidden_label = int(input_list[2]) self.wall_label = int(input_list[3]) input_list = sys.argv # Remove input_list.pop(0) # Remove board = Board(input_list) # Remove input_list # Get this as user input instead of command line args if (len(input_list) == 5) and (input_list[4] == 'p'): q_learn(board, True, 0) elif (len(input_list) == 6) and (input_list[4] == 'q'): q_learn(board, False, int(input_list[5])) else: print('Invalid input, please run again.') RE: How to fix list index out of range - longmen - Apr-18-2022 I wonder if you could explain and simplify this line of code self.tiles = [[1]*4 for n in range(4)] as well as this one? Thanks action = random.choice([x for x in cur_tile.actions if x is not None]) RE: How to fix list index out of range - deanhystad - Apr-18-2022 These are both list comprehensions, a short and efficient way of building lists. You can read about list comprehensions here: https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions This code creates a list of lists. self.tiles = [[1]*4 for n in range(4)] You could figure this out yourself by writing a short test program. tiles = [[1]*4 for n in range(4)] print(tiles) tile = [1] * 4 print(tile) The [1] * 4 makes a list containing four 1s. The [[1]*4 for n in range(4)] makes a list that contains four of these lists. This code randomly chooses a tile action that is not None (I don't know what the tile actions are supposed to be). 
action = random.choice([x for x in cur_tile.actions if x is not None]) The following for loop accomplishes the same task: actions = [] for x in cur_tile.actions: if x is not None: actions.append(x) action = random.choice(actions) You can read about random.choice here. https://docs.python.org/3/library/random.html RE: How to fix list index out of range - longmen - Apr-20-2022 @deanhystad Thanks for your responses. They have been very helpful. I am testing this input 12 7 5 6 p with the code and it prints this output: 1Action.RIGHT 2Action.RIGHT 3Action.UP 4Action.LEFT 5EXIT 6WALL 7EXIT 8Action.LEFT 9Action.UP 10Action.LEFT 11Action.LEFT 12EXIT 13Action.RIGHT 14Action.LEFT 15Action.LEFT 16Action.LEFT However, I am looking for this output: 1 Action.right 2 Action.right 3 Action.up 4 Action.up 5 EXIT 6 wall-square 7 EXIT 8 Action.up 9 Action.up 10 Action.up 11 Action.up 12 EXIT 13 Action.up 14 Action.up 15 Action.up 16 Action.up I followed the clockwise priority from up, right, down, left and it did not match. I wonder if you have any advice? Thanks RE: How to fix list index out of range - deanhystad - Apr-23-2022 I would start by verifying the board. For each tile verify the neighbors, actions and q_values are correct. You cannot debug the problem if the starting conditions are wrong. You use insert() to modify actions and q_values in board.__init__(). Should you be using indexing? You never call update_q(). I don't know if that is a bug or just old unused code. RE: How to fix list index out of range - longmen - Apr-23-2022 It has been three days and I have not made much progress. This is how close I am getting to the expected result. However, it still does not print out the output that I want. 
For example with this with this input 15 12 8 6 p I expect this result 1 up 2 right 3 up 4 left 5 up 6 wall-square 7 up 8 forbid 9 up 10 up 11 up 12 goal 13 right 14 right 15 goal 16 up with this input 15 12 8 6 q 11 i am expecting this up 100.0 right 100.0 down 0.89 left 0.89 Updated: I am able to print out correctly with this input 15 12 8 6 q 11. However, it throws an error when I tried different inputs such as 12 7 5 6 q 3 Traceback (most recent call last): File "C:", line 200, in <module> user_input() File "C:", line 197, in user_input environment.print_four_Q_value(int(input_list[5])) File "C:", line 142, in print_four_Q_value print("down" + ' ' + str(round(episode.qValues[3], 2))) TypeError: type NoneType doesn't define __round__ method up 100.0 right 0.89 The expected output is up 100.0 right 0.89 down 9.9 left 0.89 I wonder if you have anymore advice? import random import numpy as np import enum EACH_STEP_REWARD = -0.1 GOAL_SQUARE_REWARD = 100 FORBIDDEN_SQUARE_REWARD = -100 DISCOUNT_RATE_GAMMA = 0.1 # Discount Rate LEARNING_RATE_ALPHA = 0.3 # Learning Rate GREEDY_PROBABILITY_EPSILON = 0.5 # Greedy Probability ITERATION_MAX_NUM = 10000 # Will be 10,000 START_LABEL = 2 LEVEL = 4 HEIGHT = 4 WEIGHT = 4 class Direction(enum.Enum): up = 1 right = 2 down = 3 left = 0 class Node: def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None): self.title = title self.next = next self.qValues = [qValues] * 5 self.move = [actions] * 5 self.goal = Goal self.forbidden = Forbidden self.wall = Wall def max_Q_value(self): if self.wall: return False max_q = [] for q in self.qValues: if q is not None: max_q.append(q) return max(max_q) def find_best_move(self): max_q = self.max_Q_value() q_index = self.qValues.index(max_q) return Direction(q_index) class create_env: def __init__(self, input_list, wall=None): self.wall = wall self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]] S = 2 Node_1 = Node(1, [self.wall, 
5, S, self.wall]) Node_Start = Node(S, [1, 6, 3, self.wall]) Node_3 = Node(3, [S, 7, 4, self.wall]) Node_4 = Node(4, [3, 8, self.wall, self.wall]) Node_5 = Node(5, [self.wall, 9, 6, 1]) Node_6 = Node(6, [5, 10, 7, S]) Node_7 = Node(7, [6, 11, 8, 3]) Node_8 = Node(8, [7, 12, self.wall, 4]) Node_9 = Node(9, [self.wall, 13, 10, 5]) Node_10 = Node(10, [9, 14, 11, 6]) Node_11 = Node(11, [10, 15, 12, 7]) Node_12 = Node(12, [11, 16, self.wall, 8]) Node_13 = Node(13, [self.wall, self.wall, 14, 9]) Node_14 = Node(14, [13, self.wall, 15, 10]) Node_15 = Node(15, [14, self.wall, 16, 11]) Node_16 = Node(16, [15, self.wall, self.wall, 12]) self.episode[0][0] = Node_1 self.episode[0][1] = Node_Start self.episode[0][S] = Node_3 self.episode[0][3] = Node_4 self.episode[1][0] = Node_5 self.episode[1][1] = Node_6 self.episode[1][S] = Node_7 self.episode[1][3] = Node_8 self.episode[S][0] = Node_9 self.episode[S][1] = Node_10 self.episode[S][S] = Node_11 self.episode[S][3] = Node_12 self.episode[3][0] = Node_13 self.episode[3][1] = Node_14 self.episode[3][S] = Node_15 self.episode[3][3] = Node_16 self.goal_labels = [int(input_list[0]), int(input_list[1])] self.forbidden_label = int(input_list[2]) self.wall_label = int(input_list[3]) x = 0 while x < LEVEL: y = 0 while y < LEVEL: current_episode = self.episode[x][y] if current_episode.title in self.goal_labels: current_episode.goal = 1 current_episode.move.insert(4, 0) current_episode.qValues.insert(4, 0) elif current_episode.title == self.forbidden_label: current_episode.forbidden = 1 current_episode.move.insert(4, 0) current_episode.qValues.insert(4, 0) elif current_episode.title == self.wall_label: current_episode.wall = 1 else: position = 0 while position < LEVEL: if current_episode.next[position] is not None: current_episode.move.insert(position, Direction(position)), current_episode.qValues.insert( position, False) position += 1 y += 1 x += 1 def get_episode(self, name): for x in self.episode: for episode in x: if episode.title == 
name: # print(episode) return episode def print_best_actions(self): for row in self.episode: for episode in row: if episode.goal: best_action_str = 'Direction.goal' elif episode.forbidden: best_action_str = "Direction.forbid" elif episode.wall: best_action_str = 'Direction.wall-square' else: best_action_str = str(episode.find_best_move()) print(str(episode.title) + " " + best_action_str[10:]) def print_four_Q_value(self, index): episode = self.get_episode(index) print("up" + ' ' + str(round(episode.qValues[1], 2))) print("right" + ' ' + str(round(episode.qValues[2], 2))) print("down" + ' ' + str(round(episode.qValues[3], 2))) print("left" + ' ' + str(round(episode.qValues[0], 2))) def Q_learning(environment, print_best_actions, index): for iteration in range(ITERATION_MAX_NUM): current_episode = environment.get_episode(START_LABEL) total_episode_reward = 0 for episode in range(100): if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON: next_move = [] for score in current_episode.move: if score is not None: next_move.append(score) next_move = random.choice(next_move) else: next_move = current_episode.find_best_move() next_episode = environment.get_episode(current_episode.next[next_move.value]) if next_episode.goal: reward = GOAL_SQUARE_REWARD elif next_episode.forbidden: reward = FORBIDDEN_SQUARE_REWARD else: reward = EACH_STEP_REWARD total_episode_reward += reward old_q = current_episode.qValues[next_move.value] new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q) current_episode.qValues[next_move.value] = new_q if next_episode.goal: break elif next_episode.forbidden: break else: if next_episode.wall: break else: current_episode = next_episode def user_input(): try: input_list = [] input_str = input() input_list = input_str.split() except: print("The input should be like: 15 12 8 6 p") environment = create_env(input_list) if (len(input_list) == 5) and (input_list[-1] == 'p'): Q_learning(environment, 1, 0) 
environment.print_best_actions() elif (len(input_list) == 6) and (input_list[-2] == 'q'): Q_learning(environment, 0, int(input_list[5])) environment.print_four_Q_value(int(input_list[5])) user_input() RE: How to fix list index out of range - deanhystad - Apr-23-2022 I still think you're building the board incorrectly. You should not be using insert() to set moves or values. You should describe how your Q_learning function is supposed to work. |