How to fix list index out of range

How to fix list index out of range - Printable Version

+- Python Forum (https://python-forum.io)
+-- Forum: Python Coding (https://python-forum.io/forum-7.html)
+--- Forum: Homework (https://python-forum.io/forum-9.html)
+--- Thread: How to fix list index out of range (/thread-36931.html)

Pages: 1 2 3

How to fix list index out of range - longmen - Apr-13-2022

Hi everyone, so when I ran the below code I ran into this problem and I wonder if anyone has advice on how to fix it? Any advice would be very appreciated.Thanks

Traceback (most recent call last):
File "C:/", line 281, in <module>
board = Board(input_list)
File "C:", line 144, in __init__
self.goal_labels = [int(input_list[0]), int(input_list[1])]
IndexError: list index out of range

import math
import sys
import random
from enum import Enum

'''
Uses Q-learning algorithm to determine the best path to a goal state
'''

# Constants
LIVING_REWARD = -0.1
GOAL_REWARD = 100
FORBIDDEN_REWARD = -100
GAMMA = 0.2 # Discount Rate
ALPHA = 0.1 # Learning Rate
EPSILON = 0.1 # Greedy Probability
MAX_ITERATIONS = 10000 # Will be 10,000
PRECISION_LIMIT = 0.009 # Limit of iterations
START_LABEL = 2

'''
NOTE: All representations of directions in arrays (such as in actions, neighbors, q_values)
will be represented numerically in the same order, clockwise, starting with LEFT
This is shown in the ENUM class - can remember using acronym "L.U.R.D."
s
alpha
s_prime
alpha_prime
gamma
Q(s,alpha) <-- (1-alpha)*Q(s,alpha) + alpha*(R(s,alpha,s_prime) + [gamma * max_over_a_prime(Qk(s_prime,alpha_prime)))]

1. two-digit precision for convergence
2. set epsilon to 0 after convergence

Active reinforcement learning
S = set of states
A = set of actions per state
T(s,a,s') = model of transitions
R(s,a,s') = reward function
pi(s) = policy
don't know T or R
Q0(s,a,) = 0
state - current loc, prev loc, visual scope
'''
class Action(Enum):
    LEFT = 0
    UP = 1
    RIGHT = 2
    DOWN = 3
    EXIT = 4
    
    def __str__(self):
        if self.value == 0:
            return '\u2190'
        elif self.value == 1:
            return '\u2191'
        elif self.value == 2:
            return '\u2192'
        elif self.value == 3:
            return '\u2193'
        else:
            return 'X'

class Tile:
    
    def __init__(self, label, neighbors):
        '''
        Attributes:
            label (int): index of tile in visual representation of grid (1-16)
            neighbors (list[int]): labels of neighboring tiles in grid, in order [left,up,right,down]
        '''
        self.label = label
        self.neighbors = neighbors
        # Default Tile creation assumes tile is not special and has no possible actions
        # This will be changed during board creation
        self.q_values = [None]*5
        self.actions = [None]*5
        self.isGoal = False
        self.isForbidden = False
        self.isWall = False

    def get_max_q(self):
        if self.isWall:
            return 0
        else:
            return max(q for q in self.q_values if q is not None)

    def get_best_action(self):
        return Action(self.q_values.index(self.get_max_q()))

    def update_q(self, q, direction):
        self.q_values[direction.value] = q
    
    def __str__(self):
        tile_string = '[' + str(self.label) + ']:'
        for i in range(4):
            cur_q = self.q_values[i]
            if cur_q is not None:
                tile_string += str(self.q_values[i]) + '|'
            else:
                tile_string += 'n|'
        tile_string += '___'
        for action in self.actions:
            if action is not None:
                tile_string += str(action) + ','
        return tile_string

class Board:

    def __init__(self, input_list):
        self.tiles = [[1]*4 for n in range(4)]
        # tiles[row][col]
        # max_row = height - 1
        # max_col = width - 1
        # Neighbors are in order left, up, right, down
        self.tiles[0][0] = Tile(1,[None,5,2,None])
        self.tiles[0][1] = Tile(2,[1,6,3,None])
        self.tiles[0][2] = Tile(3,[2,7,4,None])
        self.tiles[0][3] = Tile(4,[3,8,None,None])

        self.tiles[1][0] = Tile(5,[None,9,6,1])
        self.tiles[1][1] = Tile(6,[5,10,7,2])
        self.tiles[1][2] = Tile(7,[6,11,8,3])
        self.tiles[1][3] = Tile(8,[7,12,None,4])

        self.tiles[2][0] = Tile(9,[None,13,10,5])
        self.tiles[2][1] = Tile(10,[9,14,11,6])
        self.tiles[2][2] = Tile(11,[10,15,12,7])
        self.tiles[2][3] = Tile(12,[11,16,None,8])

        self.tiles[3][0] = Tile(13,[None,None,14,9])
        self.tiles[3][1] = Tile(14,[13,None,15,10])
        self.tiles[3][2] = Tile(15,[14,None,16,11])
        self.tiles[3][3] = Tile(16,[15,None,None,12])

        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])

        # Changes default tiles to special ones specified by user
        # Updates actions for all tiles according to available neighbors and special status 
        for row in range(4):
            for col in range(4):
                cur_tile = self.tiles[row][col]
                if (cur_tile.label in self.goal_labels):
                    cur_tile.isGoal = True
                    cur_tile.actions.insert(4,Action.EXIT)
                    cur_tile.q_values.insert(4,0)
                elif (cur_tile.label == self.forbidden_label):
                    cur_tile.isForbidden = True
                    cur_tile.actions.insert(4,Action.EXIT)
                    cur_tile.q_values.insert(4,0)
                elif (cur_tile.label == self.wall_label):
                    cur_tile.isWall = True
                else:
                    for direction_num in range(4):
                        if cur_tile.neighbors[direction_num] is not None:
                            cur_tile.actions.insert(direction_num, Action(direction_num))
                            cur_tile.q_values.insert(direction_num, 0)
                            
    def get_tile(self, label):
        for row in self.tiles:
            for tile in row:
                if (tile.label == label):
                    return tile
        print("ERROR IF TILE NOT FOUND")
    
    def update_q(self, q, direction, tile_label):
        for row in range(4):
            for col in range(4):
                cur_tile = self.tiles[row][col]
                if (cur_tile.label == tile_label):
                    cur_tile.update_q(q, direction)
        
    def print_to_file(self):
        outF = open("board_output.txt","w")
        outF.writelines(str(self))
        outF.close()

    def print_all_states(self):
        for row in self.tiles:
            for tile in row:
                best_action_str = None
                if tile.isGoal or tile.isForbidden:
                    best_action_str = 'EXIT'
                elif tile.isWall:
                    best_action_str = 'WALL'
                else:
                    best_action_str = str(tile.get_best_action())
                print(str(tile.label) + best_action_str)

    def print_tile_values(self, index):
        tile = self.get_tile(index)
        if tile.isGoal:
            print('EXIT +100')
        elif tile.isForbidden:
            print('EXIT -100')
        elif tile.isWall:
            print('WALL 0')
        else:
            for q in tile.q_values:
                if q is not None:
                    print(str(Action(tile.q_values.index(q))) + ' ' + str(q))

    def __str__(self):
        tile_string = ''
        for row in range(3,-1,-1):
            for col in range(4):
                tile_string += str(self.tiles[row][col]) + '    '
            tile_string += '\n\n'
        return tile_string

def q_learn(board, print_all_states, index):
    iterations = 0
    while (iterations < MAX_ITERATIONS):
        # Reset starting variables
        cur_tile = board.get_tile(START_LABEL)
        action = None
        exited = False
        small_iter = 0
        reward_sum = 0
        while (small_iter < 100):
            # TODO: Choose random (0.1) or calculated (0.9) action
            # Currently RANDOM
            if random.uniform(0, 1) < EPSILON:
                action = random.choice([x for x in cur_tile.actions if x is not None])          
            else:
                action = cur_tile.get_best_action()

            # Set reward
            next_tile = board.get_tile(cur_tile.neighbors[action.value])
            if next_tile.isGoal:
                reward = GOAL_REWARD
            elif next_tile.isForbidden:
                reward = FORBIDDEN_REWARD
            else:
                reward = LIVING_REWARD
            reward_sum += reward

            old_q = cur_tile.q_values[action.value]

            # Calculate and update q value
            new_q = old_q + ALPHA*(reward + GAMMA*next_tile.get_max_q() - old_q)
            cur_tile.q_values[action.value] = new_q

            if next_tile.isGoal:
                # print("GOOOOOOAAAAAAAL")
                break
            elif next_tile.isForbidden:
                # print("DEATH")
                break
            else:
                if next_tile.isWall:
                    yeet = 0
                    # print("YOU HIT A WALL")
                else:
                    cur_tile = next_tile
            small_iter += 1
        if (small_iter > 98):
            print("YOU ALMOST HAD AN INFINITE LOOP HUNNNY")
        iterations += 1
    if (print_all_states):
        board.print_all_states()
    else:
        board.print_tile_values(index)

if __name__ == "__main__":

    input_list = sys.argv
    input_list.pop(0)
    board = Board(input_list)

    board.print_to_file()

    if (len(input_list) == 5) and (input_list[4] == 'p'):
        q_learn(board, True, 0)
    elif (len(input_list) == 6) and (input_list[4] == 'q'):
        q_learn(board, False, int(input_list[5]))
    else:
        print('Invalid input, please run again.')

RE: How to fix list index out of range - deanhystad - Apr-13-2022

What arguments did you supply? None? When you run this program you need to provide:
2 goal labels
1 forbidden label
1 wall label

        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])

I have no idea what any of that means.

I think it is a bad idea forcing the user to provide command line arguments. Command line arguments should be optional, and your program should provide appropriate default values when not arguments are not supplied.

RE: How to fix list index out of range - longmen - Apr-15-2022

Hi , Thanks for your response. Here is the input that I am supposed to the feed to the program 14 12 7 6 p. However, I seem to not be able to figure out where to feed the input into this code. Also, I am trying to edit the code to ask for user input every time it runs; however, I still cannot figure it out for the last two days. I wonder if you could help?

(Apr-13-2022, 02:45 AM)deanhystad Wrote: What arguments did you supply? None? When you run this program you need to provide:
2 goal labels
1 forbidden label
1 wall label
        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])
I have no idea what any of that means.

I think it is a bad idea forcing the user to provide command line arguments. Command line arguments should be optional, and your program should provide appropriate default values when not arguments are not supplied.

RE: How to fix list index out of range - deanhystad - Apr-15-2022

The program takes input as command line arguments.

Output:
python program.py 14 12 7 6 p

Replace "program" with the name of your program.

If you want to modify the program so it takes user input instead of using command line arguments you need to replace these lines:

# change these to get user input instead of using command line args in input_list
self.goal_labels = [int(input_list[0]), int(input_list[1])]
self.forbidden_label = int(input_list[2])
self.wall_label = int(input_list[3])

    input_list = sys.argv   # Remove
    input_list.pop(0)  # Remove
    board = Board(input_list)  # Remove input_list
 
    # Get this as user input instead of command line args
    if (len(input_list) == 5) and (input_list[4] == 'p'):
        q_learn(board, True, 0)
    elif (len(input_list) == 6) and (input_list[4] == 'q'):
        q_learn(board, False, int(input_list[5]))
    else:
        print('Invalid input, please run again.')

RE: How to fix list index out of range - longmen - Apr-18-2022

I wonder if you could explain and simplify this line of code

self.tiles = [[1]*4 for n in range(4)]

as well as this one? Thanks

action = random.choice([x for x in cur_tile.actions if x is not None])

RE: How to fix list index out of range - deanhystad - Apr-18-2022

These are both list comprehensions, a short and efficient way of building lists. You can read about list comprehensions here:

https://docs.python.org/3/tutorial/datastructures.html#list-comprehensions

This code creates a lists of lists.

self.tiles = [[1]*4 for n in range(4)]

You could figure this out yourself by writing a short test program.

tiles = [[1]*4 for n in range(4)]
print(tiles)

tile = [1] * 4
print(tile)

Output:[[1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1], [1, 1, 1, 1]]
[1, 1, 1, 1]

The [1] x 4 makes a list containing four 1's
The [[1]x4 for n in range(4)] makes a list that contains four of these lists.

This code randomly choses a tile action that is not None (I don't know what the tile actions are supposed to be).

action = random.choice([x for x in cur_tile.actions if x is not None])

The following for loop accomplishes the same task:

actions = []
for x in cur_tile.actions:
    if x is not None:
        actions.append(x)
action = random.choice(actions)

You can read about random.choice here.

https://docs.python.org/3/library/random.html

RE: How to fix list index out of range - longmen - Apr-20-2022

@deanhystad Thanks for your responses. It has been very helpful.
I am testing this input 12 7 5 6 p with the code and it prints this output:
1Action.RIGHT
2Action.RIGHT
3Action.UP
4Action.LEFT
5EXIT
6WALL
7EXIT
8Action.LEFT
9Action.UP
10Action.LEFT
11Action.LEFT
12EXIT
13Action.RIGHT
14Action.LEFT
15Action.LEFT
16Action.LEFT
However I am looking for this output:
1 Action.right
2 Action.right
3 Action.up
4 Action.up
5 EXIT
6 wall-square
7 EXIT
8 Action.up
9 Action.up
10 Action.up
11 Action.up
12 EXIT
13 Action.up
14 Action.up
15 Action.up
16 Action.up
I followed the clockwise priority from up, right, down, left and it did not match. I wonder if you have any advice? Thanks

RE: How to fix list index out of range - deanhystad - Apr-23-2022

I would start by verifying the board. For each tile verify the neighbors, actions and q_values are correct. You cannot debug the problem if the starting conditions are wrong.

You use insert() to modify actions and q_values in board.__init__(). Should you be using indexing?

You never call update_q(). I don't know if that is a bug or just old unused code.

RE: How to fix list index out of range - longmen - Apr-23-2022

It has been three days and I could not make much progress. This is how close I am getting to the expected result. However, it still does not print out the out put that I want.
For example with this
with this input 15 12 8 6 p
I expect this result
1 up
2 right
3 up
4 left
5 up
6 wall-square
7 up
8 forbid
9 up
10 up
11 up
12 goal
13 right
14 right
15 goal
16 up

with this input 15 12 8 6 q 11
i am expecting this

up 100.0
right 100.0
down 0.89
left 0.89

Updated: I am able to print out correctly with this input 15 12 8 6 q 11. However, it throws an error when I tried different inputs such as
12 7 5 6 q 3

Traceback (most recent call last):
File "C:", line 200, in <module>
user_input()
File "C:", line 197, in user_input
environment.print_four_Q_value(int(input_list[5]))
File "C:", line 142, in print_four_Q_value
print("down" + ' ' + str(round(episode.qValues[3], 2)))
TypeError: type NoneType doesn't define __round__ method
up 100.0
right 0.89

The expected output is
up 100.0
right 0.89
down 9.9
left 0.89

I wonder if you have anymore advice?

import random
import numpy as np
import enum

EACH_STEP_REWARD = -0.1
GOAL_SQUARE_REWARD = 100
FORBIDDEN_SQUARE_REWARD = -100
DISCOUNT_RATE_GAMMA = 0.1  # Discount Rate
LEARNING_RATE_ALPHA = 0.3  # Learning Rate
GREEDY_PROBABILITY_EPSILON = 0.5  # Greedy Probability
ITERATION_MAX_NUM = 10000  # Will be 10,000
START_LABEL = 2
LEVEL = 4
HEIGHT = 4
WEIGHT = 4


class Direction(enum.Enum):
    up = 1
    right = 2
    down = 3
    left = 0


class Node:
    def __init__(self, title, next, Goal=False, Forbidden=False, Wall=False, qValues=None, actions=None):
        self.title = title
        self.next = next
        self.qValues = [qValues] * 5
        self.move = [actions] * 5
        self.goal = Goal
        self.forbidden = Forbidden
        self.wall = Wall

    def max_Q_value(self):
        if self.wall:
            return False
        max_q = []
        for q in self.qValues:
            if q is not None:
                max_q.append(q)
        return max(max_q)

    def find_best_move(self):
        max_q = self.max_Q_value()
        q_index = self.qValues.index(max_q)
        return Direction(q_index)


class create_env:
    def __init__(self, input_list, wall=None):
        self.wall = wall
        self.episode = [[13, 14, 15, 16], [9, 10, 11, 12], [5, 6, 7, 8], [1, 2, 3, 4]]
        S = 2
        Node_1 = Node(1, [self.wall, 5, S, self.wall])
        Node_Start = Node(S, [1, 6, 3, self.wall])
        Node_3 = Node(3, [S, 7, 4, self.wall])
        Node_4 = Node(4, [3, 8, self.wall, self.wall])
        Node_5 = Node(5, [self.wall, 9, 6, 1])
        Node_6 = Node(6, [5, 10, 7, S])
        Node_7 = Node(7, [6, 11, 8, 3])
        Node_8 = Node(8, [7, 12, self.wall, 4])
        Node_9 = Node(9, [self.wall, 13, 10, 5])
        Node_10 = Node(10, [9, 14, 11, 6])
        Node_11 = Node(11, [10, 15, 12, 7])
        Node_12 = Node(12, [11, 16, self.wall, 8])
        Node_13 = Node(13, [self.wall, self.wall, 14, 9])
        Node_14 = Node(14, [13, self.wall, 15, 10])
        Node_15 = Node(15, [14, self.wall, 16, 11])
        Node_16 = Node(16, [15, self.wall, self.wall, 12])

        self.episode[0][0] = Node_1
        self.episode[0][1] = Node_Start
        self.episode[0][S] = Node_3
        self.episode[0][3] = Node_4
        self.episode[1][0] = Node_5
        self.episode[1][1] = Node_6
        self.episode[1][S] = Node_7
        self.episode[1][3] = Node_8
        self.episode[S][0] = Node_9
        self.episode[S][1] = Node_10
        self.episode[S][S] = Node_11
        self.episode[S][3] = Node_12
        self.episode[3][0] = Node_13
        self.episode[3][1] = Node_14
        self.episode[3][S] = Node_15
        self.episode[3][3] = Node_16

        self.goal_labels = [int(input_list[0]), int(input_list[1])]
        self.forbidden_label = int(input_list[2])
        self.wall_label = int(input_list[3])
        x = 0
        while x < LEVEL:
            y = 0
            while y < LEVEL:
                current_episode = self.episode[x][y]
                if current_episode.title in self.goal_labels:
                    current_episode.goal = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.forbidden_label:
                    current_episode.forbidden = 1
                    current_episode.move.insert(4, 0)
                    current_episode.qValues.insert(4, 0)
                elif current_episode.title == self.wall_label:
                    current_episode.wall = 1
                else:
                    position = 0
                    while position < LEVEL:
                        if current_episode.next[position] is not None:
                            current_episode.move.insert(position,
                                                        Direction(position)), current_episode.qValues.insert(
                                position, False)
                        position += 1
                y += 1
            x += 1

    def get_episode(self, name):
        for x in self.episode:
            for episode in x:
                if episode.title == name:
                    # print(episode)
                    return episode

    def print_best_actions(self):
        for row in self.episode:
            for episode in row:
                if episode.goal:
                    best_action_str = 'Direction.goal'
                elif episode.forbidden:
                    best_action_str = "Direction.forbid"
                elif episode.wall:
                    best_action_str = 'Direction.wall-square'
                else:
                    best_action_str = str(episode.find_best_move())
                print(str(episode.title) + " " + best_action_str[10:])

    def print_four_Q_value(self, index):
        episode = self.get_episode(index)
        print("up" + ' ' + str(round(episode.qValues[1], 2)))
        print("right" + ' ' + str(round(episode.qValues[2], 2)))
        print("down" + ' ' + str(round(episode.qValues[3], 2)))
        print("left" + ' ' + str(round(episode.qValues[0], 2)))


def Q_learning(environment, print_best_actions, index):
    for iteration in range(ITERATION_MAX_NUM):
        current_episode = environment.get_episode(START_LABEL)
        total_episode_reward = 0
        for episode in range(100):
            if np.random.uniform(0, 1) < GREEDY_PROBABILITY_EPSILON:
                next_move = []
                for score in current_episode.move:
                    if score is not None:
                        next_move.append(score)
                next_move = random.choice(next_move)
            else:
                next_move = current_episode.find_best_move()
            next_episode = environment.get_episode(current_episode.next[next_move.value])
            if next_episode.goal:
                reward = GOAL_SQUARE_REWARD
            elif next_episode.forbidden:
                reward = FORBIDDEN_SQUARE_REWARD
            else:
                reward = EACH_STEP_REWARD
            total_episode_reward += reward

            old_q = current_episode.qValues[next_move.value]
            new_q = old_q + LEARNING_RATE_ALPHA * (reward + DISCOUNT_RATE_GAMMA * next_episode.max_Q_value() - old_q)
            current_episode.qValues[next_move.value] = new_q
            if next_episode.goal:
                break
            elif next_episode.forbidden:
                break
            else:
                if next_episode.wall:
                    break
                else:
                    current_episode = next_episode


def user_input():
    try:
        input_list = []
        input_str = input()
        input_list = input_str.split()
    except:
        print("The input should be like: 15 12 8 6 p")

    environment = create_env(input_list)

    if (len(input_list) == 5) and (input_list[-1] == 'p'):
        Q_learning(environment, 1, 0)
        environment.print_best_actions()
    elif (len(input_list) == 6) and (input_list[-2] == 'q'):
        Q_learning(environment, 0, int(input_list[5]))
        environment.print_four_Q_value(int(input_list[5]))


user_input()

RE: How to fix list index out of range - deanhystad - Apr-23-2022

I still think your building the board incorrectly. You should not be using insert() to set moves or values.

You should describe how your Q_learning function is supposed to work.