entry #1
written by kimapr
submitted at
0 likes
guesses
- kimapr (by oleander)
- oleander (by Makefile_dot_in)
comments 0
battleship.py ASCII text
from learner import Learner
import torch
import random

# train the gamer

W = 10
H = 10

class HalfBattleship:
    def __init__(self):
        self.W = W
        self.H = H
        self.board = [None for i in range(W*H)]
        self.hits = [None for i in range(W*H)]
        self.pieces = []
        # place five ships (lengths 2..6) at random, without overlaps
        for l in [2,3,4,5,6]:
            while True:
                h = random.randint(0, 1) == 1
                if h:
                    x = random.randrange(0, self.W - l + 1)
                    y = random.randrange(0, self.H)
                    row = [(x+dx,y) for dx in range(0, l)]
                else:
                    x = random.randrange(0, self.W)
                    y = random.randrange(0, self.H - l + 1)
                    row = [(x,y+dy) for dy in range(0, l)]
                if len([v for v in filter(lambda o: self.get(o[0], o[1])[0] is not None, row)]) == 0:
                    for x, y in row:
                        self.board[x + y*self.W] = True
                    self.pieces.append((x,y,h))
                    break

    def get(self, x, y):
        return (self.board[x + y*self.W], self.hits[x + y*self.W])

    def state(self):
        # two inputs per cell: (revealed hit, revealed miss)
        return [item for it in [
            [
                1 if x is not None and x else 0,
                1 if x is not None and not x else 0
            ] for x in self.hits
        ] for item in it]

    def actions(self):
        # one entry per cell; None once the cell has already been fired on
        return [(x,y) if self.get(x,y)[1] is None else None for y in range(self.H) for x in range(self.W)]

    def get_str(self, x, y):
        piece, hit = self.get(x, y)
        if hit is not None:
            return '#' if hit else '.'
        return 'o' if piece is not None else ' '

    def print(self, other):
        if self.W != other.W or self.H != other.H:
            raise ValueError('dimensions unequal')
        print('/'+''.join(['-' for i in range(0,self.W+self.W-1)])+'\\ /'+
              ''.join(['-' for i in range(0,other.W+other.W-1)])+'\\')
        for y in range(self.H):
            print("|" + (' '.join([self.get_str(x,y) for x in range(self.W)])) + "| |" +
                  (' '.join([other.get_str(x,y) for x in range(other.W)]))+'|')
        print('\\'+''.join(['-' for i in range(0,self.W+self.W-1)])+'/ \\'+
              ''.join(['-' for i in range(0,other.W+other.W-1)])+'/')

    def play(self, x, y):
        piece, hit = self.get(x, y)
        if hit is not None:
            return None
        hit = True if piece else False
        self.hits[x + y*self.W] = hit
        # the game ends once every ship cell has been hit
        if len([v for v in filter(lambda v: v[0] is not None and v[1] is None,
                [self.get(x,y) for y in range(self.H) for x in range(self.W)])]) == 0:
            return (True, True)
        return (False, hit)

if __name__ == '__main__':
    state = None
    try:
        state = torch.load("model-bship.pt", weights_only = True)
    except:
        pass

    players = [1,2]
    games = [HalfBattleship() for pl in players]
    states = [None for pl in players]
    acts = [None for pl in players]
    next_states = [None for pl in players]
    agent = Learner(len(games[0].actions()), len(games[0].state()), state = state, EPS_DECAY=10000)

    while True:
        for i, pl in enumerate(players):
            print()
            game = games[i]
            states[i] = game.state()
            next_states[i] = None
            state = states[i]
            act = None
            actions = game.actions()
            # punish shots at already-revealed cells until a legal one is chosen
            while act is None:
                act = agent.decide(state)
                if actions[act] is None:
                    print('badact', act)
                    agent.learn(state, act, state, -1)
                    act = None
            acts[i] = act
            term, winner = game.play(actions[act][0], actions[act][1])
            print("state:",(term,winner))
            next_states[i] = game.state()
            print("learn", 1 if winner else -0.1)
            agent.learn(states[i], acts[i], next_states[i], 1 if winner else 0)
            other_reward = 0
            if term and winner:
                print()
                print("@@@@")
                print("@@@@")
                print("@@@@ WINNER :"+str(pl)+" @@@@")
                print("@@@@")
                print("@@@@")
                print()
            coverage = len([v for v in filter(lambda v: v is not None, game.hits)]) / len(game.hits)
            print(str(round(coverage*100))+"% covered")
            games[0].print(games[1])
            torch.save(agent.get_model_state(), "model-bship.pt")
            if term or coverage > 0.6:
                games = [HalfBattleship() for pl in players]
                states = [None for pl in players]
                acts = [None for pl in players]
                next_states = [None for pl in players]
                break
gamer.py ASCII text
from learner import Learner
from battleship import HalfBattleship
import torch
import random

state = None
try:
    state = torch.load("model-bship.pt", weights_only = True)
except:
    pass

game = HalfBattleship()
agent = Learner(len(game.actions()), len(game.state()), state = state, layers = [100,100])

# announce the agent's own ship placements: orientation, row letter, column digit
for x, y, h in game.pieces:
    print(('-' if h else '|')+("ABCDEFGHIJ")[y]+("0123456789")[x])

gaming = True
while gaming:
    act = None
    actions = game.actions()
    act = agent.decide(game.state(), final=True)
    while actions[act] is None:
        act = random.randrange(0, len(actions))
    # print the shot as row letter + column digit, then wait for the verdict
    print(("ABCDEFGHIJ")[actions[act][1]]+("0123456789")[actions[act][0]])
    while True:
        try:
            line = input()
        except:
            gaming = False
            break
        if line == "WIN":
            gaming = False
            break
        if line == "MISS" or line == "HIT":
            game.hits[actions[act][0] + actions[act][1]*game.W] = line == "HIT"
            break
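gamer.py speaks a simple line protocol over stdin/stdout: it first prints one placement line per ship (orientation character, row letter, column digit), then repeatedly prints a shot such as "B3" and waits for "HIT", "MISS" or "WIN" on standard input. A minimal scripted opponent, sketched under the assumption that gamer.py and battleship.py sit in the working directory (this driver is illustrative, not part of the entry):

import subprocess
from battleship import HalfBattleship

board = HalfBattleship()                      # hidden board the agent fires at
proc = subprocess.Popen(
    ["python3", "-u", "gamer.py"],
    stdin=subprocess.PIPE, stdout=subprocess.PIPE, text=True,
)

for _ in range(5):                            # one placement line per ship (lengths 2..6)
    print("agent ship:", proc.stdout.readline().strip())

rows = "ABCDEFGHIJ"
while True:
    guess = proc.stdout.readline().strip()    # e.g. "B3": row letter + column digit
    x, y = int(guess[1]), rows.index(guess[0])
    term, hit = board.play(x, y)
    if term:                                  # every ship cell has been hit
        proc.stdin.write("WIN\n")
        proc.stdin.flush()
        break
    proc.stdin.write("HIT\n" if hit else "MISS\n")
    proc.stdin.flush()
proc.wait()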
learner.py ASCII text
# i dont know the math i stole it from https://docs.pytorch.org/tutorials/intermediate/reinforcement_q_learning.html

import math
import random
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F

device = torch.device(
    "cuda" if torch.cuda.is_available() else
    "mps" if torch.backends.mps.is_available() else
    "cpu"
)

Transition = namedtuple('Transition', ('state', 'action', 'next_state', 'reward'))

class ReplayMemory(object):
    def __init__(self, capacity):
        self.memory = deque([], maxlen=capacity)

    def push(self, *args):
        self.memory.append(Transition(*args))

    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)

    def __len__(self):
        return len(self.memory)

class DQN(nn.Module):
    def __init__(self, n_observations, n_actions, layers):
        super(DQN, self).__init__()
        self.layers = []
        self.layers.append(nn.Linear(n_observations, layers[0]))
        for i, lay in enumerate(layers[:-1]):
            self.layers.append(nn.Linear(lay, layers[i+1]))
        self.layers.append(nn.Linear(layers[len(layers) - 1], n_actions))
        # interleave a ReLU after every linear layer except the last
        self.layers = nn.Sequential(*[item for it in [[l, nn.ReLU()] for l in self.layers] for item in it][:-1])

    def forward(self, x):
        return self.layers(x)

class Learner:
    def __init__(self, n_actions, n_observations, state = None, layers = [128, 128],
            BATCH_SIZE = 128,
            GAMMA = 0.99,
            EPS_START = 0.9,
            EPS_END = 0.05,
            EPS_DECAY = 1000,
            TAU = 0.005,
            LR = 1e-4
            ):
        self.BATCH_SIZE = BATCH_SIZE
        self.GAMMA = GAMMA
        self.EPS_START = EPS_START
        self.EPS_END = EPS_END
        self.EPS_DECAY = EPS_DECAY
        self.TAU = TAU
        self.LR = LR
        self.n_observations = n_observations
        self.n_actions = n_actions
        if state is not None and 'layers' in state:
            layers = state['layers']
        self.layers = layers
        self.policy_net = DQN(n_observations, n_actions, layers).to(device)
        self.steps_done = 0
        if state is not None:
            self.policy_net.load_state_dict(state['model'])
            self.steps_done = state['steps_done']
        self.target_net = DQN(n_observations, n_actions, layers).to(device)
        self.target_net.load_state_dict(self.policy_net.state_dict())
        self.state = None
        self.optimizer = optim.AdamW(self.policy_net.parameters(), lr=self.LR, amsgrad=True)
        self.memory = ReplayMemory(10000)

    def get_model_state(self):
        return {
            'model': self.policy_net.state_dict(),
            'steps_done': self.steps_done,
            'layers': self.layers
        }

    def select_action(self, state, final=False):
        sample = random.random()
        if final:
            eps_threshold = 0
        else:
            eps_threshold = self.EPS_END + (self.EPS_START - self.EPS_END) * \
                math.exp(-1. * self.steps_done / self.EPS_DECAY)
        self.steps_done += 1
        if sample > eps_threshold:
            with torch.no_grad():
                return self.policy_net(state).max(1).indices.view(1, 1)
        else:
            return torch.tensor([[random.randrange(0, self.n_actions)]], device=device, dtype=torch.long)

    def optimize_model(self):
        memory = self.memory
        optimizer = self.optimizer
        policy_net = self.policy_net
        if len(memory) < self.BATCH_SIZE:
            return
        transitions = memory.sample(self.BATCH_SIZE)
        batch = Transition(*zip(*transitions))
        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state)),
                                      device=device, dtype=torch.bool)
        non_final_next_states = torch.cat([s for s in batch.next_state if s is not None])
        state_batch = torch.cat(batch.state)
        action_batch = torch.cat(batch.action)
        reward_batch = torch.cat(batch.reward)
        state_action_values = policy_net(state_batch).gather(1, action_batch)
        next_state_values = torch.zeros(self.BATCH_SIZE, device=device)
        with torch.no_grad():
            next_state_values[non_final_mask] = self.target_net(non_final_next_states).max(1).values
        expected_state_action_values = (next_state_values * self.GAMMA) + reward_batch
        criterion = nn.SmoothL1Loss()
        loss = criterion(state_action_values, expected_state_action_values.unsqueeze(1))
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_value_(policy_net.parameters(), 100)
        optimizer.step()

    def decide(self, state, final=False):
        state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
        action = self.select_action(state, final=final)
        return action

    def learn(self, state, action, observation, reward):
        state = torch.tensor(state, dtype=torch.float32, device=device).unsqueeze(0)
        if not observation:
            next_state = None
        else:
            next_state = torch.tensor(observation, dtype=torch.float32, device=device).unsqueeze(0)
        reward = torch.tensor([reward], device=device)
        self.memory.push(state, action, next_state, reward)
        self.optimize_model()
        # soft update of the target network's weights
        target_net_state_dict = self.target_net.state_dict()
        policy_net_state_dict = self.policy_net.state_dict()
        for key in policy_net_state_dict:
            target_net_state_dict[key] = policy_net_state_dict[key]*self.TAU + target_net_state_dict[key]*(1-self.TAU)
        self.target_net.load_state_dict(target_net_state_dict)
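Learner wraps the tutorial's DQN loop behind two calls: decide() takes a flat list of floats and returns the chosen action index as a 1x1 tensor, and learn() stores one transition, runs an optimisation step, and soft-updates the target network. A minimal usage sketch on a made-up two-action toy problem (the environment and the file name model-toy.pt are invented for illustration, not part of the entry):

from learner import Learner
import torch
import random

agent = Learner(n_actions=2, n_observations=4, layers=[16, 16])
state = [0.0, 1.0, 0.0, 0.0]
for step in range(20):
    act = agent.decide(state)                 # 1x1 long tensor holding the action index
    next_state = [random.random() for _ in range(4)]
    reward = 1 if int(act) == 1 else 0        # toy reward: action 1 is always better
    agent.learn(state, act, next_state, reward)
    state = next_state
torch.save(agent.get_model_state(), "model-toy.pt")   # same format the game scripts save and load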
model-ttt.pt data
ttt.py ASCII text
from learner import Learner
import torch

# it's for initial test

def checkrow(row):
    if len(row) == 0:
        return None
    if row[0] is None:
        return None
    for x in row:
        if x != row[0]:
            return None
    return row[0]

class TicTacToe:
    def __init__(self, W, H, L):
        self.W = W
        self.H = H
        self.L = L
        self.board = [None for i in range(W*H)]

    def get(self, x, y):
        return self.board[x + y*self.W]

    def state(self, pl):
        # two inputs per cell: (own mark, opponent's mark)
        return [item for it in [
            [
                1 if x == pl else 0,
                1 if x is not None and x != pl else 0
            ] for x in self.board
        ] for item in it]

    def actions(self):
        return [(x,y) if self.get(x,y) is None else None for y in range(self.H) for x in range(self.W)]

    def print(self):
        print("===")
        for y in range(self.H):
            print(" " + (' '.join([str(self.get(x,y)) if self.get(x,y) is not None else '.' for x in range(self.W)])))
        print("===")

    def play(self, x, y, pl):
        v = self.get(x, y)
        if v is not None:
            return None
        self.board[x + y*self.W] = pl
        # rows
        for x, y in [(x,y) for y in range(self.H) for x in range(self.W - self.L + 1)]:
            row = [self.get(x+d, y) for d in range(self.L)]
            row = checkrow(row)
            if row is not None:
                return (True, row)
        # cols
        for x, y in [(x,y) for y in range(self.H - self.L + 1) for x in range(self.W)]:
            col = [self.get(x, y+d) for d in range(self.L)]
            col = checkrow(col)
            if col is not None:
                return (True, col)
        # diags
        for x, y in [(x,y) for y in range(self.H - self.L + 1) for x in range(self.W - self.L + 1)]:
            dia1 = [self.get(x+d, y+d) for d in range(self.L)]
            dia2 = [self.get(x+self.L-1-d, y+d) for d in range(self.L)]
            dia1 = checkrow(dia1)
            dia2 = checkrow(dia2)
            if dia1 is not None:
                return (True, dia1)
            if dia2 is not None:
                return (True, dia2)
        if len([i for i in filter(lambda x: x is not None, self.actions())]) == 0:
            return (True, None)
        return (False, None)

W = 3
H = 3
L = 3
game = TicTacToe(W,H,L)

state = None
try:
    state = torch.load("model-ttt.pt", weights_only = True)
except:
    pass

agent = Learner(len(game.actions()), len(game.state(1)), state = state)

players = [1,2]
states = [None for pl in players]
acts = [None for pl in players]
next_states = [None for pl in players]

while True:
    for i, pl in enumerate(players):
        print()
        states[i] = game.state(pl)
        next_states[i] = None
        state = states[i]
        act = None
        actions = game.actions()
        while act is None:
            act = agent.decide(state)
            if actions[act] is None:
                print('badact', act)
                agent.learn(state, act, None, -0.5)
                act = None
        acts[i] = act
        term, winner = game.play(actions[act][0], actions[act][1], pl)
        print("state:",(term,winner))
        next_states[i] = game.state(pl)
        other_reward = 0
        if term and winner == pl:
            print()
            print("@@@@")
            print("@@@@")
            print("@@@@ WINNER :"+str(pl)+" @@@@")
            print("@@@@")
            print("@@@@")
            print()
            other_reward = -1
            print('learn', 1)
            agent.learn(states[i], acts[i], next_states[i], 1)
        elif term:
            print('learn', 0)
            agent.learn(states[i], acts[i], next_states[i], 0)
        for j, pln in filter(lambda p: p[1] != pl, enumerate(players)):
            if acts[j] is not None:
                print('learn', other_reward)
                agent.learn(states[j], acts[j], next_states[j], other_reward)
        game.print()
        if term:
            states = [None for pl in players]
            acts = [None for pl in players]
            next_states = [None for pl in players]
            game = TicTacToe(W,H,L)
            torch.save(agent.get_model_state(), "model-ttt.pt")
            break