Tic-Tac-Toe Python test

#!/usr/bin/env python3
"""
Tic-Tac-Toe with simple reinforcement learning (Monte Carlo control)
- Runs in a terminal (works in Pydroid on Android)
- Can train by self-play, play human vs AI, or AI vs AI
- Learns state-action values and improves over time
- Save/Load the learned policy to/from JSON

How it learns (short version):
We record the sequence of (state, action, player) for a game. At the end:
    winner's actions get +1, loser's actions get -1, draws get 0
We update a value table Q[(state, player)][action] by incremental averaging.
Policy is epsilon-greedy w.r.t. Q, so it explores a bit while learning.
"""
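
# A quick worked example of the update used in episode() (illustrative numbers,
# not taken from a real run): suppose some (state, player, action) has been
# visited 3 times with a running mean Q = 0.2, and the new game's return for
# that player is G = 1.0. The visit count becomes n = 4, and the incremental
# average gives Q <- 0.2 + (1.0 - 0.2) / 4 = 0.4, i.e. the same result as
# averaging all four returns directly.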

import json
import os
import random
from collections import defaultdict
from typing import Dict, List, Tuple

# =========================
# Game mechanics
# =========================

EMPTY = " "
PLAYERS = ("X", "O")
WIN_LINES = [
    (0, 1, 2), (3, 4, 5), (6, 7, 8),  # rows
    (0, 3, 6), (1, 4, 7), (2, 5, 8),  # cols
    (0, 4, 8), (2, 4, 6)              # diagonals
]

Board = List[str]
State = str  # 9-char string of 'X', 'O', or ' '
Action = int  # index 0..8
QKey = Tuple[State, str]  # (state, current_player)

def new_board() -> Board:
    return [EMPTY] * 9

def board_to_state(board: Board) -> State:
    return "".join(board)

def legal_actions(board: Board) -> List[Action]:
    return [i for i, c in enumerate(board) if c == EMPTY]

def check_winner(board: Board) -> str:
    for a, b, c in WIN_LINES:
        if board[a] != EMPTY and board[a] == board[b] == board[c]:
            return board[a]
    if EMPTY not in board:
        return "DRAW"
    return ""  # game continues

def print_board(board: Board) -> None:
    b = [c if c != EMPTY else str(i+1) for i, c in enumerate(board)]
    rows = [f" {b[0]} | {b[1]} | {b[2]}", f" {b[3]} | {b[4]} | {b[5]}", f" {b[6]} | {b[7]} | {b[8]}"]
    print("\n" + "\n-----------\n".join(rows) + "\n")

# =========================
# RL bits (First-Visit Monte Carlo control)
# =========================

class MonteCarloAgent:
    def __init__(self, epsilon: float = 0.1):
        self.epsilon = float(epsilon)
        # Q maps (state, player) -> vector of 9 action values
        self.Q: Dict[QKey, List[float]] = defaultdict(lambda: [0.0]*9)
        # N maps (state, player, action) -> visit count for incremental mean
        self.N: Dict[Tuple[State, str, Action], int] = defaultdict(int)

    def policy(self, board: Board, player: str) -> Action:
        acts = legal_actions(board)
        if not acts:
            return -1
        if random.random() < self.epsilon:
            return random.choice(acts)
        state = board_to_state(board)
        qvals = self.Q[(state, player)]
        # choose best legal; tie-break randomly among maxima
        best_val = max(qvals[a] for a in acts)
        best_actions = [a for a in acts if qvals[a] == best_val]
        return random.choice(best_actions)

    def episode(self, starting_player: str = "X", show: bool = False) -> str:
        """Play one game between two copies of this agent (self-play).
        Returns the result: 'X', 'O', or 'DRAW'."""
        board = new_board()
        current = starting_player
        # history of (state, player, action)
        trajectory: List[Tuple[State, str, Action]] = []

        while True:
            action = self.policy(board, current)
            if action == -1:
                # no legal moves (shouldn't happen)
                result = "DRAW"
                break
            # record state before taking action
            s = board_to_state(board)
            trajectory.append((s, current, action))
            # apply move
            board[action] = current
            outcome = check_winner(board)
            if show:
                print_board(board)
            if outcome == "X" or outcome == "O":
                result = outcome
                break
            if outcome == "DRAW":
                result = "DRAW"
                break
            # switch player
            current = "O" if current == "X" else "X"

        # Assign returns
        if result == "DRAW":
            G = {"X": 0.0, "O": 0.0}
        else:
            G = {"X": 1.0 if result == "X" else -1.0,
                 "O": 1.0 if result == "O" else -1.0}

        # First-visit MC update
        seen = set()
        for s, p, a in trajectory:
            key = (s, p, a)
            if key in seen:
                continue
            seen.add(key)
            qkey = (s, p)
            self.N[(s, p, a)] += 1
            n = self.N[(s, p, a)]
            old = self.Q[qkey][a]
            target = G[p]
            self.Q[qkey][a] = old + (target - old) / n
        return result

    def choose_move(self, board: Board, player: str, greedy: bool = True) -> Action:
        acts = legal_actions(board)
        if not acts:
            return -1
        state = board_to_state(board)
        qvals = self.Q[(state, player)]
        if greedy:
            best_val = max(qvals[a] for a in acts)
            best_actions = [a for a in acts if qvals[a] == best_val]
            return random.choice(best_actions)
        else:
            return self.policy(board, player)

    # ———- persistence ———-
    def save(self, path: str):
        data = {f"{k[0]}|{k[1]}": v for k, v in self.Q.items()}
        with open(path, "w", encoding="utf-8") as f:
            json.dump({"epsilon": self.epsilon, "Q": data}, f)
        print(f"Saved model to {path} ({len(self.Q)} states)")

    def load(self, path: str):
        if not os.path.exists(path):
            print(f"No such file: {path}")
            return
        with open(path, "r", encoding="utf-8") as f:
            blob = json.load(f)
        self.epsilon = float(blob.get("epsilon", 0.1))
        rawQ = blob.get("Q", {})
        self.Q = defaultdict(lambda: [0.0]*9)
        for k, v in rawQ.items():
            state, player = k.split("|")
            vec = [float(x) for x in v]
            if len(vec) != 9:
                vec = (vec + [0.0]*9)[:9]
            self.Q[(state, player)] = vec
        print(f"Loaded model from {path} ({len(self.Q)} states), epsilon={self.epsilon}")
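
    # For reference, the JSON written by save() has this shape (the numbers here
    # are illustrative, not from a trained model):
    #   {"epsilon": 0.1,
    #    "Q": {"    X    |O": [0.0, 0.25, 0.0, ...], ...}}
    # Each key is the 9-character board state, a "|" separator, and the player to
    # move; each value is the list of 9 action values for that (state, player).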

# =========================
# Human interaction
# =========================

MODEL_PATH = "tictactoe_q.json"

def human_turn(board: Board, player: str) -> None:
    while True:
        try:
            move = input(f"Your move ({player}). Enter 1-9: ").strip()
            idx = int(move) - 1
            if idx < 0 or idx > 8:
                raise ValueError
            if board[idx] != EMPTY:
                print("That spot is taken. Try again.")
                continue
            board[idx] = player
            return
        except ValueError:
            print("Please type a number 1-9 corresponding to the board position.")

def ai_turn(board: Board, player: str, agent: MonteCarloAgent, greedy: bool = True) -> None:
    a = agent.choose_move(board, player, greedy=greedy)
    if a == -1:
        return
    board[a] = player

def play_human_vs_ai(agent: MonteCarloAgent):
    print("\n=== Human vs AI ===")
    while True:
        choice = input("Do you want to be X (first) or O (second)? [X/O]: ").strip().upper()
        if choice in ("X", "O"):
            human = choice
            ai = "O" if human == "X" else "X"
            break
        print("Please answer X or O.")

    greedy = input("Should the AI play greedily (y) or allow exploration (n)? [y/n]: ").strip().lower() != 'n'

    board = new_board()
    current = "X"
    print_board(board)

    while True:
        if current == human:
            human_turn(board, human)
        else:
            ai_turn(board, ai, agent, greedy=greedy)
            print("AI moves:")
        print_board(board)
        outcome = check_winner(board)
        if outcome in ("X", "O"):
            print(f"{outcome} wins!" + (" (You win!)" if outcome == human else " (AI wins!)"))
            break
        if outcome == "DRAW":
            print("It's a draw.")
            break
        current = "O" if current == "X" else "X"

def watch_ai_vs_ai(agent: MonteCarloAgent, games: int = 1, greedy: bool = True):
    print("\n=== AI vs AI ===")
    for g in range(1, games+1):
        print(f"Game {g}:")
        result = agent.episode(starting_player="X", show=True)
        if result == "DRAW":
            print("Result: draw\n")
        else:
            print(f"Result: {result} wins\n")

def train_self_play(agent: MonteCarloAgent, episodes: int = 10000, report_every: int = 1000):
    print(f"Training for {episodes} self-play games (epsilon={agent.epsilon})...")
    w = {"X": 0, "O": 0, "DRAW": 0}
    for i in range(1, episodes+1):
        # alternate starting player to avoid bias
        starter = "X" if i % 2 else "O"
        result = agent.episode(starting_player=starter, show=False)
        w[result] += 1
        if report_every and i % report_every == 0:
            total = i
            wx, wo, d = w["X"], w["O"], w["DRAW"]
            print(f"  After {total:6d}: X={wx} O={wo} Draw={d} | states learned={len(agent.Q)}")
    print("Done.")

# =========================
# Menu / main
# =========================

def main():
    agent = MonteCarloAgent(epsilon=0.1)
    if os.path.exists(MODEL_PATH):
        try:
            agent.load(MODEL_PATH)
        except Exception as e:
            print(f"(Could not auto-load model: {e})")

    while True:
        print("""
==============================
Tic-Tac-Toe RL (console)
==============================
1) Train by self-play
2) Play Human vs AI
3) Watch AI vs AI
4) Set exploration epsilon
5) Save model
6) Load model
7) Reset model
0) Exit
""")
        choice = input("Select an option: ").strip()

        if choice == "1":
            try:
                n = int(input("How many self-play games? (e.g., 5000): ").strip())
            except ValueError:
                print("Please enter a number.")
                continue
            report = max(1000, n // 10)
            train_self_play(agent, episodes=n, report_every=report)
        elif choice == "2":
            play_human_vs_ai(agent)
        elif choice == "3":
            try:
                g = int(input("How many games to show? ").strip())
            except ValueError:
                g = 1
            greedy = input("Play greedily (y) or with exploration (n)? [y/n]: ").strip().lower() != 'n'
            watch_ai_vs_ai(agent, games=g, greedy=greedy)
        elif choice == "4":
            try:
                e = float(input("Set epsilon (0 = greedy, 0.1 default, 1 = random): ").strip())
                e = min(max(e, 0.0), 1.0)
                agent.epsilon = e
                print(f"epsilon set to {agent.epsilon}")
            except ValueError:
                print("Please enter a number (e.g., 0.1)")
        elif choice == "5":
            path = input(f"Save path [{MODEL_PATH}]: ").strip() or MODEL_PATH
            agent.save(path)
        elif choice == "6":
            path = input(f"Load path [{MODEL_PATH}]: ").strip() or MODEL_PATH
            agent.load(path)
        elif choice == "7":
            agent = MonteCarloAgent(epsilon=agent.epsilon)
            print("Model reset. (Learning starts fresh.)")
        elif choice == "0":
            print("Goodbye!")
            break
        else:
            print("Unknown option. Please choose from the menu.")

if __name__ == "__main__":
    main()
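
If you want to skip the menu, the pieces can also be used directly. A minimal sketch (the module name tictactoe_rl is only an assumption for whatever filename the script is saved under):

# Hypothetical standalone usage, bypassing the menu loop.
from tictactoe_rl import MonteCarloAgent, train_self_play

agent = MonteCarloAgent(epsilon=0.2)        # explore a bit while training
train_self_play(agent, episodes=20000, report_every=2000)
agent.epsilon = 0.0                         # act greedily once trained
agent.save("tictactoe_q.json")              # same file format that load() reads back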

Day 20654

Afternoon carried me and my companions into Evie’s Bakery, quiet hum of ovens and glass cases shining with sugar. Amanda, smiling and steady, steered us through the tasting like she’d been waiting just for us.

Better Than Sex cake first. Devil’s food rich and shadow dark, chocolate chips hidden in the crumb. Whipped white frosting smoothed over, tangled with chocolate and caramel. Each forkful heavy with sweetness, melt and bite together, little bursts of joy.

Then sunshine cake. Yellow crumb, tender as a cloud, holding its weight under a plate of milk chocolate icing. The balance of bright and sweet, golden light under a darker crown, lifted everything a little higher.

We lingered, fork tapping the empty plate, warm air rising from the ovens downstairs. We put in an order for both full cakes for a birthday party later this month.

When we stepped back outside, the sidewalk carried its own invitation. Hopscotch chalked in pastel squares, the word dance scrawled in the final box. Sunlight caught it just so, and for a moment it felt like the cakes had followed us out, turning the street into something just as sweet.

#EviesBakery #CakeTasting #BetterThanSexCake #SunshineCake #SweetLife #ChalkHopscotch #DanceAtTheEnd