from dataclasses import dataclass

@dataclass
class TruthValue:
    """Two-component truth value attached to a statement.

    The components are stored exactly as given; call :meth:`clamp` to force
    both of them into the closed interval [0.0, 1.0].
    """

    f: float  # presumably the frequency/strength component -- confirm
    c: float  # confidence component

    def clamp(self):
        """Clip ``f`` and ``c`` into [0.0, 1.0] in place.

        Returns:
            This same instance, so a call can be chained directly onto a
            constructor: ``TruthValue(f, c).clamp()``.
        """
        for attr in ("f", "c"):
            bounded = min(1.0, max(0.0, getattr(self, attr)))
            setattr(self, attr, bounded)
        return self

def deduction(a, b):
    """Derive the truth value of a conclusion from two premise truth values.

    The result's ``f`` is the product of the premises' ``f`` values, and its
    ``c`` is the product of the premises' ``c`` values scaled by 0.9.

    Args:
        a: Truth value of the first premise (needs ``f`` and ``c``).
        b: Truth value of the second premise (needs ``f`` and ``c``).

    Returns:
        A new, clamped ``TruthValue``.
    """
    derived_f = a.f * b.f
    derived_c = a.c * b.c * 0.9
    conclusion = TruthValue(derived_f, derived_c)
    return conclusion.clamp()

def revision(x, y):
    """Merge two truth values held for the same statement.

    Each confidence ``c`` is turned into an evidence weight
    ``w = c / (1 - c)``. The merged ``f`` is the weight-averaged ``f`` of the
    inputs, and the merged ``c`` is ``w / (w + 1)`` for the summed weight.

    Args:
        x: Existing truth value, or None if there is none yet.
        y: Incoming truth value, or None.

    Returns:
        A new, clamped ``TruthValue``; None when both inputs are None. The
        result is never one of the argument objects and the arguments are
        never modified, so a stored belief cannot be changed through a
        reference the caller still holds.
    """
    if x is None and y is None:
        return None
    # With a single input there is nothing to merge: hand back a clamped copy
    # rather than clamping (and aliasing) the caller's own object.
    if x is None:
        return TruthValue(y.f, y.c).clamp()
    if y is None:
        return TruthValue(x.f, x.c).clamp()

    # Clamp copies first so out-of-range inputs cannot yield negative weights.
    lhs = TruthValue(x.f, x.c).clamp()
    rhs = TruthValue(y.f, y.c).clamp()

    # The 1e-9 floors keep c == 1.0 from dividing by zero.
    w1 = lhs.c / max(1e-9, 1.0 - lhs.c)
    w2 = rhs.c / max(1e-9, 1.0 - rhs.c)
    total = w1 + w2

    if total <= 0.0:
        # Neither side carries any weight; a plain mean avoids collapsing
        # the merged f to 0.0 regardless of the inputs.
        f = (lhs.f + rhs.f) / 2.0
    else:
        f = (w1 * lhs.f + w2 * rhs.f) / max(1e-9, total)
    c = total / (total + 1.0)
    return TruthValue(f, c).clamp()

@dataclass
class Rule:
    """One inference rule: two premise statements and the statement they yield.

    ``Reasoner.candidate_trades`` fires a rule only when both ``left`` and
    ``link`` are present in its beliefs.
    """

    left: str  # first premise statement, looked up in the beliefs
    link: str  # second premise statement, looked up in the beliefs
    right: str  # statement whose belief the rule updates
    cost: float = 1.0  # amount charged against the per-step attention budget
    value: float = 1.0  # multiplier applied to the rule's computed utility

@dataclass
class AttentionRecord:
    """Running attention totals kept per conclusion statement by ``Reasoner``."""

    spent: float = 0.0  # sum of rule costs charged for this statement
    earned: float = 0.0  # sum of utilities credited for this statement
    hits: int = 0  # number of trades executed for this statement

class Reasoner:
    """Rule-based reasoner that rations inference with an attention budget.

    Attributes:
        beliefs: Maps a statement to its current truth value.
        rules: Registered ``Rule`` objects, in insertion order.
        attention_budget: Maximum total rule cost spent in one ``step``.
        attention: Maps a conclusion statement to its ``AttentionRecord``.
    """

    def __init__(self, attention_budget=2.0):
        """Create an empty reasoner with the given per-step budget."""
        self.beliefs = {}
        self.rules = []
        self.attention_budget = attention_budget
        self.attention = {}

    def add_fact(self, stmt, tv):
        """Record ``tv`` for ``stmt``, revising any belief already held."""
        current = self.beliefs.get(stmt)
        combined = revision(current, tv)
        if combined is None:
            # revision() only yields None when there was nothing to merge.
            return
        self.beliefs[stmt] = combined

    def add_rule(self, left, link, right, cost=1.0, value=1.0):
        """Register a rule and make sure its conclusion has an attention book."""
        new_rule = Rule(left, link, right, cost, value)
        self.rules.append(new_rule)
        if right not in self.attention:
            self.attention[right] = AttentionRecord()

    def market_value(self, stmt, inferred):
        """Score how much ``inferred`` would add to what is believed of ``stmt``.

        The score is novelty plus confidence gain. For an unknown statement
        novelty is 1.0 and the gain is ``inferred.c``; otherwise novelty is
        the summed absolute change in ``f`` and ``c`` and the gain is the
        (non-negative) rise in ``c``.
        """
        known = self.beliefs.get(stmt)
        if known is None:
            novelty = 1.0
            confidence_gain = inferred.c
        else:
            novelty = abs(known.f - inferred.f) + abs(known.c - inferred.c)
            confidence_gain = max(0.0, inferred.c - known.c)
        return novelty + confidence_gain

    def candidate_trades(self):
        """List every currently applicable rule as a scored trade.

        Returns:
            ``(roi, utility, rule, inferred)`` tuples sorted by descending
            ``roi``, where ``roi`` is utility divided by the rule's cost
            (floored at 1e-9).
        """
        beliefs = self.beliefs
        scored = []
        for rule in self.rules:
            if rule.left not in beliefs or rule.link not in beliefs:
                continue  # a premise is missing, the rule cannot fire
            inferred = deduction(beliefs[rule.left], beliefs[rule.link])
            utility = rule.value * self.market_value(rule.right, inferred)
            roi = utility / max(1e-9, rule.cost)
            scored.append((roi, utility, rule, inferred))
        return sorted(scored, key=lambda trade: trade[0], reverse=True)

    def step(self):
        """Run one budgeted round of inference.

        Trades are scored once, against the beliefs held when the step
        starts, and tried in descending ROI order. A trade whose cost would
        push the round's spending past ``attention_budget`` is skipped, so a
        cheaper trade further down the list may still run.

        Returns:
            True if at least one belief was created or changed.
        """
        budget_used = 0.0
        any_change = False
        for _roi, utility, rule, inferred in self.candidate_trades():
            cost = rule.cost
            if budget_used + cost > self.attention_budget:
                continue
            target = rule.right
            previous = self.beliefs.get(target)
            updated = revision(previous, inferred)
            budget_used += cost

            # Attention is booked for every executed trade, even one that
            # ends up leaving the belief unchanged.
            if target not in self.attention:
                self.attention[target] = AttentionRecord()
            ledger = self.attention[target]
            ledger.spent += cost
            ledger.earned += utility
            ledger.hits += 1

            differs = (
                previous is None
                or abs(previous.f - updated.f) > 1e-9
                or abs(previous.c - updated.c) > 1e-9
            )
            if differs:
                self.beliefs[target] = updated
                any_change = True
        return any_change

    def run(self, steps=8):
        """Call ``step`` up to ``steps`` times, stopping once nothing changes."""
        for _ in range(steps):
            progressed = self.step()
            if not progressed:
                return

if __name__ == "__main__":
    # Demo: seed facts and rules, reason, feed two direct observations of a
    # derived statement, reason again, then dump beliefs and attention books.
    r = Reasoner(attention_budget=2.0)

    seed_facts = (
        ("bird:robin", 0.95, 0.90),
        ("bird_implies_animal", 0.98, 0.85),
        ("animal_implies_mortal", 0.97, 0.80),
        ("bird_implies_can_fly", 0.90, 0.70),
    )
    for statement, freq, conf in seed_facts:
        r.add_fact(statement, TruthValue(freq, conf))

    seed_rules = (
        ("bird:robin", "bird_implies_animal", "animal:robin", 0.7, 1.4),
        ("animal:robin", "animal_implies_mortal", "mortal:robin", 0.9, 1.2),
        ("bird:robin", "bird_implies_can_fly", "can_fly:robin", 1.6, 0.8),
    )
    for left, link, right, cost, value in seed_rules:
        r.add_rule(left, link, right, cost=cost, value=value)

    r.run()

    # Two further truth values for the same statement; each is revised into
    # whatever belief the first run left behind.
    for freq, conf in ((0.70, 0.60), (0.90, 0.70)):
        r.add_fact("animal:robin", TruthValue(freq, conf))
    r.run()

    for name in sorted(r.beliefs):
        belief = r.beliefs[name]
        print(f"{name} -> f={belief.f:.3f} c={belief.c:.3f}")

    print("attention")
    for name in sorted(r.attention):
        book = r.attention[name]
        print(f"{name} -> spent={book.spent:.3f} earned={book.earned:.3f} hits={book.hits}")


@dataclass
class Goal:
    """A goal that ``GoalAuctionPlanner`` can move to, with its own budget."""

    name: str  # key under which the planner stores the goal
    desirability: TruthValue  # f * c of this value scales bids and rewards
    budget: float = 1.0  # funds the goal pays sale prices from
    achieved: bool = False  # set once a sale into this goal has completed

@dataclass
class GoalAction:
    """A named transition from one goal to another."""

    src: str  # goal the action starts from
    name: str  # action label, also the key of its auction record
    dst: str  # goal the action leads to
    cost: float = 1.0  # divides the bid, so costlier actions bid lower
    reliability: float = 1.0  # multiplies both the bid and the reward base

@dataclass
class AuctionRecord:
    """Running auction totals kept per action name by ``GoalAuctionPlanner``."""

    spent: float = 0.0  # sum of prices paid in sales won by the action
    reward: float = 0.0  # sum of rewards granted for those sales
    wins: int = 0  # number of auctions the action has won

class GoalAuctionPlanner:
    """Moves from goal to goal by auctioning each move among the actions.

    At every step the actions leaving the current goal bid. The highest
    bidder wins and its destination goal pays the runner-up's bid, floored
    at the reserve price. The destination goal is then rewarded and becomes
    the current goal.

    Attributes:
        goals: Maps a goal name to its ``Goal``.
        actions: Registered ``GoalAction`` objects, in insertion order.
        reserve_price: Minimum acceptable bid and minimum sale price.
        reward_scale: Multiplier applied to a sale's reward base.
        budget_cap: Ceiling on a goal's budget after a reward is added;
            None disables the ceiling.
        auction_log: Maps an action name to its ``AuctionRecord``.
        last_sale: ``(action name, destination, winning bid, price, reward)``
            of the most recent sale, or None before the first one.
    """

    def __init__(self, reserve_price=0.2, reward_scale=0.9, budget_cap=3.0):
        """Create an empty planner.

        Args:
            reserve_price: Minimum acceptable bid and minimum sale price.
            reward_scale: Multiplier applied to a sale's reward base.
            budget_cap: Ceiling on a goal's budget after a reward; None
                means no ceiling.
        """
        self.goals = {}
        self.actions = []
        self.reserve_price = reserve_price
        self.reward_scale = reward_scale
        self.budget_cap = budget_cap
        self.auction_log = {}
        self.last_sale = None

    def add_goal(self, name, desirability, budget=1.0, achieved=False):
        """Register (or replace) a goal, storing a clamped copy of its truth value."""
        own_copy = TruthValue(desirability.f, desirability.c).clamp()
        self.goals[name] = Goal(name, own_copy, budget, achieved)

    def add_action(self, src, name, dst, cost=1.0, reliability=1.0):
        """Register an action and make sure it has an auction record."""
        self.actions.append(GoalAction(src, name, dst, cost, reliability))
        if name not in self.auction_log:
            self.auction_log[name] = AuctionRecord()

    def bid_for(self, action):
        """Compute an action's bid and the reward base of its destination.

        Returns:
            ``(bid, reward_base)``. The bid is the destination goal's budget
            times its need (``f * c`` of its desirability) times the
            action's reliability, divided by the action's cost (floored at
            1e-9). Bids into an already achieved goal are scaled by 0.25.

        Raises:
            KeyError: If ``action.dst`` is not a registered goal.
        """
        goal = self.goals[action.dst]
        need = goal.desirability.f * goal.desirability.c
        novelty = 0.25 if goal.achieved else 1.0
        bid = (goal.budget * need * action.reliability * novelty) / max(1e-9, action.cost)
        reward_base = need * action.reliability
        return bid, reward_base

    def candidate_auctions(self, current_goal):
        """List bids of all actions leaving ``current_goal``, highest first.

        Actions whose destination is not a registered goal are ignored.

        Returns:
            ``(bid, reward_base, action)`` tuples sorted by descending bid.
        """
        bids = [
            (*self.bid_for(action), action)
            for action in self.actions
            if action.src == current_goal and action.dst in self.goals
        ]
        bids.sort(key=lambda entry: entry[0], reverse=True)
        return bids

    def step(self, current_goal):
        """Hold one auction among the actions leaving ``current_goal``.

        Returns:
            ``(destination, action)`` when a sale completes, otherwise
            ``(current_goal, None)``. No sale happens when nobody bids, when
            the top bid is under the reserve price, or when the destination
            goal cannot afford the price. In the last two cases lower
            bidders are not tried.
        """
        bids = self.candidate_auctions(current_goal)
        if not bids:
            return current_goal, None

        winner_bid, reward_base, winner = bids[0]
        # The winner pays the runner-up's bid; a lone bidder pays the reserve.
        second_bid = bids[1][0] if len(bids) > 1 else self.reserve_price
        price = max(self.reserve_price, min(winner_bid, second_bid))

        goal = self.goals[winner.dst]
        # The 1e-9 slack tolerates rounding when budget and price are equal.
        if winner_bid < self.reserve_price or goal.budget + 1e-9 < price:
            return current_goal, None

        reward = self.reward_scale * reward_base
        new_budget = goal.budget - price + reward
        if self.budget_cap is not None:
            new_budget = min(self.budget_cap, new_budget)
        goal.budget = new_budget
        goal.achieved = True

        # Look the record up first so no throwaway record is built per sale.
        book = self.auction_log.get(winner.name)
        if book is None:
            book = self.auction_log[winner.name] = AuctionRecord()
        book.spent += price
        book.reward += reward
        book.wins += 1

        self.last_sale = (winner.name, winner.dst, winner_bid, price, reward)
        return winner.dst, winner

    def run(self, start_goal, max_steps=8):
        """Follow winning actions from ``start_goal`` for up to ``max_steps`` sales.

        Returns:
            ``(path, sales)``: the goals visited, starting with
            ``start_goal``, and the ``last_sale`` tuple of every completed
            sale. A sale whose action leads back to the current goal is
            recorded and then ends the run without extending ``path``.
        """
        current = start_goal
        path = [current]
        sales = []
        for _ in range(max_steps):
            nxt, winner = self.step(current)
            if winner is None:
                break
            # step() has already charged the goal and updated the auction
            # log, so the sale is reported even if it goes nowhere.
            sales.append(self.last_sale)
            if nxt == current:
                break
            current = nxt
            path.append(current)
        return path, sales


# Auction demo: four goals, four actions, at most four sales from "wake".
# NOTE: unlike the reasoner demo earlier in this file, these statements are
# not under an ``if __name__ == "__main__":`` guard, so they also run (and
# print) whenever the module is imported.
print("auction_demo")
planner = GoalAuctionPlanner(reserve_price=0.2, reward_scale=0.9)

planner.add_goal(name="wake", desirability=TruthValue(1.0, 1.0), budget=0.3, achieved=True)
planner.add_goal(name="find_food", desirability=TruthValue(0.95, 0.85), budget=1.1)
planner.add_goal(name="eat_breakfast", desirability=TruthValue(0.98, 0.9), budget=1.35)
planner.add_goal(name="start_work", desirability=TruthValue(0.96, 0.92), budget=1.2)

planner.add_action(src="wake", name="search_pantry", dst="find_food", cost=0.6, reliability=0.9)
planner.add_action(src="wake", name="order_takeout", dst="eat_breakfast", cost=1.1, reliability=0.98)
planner.add_action(src="find_food", name="cook_breakfast", dst="eat_breakfast", cost=0.8, reliability=0.95)
planner.add_action(src="find_food", name="grab_snack", dst="start_work", cost=0.9, reliability=0.7)

path, sales = planner.run(start_goal="wake", max_steps=4)
print(path)
for sale in sales:
    print(sale)
