#!/usr/bin/env python3
"""
Synapse Layer Cookbook — 04: LangChain Agent with Persistent Memory
https://synapselayer.org/docs

Wraps Synapse Layer as a LangChain-compatible memory backend.
Memories persist across sessions and are shared across agents.

Prerequisites:
    pip install synapse-layer langchain langchain-openai
    export SYNAPSE_API_KEY=sk_live_...
    export OPENAI_API_KEY=sk-...
"""

from synapse_layer import SynapseClient
from langchain.memory import BaseMemory
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from pydantic import Field
import os
from typing import Any


class SynapseMemory(BaseMemory):
    """LangChain-compatible memory backed by Synapse Layer.

    Benefits over ConversationBufferMemory:
    - Persists across sessions (server-side, encrypted)
    - Semantic recall (not just last N messages)
    - Trust Quotient filtering (noise reduction)
    - Cross-agent: memories stored by one agent are
      available to all agents in the same tenant

    All tunables are regular fields and can be passed as keyword
    arguments to the constructor, e.g.
    ``SynapseMemory(api_key=..., recall_limit=10, input_key="question")``.
    """

    # Synapse Layer client; built in __init__ from the API key and kept out
    # of the model's exported data (exclude=True).
    client: Any = Field(default=None, exclude=True)
    # Agent label sent with every stored memory.
    agent_name: str = "langchain-agent"
    # Prompt variable under which recalled context is returned.
    memory_key: str = "history"
    # Minimum trust quotient forwarded to the recall call.
    min_tq: float = 0.5
    # Key of the user message in the chain's ``inputs`` dict.
    input_key: str = "input"
    # Key of the model reply in the chain's ``outputs`` dict.
    output_key: str = "response"
    # Maximum number of memories requested per recall.
    recall_limit: int = 5
    # Replies shorter than this many characters are not stored.
    min_response_length: int = 10

    class Config:
        arbitrary_types_allowed = True

    def __init__(self, api_key: str, agent_name: str = "langchain-agent", **kwargs: Any):
        """Create the memory and its Synapse client.

        Args:
            api_key: Synapse Layer API key used to construct the client.
            agent_name: Agent label attached to stored memories.
            **kwargs: Any other field of this class (``memory_key``,
                ``min_tq``, ``input_key``, ``output_key``, ``recall_limit``,
                ``min_response_length``), forwarded to the base initializer.
        """
        super().__init__(**kwargs)
        # Assigned after base initialization so the explicit arguments always
        # win over anything supplied through kwargs.
        self.client = SynapseClient(api_key=api_key)
        self.agent_name = agent_name

    @property
    def memory_variables(self) -> list[str]:
        """Return the prompt variables this memory provides."""
        return [self.memory_key]

    def load_memory_variables(self, inputs: dict[str, Any]) -> dict[str, str]:
        """Recall relevant memories before the LLM responds.

        Args:
            inputs: Chain inputs; the user message is read from
                ``inputs[self.input_key]``.

        Returns:
            A one-entry dict keyed by ``memory_key``. The value is an empty
            string when there is no user message, a fixed placeholder when
            recall returns nothing, and otherwise a formatted context block
            with one line per recalled memory.
        """
        query = inputs.get(self.input_key, "")
        if not query:
            # Nothing to search for, so skip the remote call entirely.
            return {self.memory_key: ""}

        memories = self.client.recall(
            query=query,
            limit=self.recall_limit,
            min_tq=self.min_tq,
        )

        if not memories:
            return {self.memory_key: "No prior context found."}

        # Format as context block, one line per memory with its trust quotient.
        lines = [f"[TQ:{m.trust_quotient:.2f}] {m.content}" for m in memories]
        context = "Prior context (from persistent memory):\n" + "\n".join(lines)
        return {self.memory_key: context}

    def save_context(self, inputs: dict[str, Any], outputs: dict[str, str]) -> None:
        """Store the user message and the assistant's response as a memory.

        Args:
            inputs: Chain inputs; the user message is read from
                ``inputs[self.input_key]`` (empty string if absent).
            outputs: Chain outputs; the reply is read from
                ``outputs[self.output_key]``.

        Empty replies, and replies shorter than ``min_response_length``
        characters, are skipped and nothing is stored.
        """
        output_text = outputs.get(self.output_key, "")
        if not output_text or len(output_text) < self.min_response_length:
            return  # Skip trivial responses

        self.client.store(
            content=f"User: {inputs.get(self.input_key, '')} | Assistant: {output_text}",
            agent=self.agent_name,
            memory_type="conversation",
            tags=["langchain"],
        )

    def clear(self) -> None:
        """Do nothing: memories are persistent by design.

        The method exists because the memory interface expects it; calling
        it deliberately leaves stored memories untouched.
        """
        pass


def main():
    """Run a two-turn demo conversation that uses Synapse-backed memory.

    Reads the Synapse API key from the ``SYNAPSE_API_KEY`` environment
    variable, builds a verbose ``ConversationChain`` around a GPT-4 chat
    model, sends two user messages and prints each reply.
    """
    # Memory backend: recall and storage go through Synapse Layer.
    synapse_memory = SynapseMemory(
        api_key=os.environ["SYNAPSE_API_KEY"],
        agent_name="langchain-support-agent",
    )

    # Conversation chain wired to that memory.
    chat_model = ChatOpenAI(model="gpt-4", temperature=0.7)
    conversation = ConversationChain(
        llm=chat_model,
        memory=synapse_memory,
        verbose=True,
    )

    # The second turn could just as well run in a new session, since the
    # memory is persistent rather than held in this process.
    user_turns = (
        "I'm building a RAG pipeline with LlamaIndex.",
        "What tools did I mention I'm using?",
    )
    for user_message in user_turns:
        reply = conversation.predict(input=user_message)
        print(f"Assistant: {reply}\n")

    # Benefit: the LLM receives prior context from Synapse recall instead of
    # the full conversation history, which reduces token usage.
    # Typical reduction: 40-60% fewer input tokens for long conversations.


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()


"""
Expected output:
    > Entering new ConversationChain chain...
    Prompt after formatting:
    ...
    Prior context (from persistent memory):
    [TQ:0.89] User: I'm building a RAG pipeline with LlamaIndex. | Assistant: ...
    ...
    Assistant: You mentioned you're building a RAG pipeline with LlamaIndex...

    # Token reduction: ~40-60% fewer input tokens vs full conversation history
    # because Synapse returns only semantically relevant memories.
"""