import PyPDF2
from neo4j import GraphDatabase
import re

class OpenClawIngestionPipeline:
    """Extract text from PDF files and store it in a Neo4j graph.

    The pipeline owns a Neo4j driver for its whole lifetime. Call
    :meth:`close` when finished, or use the instance as a context manager
    (``with OpenClawIngestionPipeline(...) as pipeline:``) so the driver is
    released even when an exception is raised.
    """

    def __init__(self, neo4j_uri, neo4j_user, neo4j_password):
        """Open a driver for the Neo4j instance at *neo4j_uri*.

        Args:
            neo4j_uri: Connection URI, e.g. ``"bolt://localhost:7687"``.
            neo4j_user: User name for basic authentication.
            neo4j_password: Password for basic authentication.
        """
        self.driver = GraphDatabase.driver(neo4j_uri, auth=(neo4j_user, neo4j_password))

    def __enter__(self):
        """Return the pipeline itself so it can be used in a ``with`` block."""
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        """Close the driver on exit; exceptions are never suppressed."""
        self.close()

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

    def parse_pdf(self, file_path):
        """Extract and normalise the text of every page of a PDF.

        Args:
            file_path: Path of the PDF file to read.

        Returns:
            The whitespace-normalised text of all pages. On any failure
            (missing file, unreadable PDF, ...) no exception is raised;
            instead a string starting with ``"Error parsing PDF: "`` followed
            by the exception message is returned, so callers must check for
            that prefix.
        """
        print(f"Parsing {file_path}...")
        try:
            with open(file_path, 'rb') as file:
                reader = PyPDF2.PdfReader(file)
                # Pages are read while the file is still open. A falsy
                # extraction result is treated as an empty page so that one
                # text-less page cannot abort the whole document.
                text = "".join(
                    (page.extract_text() or "") + "\n" for page in reader.pages
                )
        except Exception as e:
            # Deliberate best-effort: the failure is reported in-band because
            # existing callers rely on the error-string return value.
            return f"Error parsing PDF: {e}"
        return self.clean_text(text)

    def clean_text(self, text):
        """Collapse every whitespace run in *text* to one space and trim the ends."""
        text = re.sub(r'\s+', ' ', text)
        return text.strip()

    def inject_to_graph(self, person_name, document_name, content, doc_type="Resume"):
        """Create Trinity Graph nodes (Social & Knowledge) in Neo4j.

        Merges a ``Person`` node and a ``Document`` node (both keyed by their
        ``name`` property), merges an ``OWNS_DOCUMENT`` relationship from the
        person to the document, and overwrites the document's ``content``,
        ``type`` and ``ingested_at`` properties.

        Args:
            person_name: Value of the ``Person.name`` property.
            document_name: Value of the ``Document.name`` property.
            content: Text stored in ``Document.content``.
            doc_type: Value stored in ``Document.type``; defaults to
                ``"Resume"``, the value that was previously hard-coded.
        """
        query = """
        MERGE (p:Person {name: $person_name})
        MERGE (d:Document {name: $document_name})
        MERGE (p)-[:OWNS_DOCUMENT]->(d)
        SET d.content = $content, d.type = $doc_type, d.ingested_at = timestamp()
        RETURN p, d
        """
        with self.driver.session() as session:
            result = session.run(
                query,
                person_name=person_name,
                document_name=document_name,
                content=content,
                doc_type=doc_type,
            )
            # Exhaust the result before reporting success so that any error
            # raised while the query executes surfaces here, not afterwards.
            result.consume()
            print(f"[{document_name}] successfully injected into the Knowledge Graph for {person_name}.")

if __name__ == "__main__":
    # Setup for Bennett's OpenClaw environment.
    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    pipeline = OpenClawIngestionPipeline("bolt://localhost:7687", "neo4j", "openclaw_admin")

    try:
        # Extract the text of the resume PDF.
        extracted_text = pipeline.parse_pdf("Bennett_Vernon_Resume.pdf")

        # parse_pdf reports failure in-band with this exact prefix. Checking
        # the prefix (rather than looking for "Error" anywhere) keeps a
        # document that merely mentions the word "Error" from being skipped.
        if extracted_text.startswith("Error parsing PDF:"):
            print(extracted_text)
        else:
            pipeline.inject_to_graph("Bennett Vernon", "Bennett_Vernon_Resume.pdf", extracted_text)
    finally:
        # Always release the Neo4j driver, even if ingestion raised.
        pipeline.close()