VIBECODE-THEORY/tools/integrator/integrate.py

#!/usr/bin/env python3
"""Integrate research markdown files into a unified digest for Paper 009 planning."""

from __future__ import annotations

import argparse
import json
import re
from dataclasses import dataclass
from pathlib import Path
from typing import Any


# Markdown heading line: group 1 is the run of 1-6 '#' characters, group 2 the
# heading text. MULTILINE lets ^/$ anchor at every line of a whole document.
HEADING_RE = re.compile(r"^(#{1,6})\s+(.+?)\s*$", re.MULTILINE)
# Bullet list item introduced by '-' or '*'; group 1 is the item text.
BULLET_RE = re.compile(r"^\s*[-*]\s+(.+?)\s*$")
# Numbered list item ("3. text"); group 1 is the number, group 2 the item text.
NUMBERED_RE = re.compile(r"^\s*(\d+)\.\s+(.+?)\s*$")
# Crude sentence boundary: whitespace that follows '.', '!' or '?', or any run
# of newlines.
SENTENCE_SPLIT_RE = re.compile(r"(?<=[.!?])\s+|\n+")
# Candidate personal name: two to four whitespace-separated tokens, each either
# dotted initials (e.g. "J.K.") or a capitalised ASCII word. Every group is
# non-capturing, so findall() yields the whole matched name.
NAME_RE = re.compile(
    r"\b(?:[A-Z](?:\.[A-Z])+\.?|[A-Z][a-zA-Z'-]+)"
    r"(?:\s+(?:[A-Z](?:\.[A-Z])+\.?|[A-Z][a-zA-Z'-]+)){1,3}\b"
)
# Lower-case words that disqualify a name candidate in split_possible_names
# (they typically come from headings rather than people).
BAD_NAME_WORDS = {
    "executive",
    "summary",
    "task",
    "sources",
    "paper",
    "physics",
    "technology",
    "society",
    "logs",
    "pricing",
    "history",
    "quantum",
    "analysis",
}


# Debate topics scanned by detect_contradictions. Each rule carries:
#   id          - stable identifier emitted as "topic_id"
#   label       - human-readable topic name emitted as "topic"
#   pro_markers - phrases counted as supporting evidence
#   con_markers - phrases counted as challenging evidence
# Markers are written in lower case because they are tested as plain
# substrings of the lower-cased document text.
TOPIC_RULES = [
    {
        "id": "determinism_vs_agency",
        "label": "Technological determinism vs social agency",
        "pro_markers": [
            "autonomous technique",
            "irreversible",
            "lock-in",
            "path dependence",
            "ratchet",
            "structurally fixed",
            "cannot reverse",
        ],
        "con_markers": [
            "social construct",
            "interpretive flexibility",
            "democratic rationalization",
            "human agency",
            "selective adoption",
            "tool taming",
            "re-shaped",
            "can change",
        ],
    },
    {
        "id": "unification_vs_homogenization",
        "label": "Knowledge unification vs statistical homogenization",
        "pro_markers": [
            "knowledge unification",
            "integration layer",
            "interconnectedness",
            "consilience",
            "compiled",
            "coherent",
        ],
        "con_markers": [
            "stochastic parrot",
            "homogenization",
            "illusion",
            "veneer",
            "lossy",
            "lacks understanding",
            "database lookup",
        ],
    },
    {
        "id": "cognition_commodity_vs_mimicry",
        "label": "AI cognition commodity vs token mimicry",
        "pro_markers": [
            "cognition as a commodity",
            "price of thinking",
            "task-based framework",
            "automation",
            "productivity",
            "cognitive offloading",
        ],
        "con_markers": [
            "stochastic parrot",
            "doesn't think",
            "mimicry",
            "predicts tokens",
            "no cognitive model",
        ],
    },
    {
        "id": "retrocausal_attractor",
        "label": "Teleological attractor vs unfalsifiable retrocausality",
        "pro_markers": [
            "teleological attractor",
            "retrocausal",
            "omega point",
            "final cause",
            "participatory universe",
            "transactional interpretation",
        ],
        "con_markers": [
            "unfalsifiability",
            "pseudoscience",
            "woo",
            "causality violation",
            "superdeterminism",
        ],
    },
    {
        "id": "efficiency_vs_jevons",
        "label": "Efficiency frees time vs Jevons expansion",
        "pro_markers": [
            "efficiency gains",
            "free up human time",
            "productivity",
            "surplus",
            "cost disease",
        ],
        "con_markers": [
            "jevons paradox",
            "increased consumption",
            "reasoning inflation",
            "more complex systems",
            "dependency",
        ],
    },
]


# Phrase -> weight table used by extract_strongest_challenges: every phrase
# found (as a lower-case substring) in a counterargument bullet adds its weight
# to that bullet's score.
CHALLENGE_KEYWORDS = {
    "unfalsifiable": 5,
    "dogma": 4,
    "pseudoscience": 5,
    "illusion": 4,
    "mimicry": 4,
    "lacks understanding": 4,
    "circular": 3,
    "causality violation": 4,
    "superdeterminism": 3,
    "lossy": 2,
    "stochastic parrot": 5,
}


# Keywords per open-question number, looked up by map_to_open_questions with
# the question id parsed from Paper 008. A question id that has no entry here
# gets an empty keyword list and therefore scores zero.
QUESTION_KEYWORDS = {
    1: [
        "falsifiable",
        "falsifiability",
        "unification",
        "replacement",
        "fragment",
        "distort",
        "evidence",
        "test",
        "stochastic",
    ],
    2: [
        "identity",
        "human",
        "consciousness",
        "agency",
        "values",
        "pragmatic",
        "continuity",
        "survival",
    ],
    3: [
        "individual",
        "workers",
        "labor",
        "skills",
        "strategy",
        "governance",
        "practical",
        "action",
    ],
    4: [
        "cheating",
        "tools",
        "dependency",
        "ratchet",
        "adoption",
        "ethics",
        "norm",
    ],
    5: [
        "timeline",
        "threshold",
        "when",
        "prediction",
        "curve",
        "years",
        "exponential",
        "phase",
    ],
}


@dataclass
class Doc:
    """One research markdown file held in memory, as built by load_research_docs."""

    # Location the file was read from.
    path: Path
    # File name without its extension; used as the document's identifier.
    slug: str
    # Text of the first "# " heading line, or the slug when there is none.
    title: str
    # Full, unmodified file contents.
    text: str
    # Lower-cased heading text -> body text found beneath that heading.
    sections: dict[str, str]


def parse_args() -> argparse.Namespace:
    """Parse the integrator's command-line options.

    Returns:
        Namespace with ``project_root`` (default: two directories above this
        script's folder), ``research_dir`` and ``paper_008`` (both ``None``
        unless given) and ``out_dir`` (default: this script's folder).
    """
    cli = argparse.ArgumentParser(description=__doc__)
    script_path = Path(__file__).resolve()
    cli.add_argument("--project-root", type=Path, default=script_path.parents[2])
    # These two have no default; main() derives them from the project root.
    for flag in ("--research-dir", "--paper-008"):
        cli.add_argument(flag, type=Path)
    cli.add_argument("--out-dir", type=Path, default=script_path.parent)
    return cli.parse_args()


def clean_inline_md(text: str) -> str:
    """Strip inline markdown decoration and collapse whitespace.

    Unwraps, in order, `code` spans, **bold**, *italic* and [label](target)
    links (keeping the label), then squeezes every whitespace run to a single
    space and trims the ends.
    """
    unwrap_rules = (
        r"`([^`]+)`",
        r"\*\*([^*]+)\*\*",
        r"\*([^*]+)\*",
        r"\[(.*?)\]\((.*?)\)",
    )
    cleaned = text.strip()
    # Order matters: bold must be unwrapped before the looser italic pattern.
    for pattern in unwrap_rules:
        cleaned = re.sub(pattern, r"\1", cleaned)
    cleaned = re.sub(r"\s+", " ", cleaned)
    return cleaned.strip()


def extract_sections(text: str) -> dict[str, str]:
    """Split a markdown document into sections keyed by heading.

    Args:
        text: Full markdown source.

    Returns:
        Mapping of cleaned, lower-cased heading text to the stripped text that
        lies between that heading and the next heading of any level (or the end
        of the document). Empty when the document has no headings.

        When the same heading occurs more than once, the bodies are joined with
        a blank line in document order, so a repeated "Sources" or "Key
        Scholars" heading no longer discards the earlier section's content.
    """
    sections: dict[str, str] = {}
    matches = list(HEADING_RE.finditer(text))

    for idx, match in enumerate(matches):
        heading = clean_inline_md(match.group(2)).lower()
        end = matches[idx + 1].start() if idx + 1 < len(matches) else len(text)
        body = text[match.end() : end].strip()

        earlier = sections.get(heading)
        if earlier is None:
            sections[heading] = body
        else:
            # Repeated heading: keep both bodies instead of overwriting.
            sections[heading] = "\n\n".join(part for part in (earlier, body) if part)
    return sections


def load_research_docs(research_dir: Path) -> list[Doc]:
    """Read every ``*.md`` file directly inside ``research_dir``.

    Files are visited in sorted path order. A document's title is the text of
    its first line starting with "# " (inline markdown removed); when no such
    line exists the file stem is used.
    """
    loaded: list[Doc] = []
    for md_path in sorted(research_dir.glob("*.md")):
        contents = md_path.read_text(encoding="utf-8")
        top_heading = next(
            (ln for ln in contents.splitlines() if ln.startswith("# ")), None
        )
        if top_heading is None:
            doc_title = md_path.stem
        else:
            doc_title = clean_inline_md(top_heading[2:])
        loaded.append(
            Doc(
                path=md_path,
                slug=md_path.stem,
                title=doc_title,
                text=contents,
                sections=extract_sections(contents),
            )
        )
    return loaded


def normalize_person_name(name: str) -> str:
    """Reduce a person's name to a lower-case key for de-duplication.

    Drops "et al.", spells "&" as "and", removes parenthesised text, replaces
    every character other than ASCII letters, space, '.', apostrophe and '-'
    with a space, then collapses whitespace.
    """
    key = re.sub(r"\bet al\.?", "", name, flags=re.IGNORECASE).replace("&", " and ")
    for pattern, replacement in (
        (r"\([^)]*\)", ""),
        (r"[^A-Za-z .'-]", " "),
        (r"\s+", " "),
    ):
        key = re.sub(pattern, replacement, key)
    return key.strip().lower()


def split_possible_names(chunk: str) -> list[str]:
    """Pull likely personal names out of a short piece of bullet text.

    Only the part before the first ':' is considered, with parenthesised and
    double-quoted text removed and "&" spelled as "and". Every NAME_RE match
    that keeps at least two alphabetic-initial words, none of them listed in
    BAD_NAME_WORDS, is returned. When nothing qualifies, the letters of the
    whole chunk (plus apostrophes/hyphens) are squeezed together and returned
    as a single name if that string starts with a capital, is longer than
    three characters and is not a bad word.
    """
    text = clean_inline_md(chunk).split(":", 1)[0]
    text = re.sub(r"\([^)]*\)", "", text)
    text = re.sub(r'"[^"]+"', "", text)
    text = text.replace("&", " and ")

    found: list[str] = []
    for candidate in NAME_RE.findall(text):
        tidy = re.sub(r"\s+", " ", candidate).strip(" .,:;")
        tokens = [tok for tok in tidy.split() if tok and tok[0].isalpha()]
        if len(tokens) < 2:
            continue
        if any(tok.lower() in BAD_NAME_WORDS for tok in tokens):
            continue
        found.append(" ".join(tokens))
    if found:
        return found

    # Fallback for chunks without a multi-word match, e.g. a lone surname.
    squeezed = re.sub(r"[^A-Za-z'-]", "", text).strip()
    acceptable = (
        bool(squeezed)
        and squeezed[0].isupper()
        and squeezed.lower() not in BAD_NAME_WORDS
        and len(squeezed) > 3
    )
    if acceptable:
        found.append(squeezed)
    return found


def extract_scholars(docs: list[Doc]) -> dict[str, dict[str, Any]]:
    """Collect scholars named in each document's "key scholars" sections.

    Candidate names come from bullet lines in any section whose heading
    contains "key scholars": the first bold span of the bullet if there is one,
    otherwise the text before the first ':'.

    Args:
        docs: Loaded research documents.

    Returns:
        Mapping of normalised name -> entry with:
          "name"          first spelling encountered
          "aliases"       sorted list of every spelling seen
          "files"         sorted list of document slugs naming the scholar
          "mention_count" number of sentences (summed over those documents)
                          that contain the surname
          "contexts"      up to three {"file", "snippet"} dicts per document

    The surname is matched as a whole word rather than a raw substring, so
    "marx" no longer counts sentences that only contain "marxism". Each
    document is split into sentences once, not once per candidate name.
    """
    scholars: dict[str, dict[str, Any]] = {}

    for doc in docs:
        candidates: set[str] = set()
        for heading, body in doc.sections.items():
            if "key scholars" not in heading:
                continue
            for line in body.splitlines():
                bullet_match = BULLET_RE.match(line)
                if not bullet_match:
                    continue
                raw = bullet_match.group(1)
                bold_match = re.search(r"\*\*([^*]+)\*\*", raw)
                if bold_match:
                    lead = bold_match.group(1)
                else:
                    lead = clean_inline_md(raw).split(":", 1)[0]
                candidates.update(split_possible_names(lead))
        if not candidates:
            continue

        # Loop-invariant per document: split and clean the text a single time.
        sentences = [clean_inline_md(s) for s in SENTENCE_SPLIT_RE.split(doc.text)]
        sentences_lower = [s.lower() for s in sentences]
        text_lower = doc.text.lower()

        for name in sorted(candidates):
            key = normalize_person_name(name)
            if not key:
                continue
            entry = scholars.setdefault(
                key,
                {
                    "name": name,
                    "aliases": set(),
                    "files": set(),
                    "mention_count": 0,
                    "contexts": [],
                },
            )
            entry["aliases"].add(name)
            entry["files"].add(doc.slug)

            surname = name.split()[-1].lower().strip(".,")
            local_mentions: list[str] = []
            mentioned_anywhere = False
            if surname:
                # Lookarounds instead of \b so surnames that start or end with
                # a non-word character (apostrophe, hyphen) still match.
                surname_re = re.compile(rf"(?<!\w){re.escape(surname)}(?!\w)")
                local_mentions = [
                    sentence
                    for sentence, lowered in zip(sentences, sentences_lower)
                    if surname_re.search(lowered)
                ]
                if not local_mentions:
                    # Cleaning or sentence splitting may have hidden a mention
                    # that is present in the raw text.
                    mentioned_anywhere = surname_re.search(text_lower) is not None

            if not local_mentions:
                if mentioned_anywhere:
                    local_mentions = [f"Mentioned in {doc.slug}"]
                else:
                    local_mentions = [f"Listed in {doc.slug}"]

            entry["mention_count"] += len(local_mentions)
            for snippet in local_mentions[:3]:
                entry["contexts"].append({"file": doc.slug, "snippet": snippet})

    # Sets are not JSON-serialisable; hand back deterministic sorted lists.
    for entry in scholars.values():
        entry["aliases"] = sorted(entry["aliases"])
        entry["files"] = sorted(entry["files"])

    return scholars


def extract_title_from_source_line(line: str) -> str:
    """Guess the work title inside one bibliography bullet.

    Tried in order:
      1. the first double-quoted span;
      2. the first emphasised span - a genuine single-asterisk ``*italic*``
         span is preferred; only when there is none does the looser legacy
         match apply (which also picks up the inside of ``**bold**``);
      3. the text after a "(YYYY)" year marker, up to the first '.';
      4. the whole cleaned line.

    Preferring genuine italics fixes lines such as
    ``**Ellul, J.** (1964). *The Technological Society*`` which used to return
    the bold author as the title.

    Args:
        line: Raw bullet text, markdown decoration included.

    Returns:
        The extracted title with inline markdown removed.
    """
    line_clean = clean_inline_md(line)
    quoted = re.findall(r'"([^"]+)"', line)
    if quoted:
        return clean_inline_md(quoted[0])

    # The lookarounds reject asterisks that belong to a '**' bold delimiter.
    italic = re.findall(r"(?<!\*)\*(?!\*)([^*]+)\*(?!\*)", line)
    if not italic:
        # Legacy behaviour for lines that only carry bold emphasis.
        italic = re.findall(r"\*([^*]+)\*", line)
    if italic:
        return clean_inline_md(italic[0])

    year_match = re.search(r"\(\d{4}\)\.?", line_clean)
    if year_match:
        tail = line_clean[year_match.end() :].strip(" .:-")
        if tail:
            return tail.split(".", 1)[0].strip()

    return line_clean


def extract_authors_from_source_line(line: str) -> list[str]:
    """Guess the author strings at the start of a bibliography bullet.

    The author segment is the cleaned text before the first "(YYYY)" marker,
    or the whole cleaned line when no year is present. "&" is read as "and",
    "et al." is dropped, and the segment is split on the word "and" and on
    ';'. Pieces are trimmed of surrounding " ,.-" and kept only when they
    contain at least one ASCII letter.
    """
    cleaned = clean_inline_md(line)
    year = re.search(r"\(\d{4}\)", cleaned)
    author_part = cleaned if year is None else cleaned[: year.start()].strip()
    author_part = author_part.replace("&", " and ")
    author_part = re.sub(r"\bet al\.?", "", author_part, flags=re.IGNORECASE)

    authors: list[str] = []
    for piece in re.split(r"\band\b|;", author_part):
        trimmed = piece.strip(" ,.-")
        if trimmed and re.search(r"[A-Za-z]", trimmed):
            authors.append(trimmed)
    return authors


def normalize_title(title: str) -> str:
    """Build a de-duplication key from a work title.

    The title is Unicode-normalised (NFKC), lower-cased, stripped of everything
    that is not a letter, digit or space, and whitespace-collapsed.

    Letters and digits from any script are kept. The previous ASCII-only filter
    mangled accented titles ("Être" -> "tre") and reduced fully non-Latin
    titles to an empty key, which made the caller drop them. Results for
    pure-ASCII titles are unchanged.

    Args:
        title: Title as extracted from a source line.

    Returns:
        The normalised key; empty if the title has no letters or digits.
    """
    import unicodedata  # local import keeps this fix self-contained

    # NFKC makes composed and decomposed accents produce the same key.
    folded = unicodedata.normalize("NFKC", title).lower()
    # \w also matches '_', which the original filter removed, so drop it too.
    folded = re.sub(r"[^\w ]|_", " ", folded)
    return re.sub(r"\s+", " ", folded).strip()


def extract_bibliography(docs: list[Doc]) -> dict[str, dict[str, Any]]:
    """Merge the bullet items of every "sources..." section into one bibliography.

    Items are keyed by normalised title, so the same work cited in several
    files collapses into one entry. Each entry holds the first-seen "title",
    sorted "authors" and "files", every cleaned "raw_mentions" line, and a
    "relevance" score of two points per citing file plus one per mention.
    """
    merged: dict[str, dict[str, Any]] = {}

    for doc in docs:
        for heading, body in doc.sections.items():
            if not heading.startswith("sources"):
                continue
            for line in body.splitlines():
                item = BULLET_RE.match(line)
                if item is None:
                    continue
                raw = item.group(1)
                title = extract_title_from_source_line(raw)
                if not title:
                    continue
                key = normalize_title(title)
                if not key:
                    continue
                if key not in merged:
                    merged[key] = {
                        "title": title,
                        "authors": set(),
                        "files": set(),
                        "raw_mentions": [],
                    }
                record = merged[key]
                record["files"].add(doc.slug)
                record["raw_mentions"].append(clean_inline_md(raw))
                record["authors"].update(extract_authors_from_source_line(raw))

    # Freeze the sets into sorted lists and score each entry.
    for record in merged.values():
        record["authors"] = sorted(record["authors"])
        record["files"] = sorted(record["files"])
        record["relevance"] = len(record["files"]) * 2 + len(record["raw_mentions"])

    return merged


def first_sentence_with_marker(text: str, marker: str) -> str | None:
    """Return the first sentence of ``text`` whose lower-cased form contains ``marker``.

    The sentence is returned with inline markdown removed; ``None`` means no
    single sentence contains the marker.
    """
    matching = (
        clean_inline_md(candidate)
        for candidate in SENTENCE_SPLIT_RE.split(text)
        if marker in candidate.lower()
    )
    return next(matching, None)


def detect_contradictions(docs: list[Doc]) -> list[dict[str, Any]]:
    """Report topics on which different files take opposing sides.

    For every rule in TOPIC_RULES each document contributes at most one
    supporting and one challenging piece of evidence: the first marker of the
    respective list that occurs in the document decides, and evidence is only
    recorded when a sentence containing that marker can be quoted. A topic is
    reported when at least one file appears only on the supporting side and at
    least one other file only on the challenging side.
    """

    def first_hit(doc: Doc, lowered: str, markers: list[str]) -> dict[str, str] | None:
        # Only the first marker present in the document is examined.
        for marker in markers:
            if marker not in lowered:
                continue
            snippet = first_sentence_with_marker(doc.text, marker)
            if snippet:
                return {"file": doc.slug, "marker": marker, "snippet": snippet}
            return None
        return None

    report: list[dict[str, Any]] = []

    for rule in TOPIC_RULES:
        supports: list[dict[str, str]] = []
        challenges: list[dict[str, str]] = []

        for doc in docs:
            lowered = doc.text.lower()
            for bucket, side in ((supports, "pro_markers"), (challenges, "con_markers")):
                hit = first_hit(doc, lowered, rule[side])
                if hit is not None:
                    bucket.append(hit)

        supporting = {hit["file"] for hit in supports}
        challenging = {hit["file"] for hit in challenges}
        only_supporting = supporting - challenging
        only_challenging = challenging - supporting
        if not (only_supporting and only_challenging):
            continue

        report.append(
            {
                "topic": rule["label"],
                "topic_id": rule["id"],
                "supports": supports[:4],
                "challenges": challenges[:4],
                "supporting_files": sorted(supporting),
                "challenging_files": sorted(challenging),
            }
        )

    return report


def extract_open_questions(paper_008: Path) -> list[dict[str, Any]]:
    """Read the numbered open questions from Paper 008.

    The section starts at the "## Open Questions for Paper 009" heading and
    runs to the next "## " heading (or the end of the file). Every "N. text"
    line in it becomes ``{"id": N, "text": ...}`` with markdown removed.

    Raises:
        RuntimeError: if the heading is missing or the section holds no
            numbered lines.
    """
    contents = paper_008.read_text(encoding="utf-8")
    _, heading, remainder = contents.partition("## Open Questions for Paper 009")
    if not heading:
        raise RuntimeError("Could not find 'Open Questions for Paper 009' in paper 008")

    following = re.search(r"\n##\s+", remainder)
    section = remainder if following is None else remainder[: following.start()]

    questions: list[dict[str, Any]] = []
    for line in section.splitlines():
        numbered = NUMBERED_RE.match(line)
        if numbered is None:
            continue
        # Drop any '**' left over after cleaning (e.g. unbalanced bold markers).
        wording = clean_inline_md(numbered.group(2)).replace("**", "")
        questions.append({"id": int(numbered.group(1)), "text": wording})

    if questions:
        return questions
    raise RuntimeError("No numbered open questions found in paper 008")


def map_to_open_questions(
    docs: list[Doc], open_questions: list[dict[str, Any]]
) -> list[dict[str, Any]]:
    """Score how much each research file speaks to each open question.

    A sentence earns one point per keyword of the question (from
    QUESTION_KEYWORDS) that occurs in it as a lower-case substring. A file's
    score is the sum over its sentences: 12 or more is "high", 6 or more is
    "medium", anything else above zero is "low". The first three scoring
    sentences are kept as snippets.

    Args:
        docs: Loaded research documents.
        open_questions: Dicts with an integer "id" and a "text".

    Returns:
        One dict per question, ordered by question id, holding "question_id",
        "question", "total_score" and "supporting_files" (highest score
        first).

    Documents are split into cleaned sentences once up front instead of once
    per question; the scores are unchanged.
    """
    # Loop-invariant work: (slug, [(cleaned sentence, lower-cased sentence)]).
    prepared: list[tuple[str, list[tuple[str, str]]]] = []
    for doc in docs:
        cleaned = [clean_inline_md(s) for s in SENTENCE_SPLIT_RE.split(doc.text)]
        prepared.append((doc.slug, [(sentence, sentence.lower()) for sentence in cleaned]))

    coverage: list[dict[str, Any]] = []

    for question in open_questions:
        qid = question["id"]
        keywords = QUESTION_KEYWORDS.get(qid, [])
        file_scores: list[dict[str, Any]] = []
        total = 0

        for slug, sentences in prepared:
            score = 0
            snippets: list[str] = []
            for sentence, lowered in sentences:
                hits = sum(1 for kw in keywords if kw in lowered)
                if hits:
                    score += hits
                    if len(snippets) < 3:
                        snippets.append(sentence)
            if not score:
                continue

            total += score
            if score >= 12:
                level = "high"
            elif score >= 6:
                level = "medium"
            else:
                level = "low"
            file_scores.append(
                {
                    "file": slug,
                    "score": score,
                    "level": level,
                    "snippets": snippets,
                }
            )

        file_scores.sort(key=lambda x: x["score"], reverse=True)
        coverage.append(
            {
                "question_id": qid,
                "question": question["text"],
                "total_score": total,
                "supporting_files": file_scores,
            }
        )

    coverage.sort(key=lambda x: x["question_id"])
    return coverage


def extract_strongest_challenges(docs: list[Doc]) -> list[dict[str, Any]]:
    """Rank the counterargument bullets found across all documents.

    Bullets are taken from sections whose heading contains "counterarguments"
    or "critiques". Each bullet scores 1 plus the weight of every
    CHALLENGE_KEYWORDS phrase it contains. Bullets with the same text
    (ignoring case) are merged: their scores add up and their files are
    pooled. The ten highest-scoring entries are returned.
    """
    pooled: dict[str, dict[str, Any]] = {}

    for doc in docs:
        for heading, body in doc.sections.items():
            if "counterarguments" not in heading and "critiques" not in heading:
                continue
            for line in body.splitlines():
                item = BULLET_RE.match(line)
                if item is None:
                    continue
                text = clean_inline_md(item.group(1))
                lowered = text.lower()
                score = 1 + sum(
                    weight
                    for keyword, weight in CHALLENGE_KEYWORDS.items()
                    if keyword in lowered
                )
                existing = pooled.get(lowered)
                if existing is None:
                    pooled[lowered] = {"text": text, "score": score, "files": {doc.slug}}
                else:
                    existing["score"] += score
                    existing["files"].add(doc.slug)

    ranking = [
        {"text": entry["text"], "score": entry["score"], "files": sorted(entry["files"])}
        for entry in pooled.values()
    ]
    ranking.sort(key=lambda entry: entry["score"], reverse=True)
    return ranking[:10]


def detect_emergent_themes(docs: list[Doc]) -> list[dict[str, Any]]:
    """Score a fixed set of cross-cutting themes against the whole corpus.

    A theme's score is the total number of (non-overlapping, lower-case
    substring) occurrences of its keywords in all document texts joined
    together. Themes scoring zero are left out; the rest are returned as
    ``{"theme", "score"}`` dicts, highest score first.
    """
    theme_keywords = {
        "Governance and agency design": [
            "agency",
            "democratic",
            "community",
            "policy",
            "selective adoption",
            "governance",
        ],
        "Economic concentration and labor shift": [
            "labor",
            "capital",
            "commodity",
            "automation",
            "class",
            "pricing",
        ],
        "Epistemic reliability and grounding": [
            "understand",
            "stochastic",
            "illusion",
            "lossy",
            "falsifiable",
            "evidence",
        ],
        "Civilizational lock-in and resilience": [
            "lock-in",
            "path dependence",
            "retreat",
            "dependency",
            "ratchet",
            "reversal",
        ],
    }

    all_text = "\n".join(doc.text.lower() for doc in docs)
    tallies = (
        (label, sum(all_text.count(word) for word in words))
        for label, words in theme_keywords.items()
    )
    present = [{"theme": label, "score": tally} for label, tally in tallies if tally > 0]
    return sorted(present, key=lambda entry: entry["score"], reverse=True)


def build_structured_result(
    docs: list[Doc],
    scholars: dict[str, dict[str, Any]],
    bibliography: dict[str, dict[str, Any]],
    contradictions: list[dict[str, Any]],
    open_question_coverage: list[dict[str, Any]],
    strongest_challenges: list[dict[str, Any]],
    emergent_themes: list[dict[str, Any]],
) -> dict[str, Any]:
    """Assemble every analysis result into the single dict that gets serialised.

    Scholars are ordered by number of files, then by mention count (both
    descending); bibliography entries by descending relevance. The remaining
    inputs are passed through unchanged.
    """
    slugs = [doc.slug for doc in docs]

    ranked_scholars = list(scholars.values())
    ranked_scholars.sort(
        key=lambda person: (len(person["files"]), person["mention_count"]),
        reverse=True,
    )

    ranked_sources = list(bibliography.values())
    ranked_sources.sort(key=lambda source: source["relevance"], reverse=True)

    structured: dict[str, Any] = {
        "meta": {
            "research_files": slugs,
            "research_file_count": len(docs),
        },
    }
    structured["scholars"] = ranked_scholars
    structured["bibliography"] = ranked_sources
    structured["contradictions"] = contradictions
    structured["open_question_coverage"] = open_question_coverage
    structured["strongest_challenges"] = strongest_challenges
    structured["emergent_themes"] = emergent_themes
    return structured


def render_digest(result: dict[str, Any]) -> str:
    """Render the structured result as the markdown digest.

    Sections, in order: scope, the top 20 scholars, the top 40 bibliography
    entries, the contradiction report, the per-question coverage map and the
    strongest challenges.

    Args:
        result: Dict with the keys "meta", "scholars", "bibliography",
            "contradictions", "open_question_coverage" and
            "strongest_challenges".

    Returns:
        The digest as one newline-terminated string.

    The "strongest level" in each coverage heading is chosen by severity
    (high > medium > low). Taking ``max()`` of the level strings compared them
    alphabetically, which ranked "medium" and "low" above "high".
    """
    level_rank = {"low": 0, "medium": 1, "high": 2}

    lines: list[str] = []
    lines.append("# Integrated Research Digest")
    lines.append("")
    lines.append("## Scope")
    lines.append(
        f"Processed {result['meta']['research_file_count']} research file(s): "
        + ", ".join(result["meta"]["research_files"])
    )
    lines.append("")

    lines.append("## Scholars by Frequency")
    for scholar in result["scholars"][:20]:
        files = ", ".join(scholar["files"])
        lines.append(
            f"- **{scholar['name']}** — files: {len(scholar['files'])}; mentions: {scholar['mention_count']}; in: {files}"
        )
    lines.append("")

    lines.append("## Unified Bibliography")
    for item in result["bibliography"][:40]:
        authors = ", ".join(item["authors"]) if item["authors"] else "Unknown"
        files = ", ".join(item["files"])
        lines.append(
            f"- **{item['title']}** ({authors}) — relevance {item['relevance']}; cited in: {files}"
        )
    lines.append("")

    lines.append("## Contradiction Report")
    if not result["contradictions"]:
        lines.append("- No cross-file contradictions detected by the current heuristic.")
    for item in result["contradictions"]:
        lines.append(f"### {item['topic']}")
        lines.append("- Supporting evidence:")
        for support in item["supports"]:
            lines.append(
                f"  - `{support['file']}` ({support['marker']}): {support['snippet']}"
            )
        lines.append("- Challenging evidence:")
        for challenge in item["challenges"]:
            lines.append(
                f"  - `{challenge['file']}` ({challenge['marker']}): {challenge['snippet']}"
            )
    lines.append("")

    lines.append("## Paper 009 Coverage Map")
    for item in result["open_question_coverage"]:
        if item["supporting_files"]:
            # Rank by severity; an unrecognised label sorts below "low".
            max_level = max(
                (fs["level"] for fs in item["supporting_files"]),
                key=lambda level: level_rank.get(level, -1),
            )
        else:
            max_level = "none"
        lines.append(
            f"### Q{item['question_id']} (total score {item['total_score']}, strongest level {max_level})"
        )
        lines.append(f"{item['question']}")
        if not item["supporting_files"]:
            lines.append("- No supporting material detected.")
            continue
        for fs in item["supporting_files"][:4]:
            lines.append(f"- `{fs['file']}`: score {fs['score']} ({fs['level']})")
            for snip in fs["snippets"][:2]:
                lines.append(f"  - {snip}")
    lines.append("")

    lines.append("## Strongest Challenges")
    if not result["strongest_challenges"]:
        lines.append("- No challenge bullets detected.")
    for item in result["strongest_challenges"]:
        lines.append(
            f"- **Score {item['score']}** ({', '.join(item['files'])}): {item['text']}"
        )

    return "\n".join(lines) + "\n"


def render_outline(result: dict[str, Any]) -> str:
    """Render the suggested Paper 009 outline as markdown.

    Open questions are ordered by descending total coverage score. The outline
    lists the two best- and two worst-covered questions, proposes one section
    per question (with up to three evidence files and up to two anchor
    snippets), then lists the five strongest challenges and the first four
    emergent themes.
    """
    coverage_sorted = sorted(
        result["open_question_coverage"],
        key=lambda entry: entry["total_score"],
        reverse=True,
    )
    strongest = coverage_sorted[:2]
    # Slicing already yields the whole list when it has fewer than two items.
    weakest = coverage_sorted[-2:]

    out: list[str] = [
        "# Suggested Outline for Paper 009",
        "",
        "## Why This Sequence",
        "Order starts with heavily-supported questions, then closes with low-coverage questions that require new argumentation or new research.",
        "",
        "## Coverage Priorities",
    ]

    for caption, group in (
        ("- Most supported open questions:", strongest),
        ("- Least supported open questions:", weakest),
    ):
        out.append(caption)
        for item in group:
            out.append(
                f"  - Q{item['question_id']} (score {item['total_score']}): {item['question']}"
            )
    out.append("")

    out.append("## Proposed Sections")
    for item in coverage_sorted:
        qid = item["question_id"]
        out.append(f"### Section {qid}: Q{qid}")
        out.append(item["question"])
        evidence = item["supporting_files"]
        if not evidence:
            out.append("- Primary evidence files: none detected; requires fresh synthesis.")
            continue
        top_files = ", ".join(fs["file"] for fs in evidence[:3])
        out.append(f"- Primary evidence files: {top_files}")
        # One anchor snippet from each of the two best-scoring files.
        for fs in evidence[:2]:
            for snip in fs["snippets"][:1]:
                out.append(f"- Anchor claim: {snip}")
    out.append("")

    out.append("## Cross-Cutting Counterarguments To Address Explicitly")
    out.extend(
        f"- {challenge['text']} ({', '.join(challenge['files'])})"
        for challenge in result["strongest_challenges"][:5]
    )
    out.append("")

    out.append("## New Themes To Add Beyond Original Open Questions")
    out.extend(
        f"- {theme['theme']} (signal score {theme['score']})"
        for theme in result["emergent_themes"][:4]
    )

    return "\n".join(out) + "\n"


def main() -> int:
    """Run the whole integration pipeline and write its three output files.

    Resolves the input and output locations from the command line, loads the
    research files and Paper 008's open questions, runs every analysis, and
    writes ``integrated.json``, ``digest.md`` and ``009_outline_suggestion.md``
    into the output directory. Progress is printed to stdout.

    Returns:
        0 on completion; failures surface as exceptions.
    """
    args = parse_args()
    project_root = args.project_root.resolve()
    research_dir = (args.research_dir or (project_root / "research")).resolve()
    paper_008 = (args.paper_008 or (project_root / "008-the-ship-of-theseus.md")).resolve()
    out_dir = args.out_dir.resolve()
    out_dir.mkdir(parents=True, exist_ok=True)

    print(f"[integrator] project root: {project_root}")
    print(f"[integrator] research dir: {research_dir}")
    print(f"[integrator] paper 008: {paper_008}")
    print(f"[integrator] output dir: {out_dir}")

    # --- load inputs ---
    docs = load_research_docs(research_dir)
    print(f"[integrator] loaded {len(docs)} research file(s)")
    if not docs:
        print("[integrator] no research files found; writing empty digest/outline")

    open_questions = extract_open_questions(paper_008)
    print(f"[integrator] extracted {len(open_questions)} open question(s) from Paper 008")

    # --- analyse ---
    scholars = extract_scholars(docs)
    print(f"[integrator] extracted {len(scholars)} unique scholar name(s)")

    bibliography = extract_bibliography(docs)
    print(f"[integrator] extracted {len(bibliography)} bibliography item(s)")

    contradictions = detect_contradictions(docs)
    print(f"[integrator] detected {len(contradictions)} contradiction topic(s)")

    coverage = map_to_open_questions(docs, open_questions)
    print("[integrator] mapped research evidence to Paper 008 open questions")

    strongest_challenges = extract_strongest_challenges(docs)
    print(f"[integrator] ranked {len(strongest_challenges)} strongest challenge(s)")

    emergent_themes = detect_emergent_themes(docs)
    print(f"[integrator] found {len(emergent_themes)} emergent theme(s)")

    result = build_structured_result(
        docs,
        scholars,
        bibliography,
        contradictions,
        coverage,
        strongest_challenges,
        emergent_themes,
    )

    # --- write outputs ---
    # Renderers are called lazily so each file is produced right before it is
    # written, in the same order as the file list.
    outputs = (
        (out_dir / "integrated.json", lambda: json.dumps(result, indent=2)),
        (out_dir / "digest.md", lambda: render_digest(result)),
        (out_dir / "009_outline_suggestion.md", lambda: render_outline(result)),
    )
    for target, render in outputs:
        target.write_text(render(), encoding="utf-8")
    for target, _ in outputs:
        print(f"[integrator] wrote {target}")

    return 0


if __name__ == "__main__":
    raise SystemExit(main())