From 5d76ec85ec9bd9010815b5fad0752f88b506d134 Mon Sep 17 00:00:00 2001
From: "Dana K. Williams"
Date: Thu, 2 Jul 2026 21:45:32 +0000
Subject: [PATCH] fix(tree_renderer): truncate overlong node titles in summary headings

The no-TOC fallback structure generator sometimes copies an entire
source sentence verbatim into a node's title when there's no natural
short heading to extract (see VectifyAI/PageIndex#341), producing
unreadable multi-line Markdown headings in the rendered summary.

Truncate titles over 80 chars with an ellipsis when rendering the
heading. The full title is still available from the underlying
PageIndex tree; this only affects display in wiki/summaries/*.md.
---
 openkb/tree_renderer.py     | 18 +++++++++++++++++-
 tests/test_tree_renderer.py | 32 ++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)

diff --git a/openkb/tree_renderer.py b/openkb/tree_renderer.py
index 2424ba10..878b9e7c 100644
--- a/openkb/tree_renderer.py
+++ b/openkb/tree_renderer.py
@@ -15,6 +15,22 @@ def _yaml_frontmatter(source_name: str, doc_id: str, description: str = "") -> s
     return "---\n" + "\n".join(lines) + "\n---\n"
 
 
+_MAX_TITLE_LEN = 80
+
+
+def _short_title(title: str) -> str:
+    """Truncate a title for heading display.
+
+    The no-TOC fallback structure generator sometimes copies an entire
+    source sentence verbatim into ``title`` when there's no natural short
+    heading to extract; left unshortened that produces unreadable
+    multi-line Markdown headings (see PageIndex#341).
+    """
+    if len(title) <= _MAX_TITLE_LEN:
+        return title
+    return title[:_MAX_TITLE_LEN].rstrip() + "…"
+
+
 def _render_nodes_summary(nodes: list[dict], depth: int) -> str:
     """Recursively render nodes for the *summary* view (summaries only)."""
     lines: list[str] = []
@@ -26,7 +42,7 @@ def _render_nodes_summary(nodes: list[dict], depth: int) -> str:
         summary = node.get("summary", "")
         children = node.get("nodes", [])
 
-        lines.append(f"{heading_prefix} {title} (pages {start}–{end})\n")
+        lines.append(f"{heading_prefix} {_short_title(title)} (pages {start}–{end})\n")
         if summary:
             lines.append(f"Summary: {summary}\n")
         if children:
diff --git a/tests/test_tree_renderer.py b/tests/test_tree_renderer.py
index 3786cfe4..937ef222 100644
--- a/tests/test_tree_renderer.py
+++ b/tests/test_tree_renderer.py
@@ -52,6 +52,38 @@ def test_summary_md_has_type_and_description():
     assert 'full_text: "sources/my-doc.json"' in md
 
 
+def test_overlong_title_is_truncated_in_heading():
+    long_title = (
+        "This is an entire source sentence copied verbatim into the title "
+        "field because the no-TOC fallback found no natural short heading "
+        "to extract from this section of the document."
+    )
+    tree = {
+        "structure": [
+            {
+                "title": long_title,
+                "start_index": 1,
+                "end_index": 2,
+                "summary": "x",
+                "nodes": [],
+            }
+        ]
+    }
+    md = render_summary_md(tree, "my-doc", "doc-123")
+    assert long_title not in md
+    assert f"# {long_title[:80]}…" in md
+
+
+def test_short_title_is_not_truncated():
+    tree = {
+        "structure": [
+            {"title": "Background", "start_index": 1, "end_index": 2, "summary": "x", "nodes": []}
+        ]
+    }
+    md = render_summary_md(tree, "my-doc", "doc-123")
+    assert "# Background (pages 1–2)" in md
+
+
 def test_summary_full_text_quoted_yaml_safe():
     import yaml
 