Skip to content

Commit

Permalink
Merge pull request #14 from betatim/add-bookmarks
Browse files Browse the repository at this point in the history
[WIP] Add table of contents to PDFs based on h1 tags
  • Loading branch information
betatim committed Jul 2, 2020
2 parents 2c53969 + 3e3503e commit 67f2519
Show file tree
Hide file tree
Showing 3 changed files with 71 additions and 8 deletions.
6 changes: 2 additions & 4 deletions example.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -138,11 +138,9 @@
]
},
{
"cell_type": "code",
"execution_count": null,
"cell_type": "markdown",
"metadata": {},
"outputs": [],
"source": []
"source": ["# A h1 title\n"]
}
],
"metadata": {
Expand Down
Binary file modified example.pdf
Binary file not shown.
73 changes: 69 additions & 4 deletions notebook_as_pdf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,22 @@ async def html_to_pdf(html_file, pdf_file, pyppeteer_args=None):
width = dimensions["width"]
height = dimensions["height"]

await page.evaluate(
"""
function getOffset( el ) {
var _x = 0;
var _y = 0;
while( el && !isNaN( el.offsetLeft ) && !isNaN( el.offsetTop ) ) {
_x += el.offsetLeft - el.scrollLeft;
_y += el.offsetTop - el.scrollTop;
el = el.offsetParent;
}
return { top: _y, left: _x };
}
""",
force_expr=True,
)

await page.addStyleTag(
{
"content": """
Expand All @@ -73,6 +89,17 @@ async def html_to_pdf(html_file, pdf_file, pyppeteer_args=None):
}
)

h1s = await page.evaluate(
"""() => {
var vals = []
for (const elem of document.getElementsByTagName("h1")) {
//console.log(elem, getOffset(elem).top, elem.innerText)
vals.push({ top: getOffset(elem).top, text: elem.innerText })
}
return vals
}"""
)

await page.pdf(
{
"path": pdf_file,
Expand All @@ -87,12 +114,47 @@ async def html_to_pdf(html_file, pdf_file, pyppeteer_args=None):

await browser.close()

return h1s


def finish_pdf(pdf_in, pdf_out, notebook, headings):
"""Add finishing touches to the PDF file.
To make the PDF nicer we:
def attach_notebook(pdf_in, pdf_out, notebook):
* attach the original notebook to the PDF for reference
* add bookmarks pointing to the headers in a notebook
"""
pdf = PyPDF2.PdfFileWriter()
pdf.appendPagesFromReader(PyPDF2.PdfFileReader(pdf_in, "rb"))
pdf.addAttachment(notebook["file_name"], notebook["contents"])

for heading in headings:
page_num = heading["top"] // (200 * 72)

page_height = pdf.getPage(page_num).artBox[-1]

# position on the page as measured from the bottom of the page
# with a bit of leeway so that clicking the bookmark doesn't put
# the heading right at the border
on_page_pos = page_height - (heading["top"] % (200 * 72)) + 20

# there is no nice way of passing the "zoom arguments" at the very
# end of the function call without explicitly listing all the parameters
# of the function. We can't use keyword arguments :(
pdf.addBookmark(
heading["text"],
page_num,
None,
None,
False,
False,
"/XYZ",
0,
on_page_pos,
None,
)

with open(pdf_out, "wb") as fp:
pdf.write(fp)

Expand All @@ -116,7 +178,9 @@ async def notebook_to_pdf(
with tempfile.NamedTemporaryFile(suffix=".html") as f:
f.write(exported_html.encode())
f.flush()
await html_to_pdf(f.name, pdf_path, pyppeteer_args)
heading_positions = await html_to_pdf(f.name, pdf_path, pyppeteer_args)

return heading_positions


class PDFExporter(Exporter):
Expand Down Expand Up @@ -161,7 +225,7 @@ def from_notebook_node(self, notebook, resources=None, **kwargs):
pdf_fname2 = os.path.join(name, "output-with-attachment.pdf")
pyppeteer_args = ["--no-sandbox"] if self.no_sandbox else None

self.pool.submit(
heading_positions = self.pool.submit(
asyncio.run,
notebook_to_pdf(
notebook,
Expand All @@ -174,13 +238,14 @@ def from_notebook_node(self, notebook, resources=None, **kwargs):
).result()
resources["output_extension"] = ".pdf"

attach_notebook(
finish_pdf(
pdf_fname,
pdf_fname2,
{
"file_name": f"{resources['metadata']['name']}.ipynb",
"contents": nbformat.writes(notebook).encode("utf-8"),
},
heading_positions,
)

with open(pdf_fname2, "rb") as f:
Expand Down

0 comments on commit 67f2519

Please sign in to comment.