Source code for nb4llm.converter

# converter.py

import re
from pathlib import Path

import nbformat


# How many back ticks do I need?!
def get_fence(cell_source: str, min_length: int = 3) -> str:
    r"""
    Get the fence for a cell source.

    Ensures the fence is longer than any sequence of backticks in the cell,
    so the cell content can never be mistaken for the closing fence.

    Parameters
    ----------
    cell_source : str
        The source code or markdown of a cell.
    min_length : int, optional
        The minimum length of the fence (default: 3).

    Returns
    -------
    str
        The fence string (e.g., '```' or '````').

    Examples
    --------
    >>> get_fence('Here is some code: ```python\nprint(1)\n```')
    '````'
    >>> get_fence('no backticks at all')
    '```'
    """
    # NOTE: the docstring is a raw string so that the ``\n`` sequences in the
    # doctest stay literal instead of splitting the example across lines.
    longest_run = max(
        (len(run) for run in re.findall(r"`+", cell_source)),
        default=0,  # no backticks in the cell at all
    )
    # One more backtick than the longest run found, but never below the minimum.
    return "`" * max(min_length, longest_run + 1)
# convert .ipynb to .txt
def convert_ipynb_to_txt(ipynb_path: str, txt_path: str) -> None:
    """
    Convert a Jupyter notebook (.ipynb) to a readable text file (.txt).

    This function reads a Jupyter notebook file and writes its content into a
    text format. Markdown and code cells are converted to fenced blocks; code
    fences are tagged with the notebook's kernel language. Cells of any other
    type (e.g. raw cells) are not written to the output.

    Parameters
    ----------
    ipynb_path : str
        Path to the Jupyter notebook file.
    txt_path : str
        Path to the output text file. It is written as UTF-8.

    Returns
    -------
    None

    Examples
    --------
    >>> convert_ipynb_to_txt('notebook.ipynb', 'notebook.txt')
    # notebook.txt will contain:
    # # notebook.ipynb
    # ```markdown
    # Some markdown
    # ```
    # ```python
    # print('hello')
    # ```

    CLI Example
    -----------
    $ nb4llm notebook.ipynb
    # Output: notebook.txt
    """
    nb = nbformat.read(ipynb_path, as_version=4)

    # Extract the kernel language from the notebook metadata, falling back to
    # "python" when the metadata, the kernelspec, or the language is missing
    # or empty.
    metadata = getattr(nb, "metadata", None) or {}
    kernelspec = metadata.get("kernelspec") or {}
    kernel_language = kernelspec.get("language") or "python"

    # First line of the output is a header carrying the notebook file name.
    out_lines = [f"# {Path(ipynb_path).name}\n"]

    for cell in nb.cells:
        if cell.cell_type == "markdown":
            info = "markdown"
        elif cell.cell_type == "code":
            info = kernel_language
        else:
            # Only markdown and code cells have a representation in the format.
            continue

        # The fence is sized per cell so backticks inside the cell are safe.
        fence = get_fence(cell.source)
        out_lines.extend((f"{fence}{info}", cell.source, f"{fence}\n"))

    # Explicit UTF-8: notebook text may contain non-ASCII characters, and the
    # platform default encoding is not guaranteed to represent them.
    with open(txt_path, "w", encoding="utf-8") as f:
        f.write("\n".join(out_lines))
# Usage
# ipynb_path = "note_book_name.ipynb"
# txt_path = "note_book_name.txt"
# convert_ipynb_to_txt(ipynb_path, txt_path)


# convert .txt to .ipynb
def convert_txt_to_ipynb(txt_path: str, ipynb_path: str) -> None:
    """
    Convert a text file (.txt) in nb4llm format back to a Jupyter notebook (.ipynb).

    Fenced blocks tagged ``markdown`` become markdown cells; every other
    fenced block becomes a code cell. Non-empty text found outside any fence
    is collected into markdown cells. If a code fence carries a language tag
    other than ``python``, the notebook's ``kernelspec`` and ``language_info``
    metadata are updated to that language (the last such fence wins).

    Parameters
    ----------
    txt_path : str
        Path to the input text file. It is read as UTF-8.
    ipynb_path : str
        Path to the output Jupyter notebook file.

    Returns
    -------
    None

    Examples
    --------
    >>> convert_txt_to_ipynb('notebook.txt', 'notebook.ipynb')
    # notebook.ipynb will be created from the text blocks in notebook.txt

    CLI Example
    -----------
    $ nb4llm --reverse notebook.txt
    # Output: notebook.ipynb
    """
    # Explicit UTF-8 so the result does not depend on the platform default.
    with open(txt_path, "r", encoding="utf-8") as f:
        lines = f.read().split("\n")

    # Skip notebook name header if present
    if lines and lines[0].startswith("# "):
        lines = lines[1:]

    # Create notebook structure with default (Python) metadata; it is
    # replaced below if a different language is detected.
    nb = nbformat.v4.new_notebook()
    nb.metadata = {
        "kernelspec": {
            "display_name": "Python 3",
            "language": "python",
            "name": "python3",
        },
        "language_info": {
            "codemirror_mode": {"name": "ipython", "version": 3},
            "file_extension": ".py",
            "mimetype": "text/x-python",
            "name": "python",
            "nbconvert_exporter": "python",
            "pygments_lexer": "ipython3",
            "version": "3.8.0",
        },
    }

    # language tag -> (kernel name, display name) for the languages with a
    # dedicated mapping; anything else uses the tag itself as the kernel name.
    known_kernels = {
        "r": ("ir", "R"),
        "julia": ("julia-1.8", "Julia 1.8.5"),
        "javascript": ("nodejs", "Node.js"),
    }

    # An opening fence is a run of backticks optionally followed by a tag.
    fence_re = re.compile(r"^(`+)(\w*)$")

    # Parse blocks
    i = 0
    while i < len(lines):
        stripped = lines[i].strip()

        # Skip empty lines between blocks
        if not stripped:
            i += 1
            continue

        fence_match = fence_re.match(stripped)
        if fence_match is None:
            # Content that's not in a fence is treated as markdown: collect
            # lines until a blank line or the next opening fence.
            loose_lines = [lines[i]]
            i += 1
            while i < len(lines):
                stripped = lines[i].strip()
                if not stripped or fence_re.match(stripped):
                    break
                loose_lines.append(lines[i])
                i += 1
            nb.cells.append(nbformat.v4.new_markdown_cell("\n".join(loose_lines)))
            continue

        fence, cell_type = fence_match.groups()

        # Collect everything up to the matching closing fence (or end of file
        # if the block is never closed).
        i += 1  # Move past the opening fence
        block_content = []
        while i < len(lines) and lines[i].strip() != fence:
            block_content.append(lines[i])
            i += 1
        i += 1  # Move past the closing fence

        source = "\n".join(block_content)
        if cell_type == "markdown":
            nb.cells.append(nbformat.v4.new_markdown_cell(source))
            continue

        nb.cells.append(nbformat.v4.new_code_cell(source))

        # Update kernel metadata if we detect a different language
        if cell_type and cell_type != "python":
            kernel_name, display_name = known_kernels.get(
                cell_type, (cell_type, cell_type.capitalize())
            )
            nb.metadata["kernelspec"] = {
                "display_name": display_name,
                "language": cell_type,
                "name": kernel_name,
            }
            # Keep language_info in step with the kernelspec instead of
            # leaving the Python defaults on a non-Python notebook.
            nb.metadata["language_info"] = {"name": cell_type}

    # Write the notebook
    nbformat.write(nb, ipynb_path)