#!/usr/bin/env python3

# Copyright 2024-2025 Rachael Brown
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
# “Software”), to deal in the Software without restriction, including
# without limitation the rights to use, copy, modify, merge, publish,
# distribute, sublicense, and/or sell copies of the Software, and to
# permit persons to whom the Software is furnished to do so, subject to
# the following conditions:
#
# The above copyright notice and this permission notice shall be
# included in all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND,
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
# LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
# OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
# WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

# Collection of DIY pandoc filters
#
# Known issues
# - Blockquotes immediately following message bubbles don't work right;
#   It appears panflute isn't passing the BlockQuote element into the filter
#   when I'd expect, but I don't know if this is a bug or a feature.

from collections.abc import Iterable
from textwrap import shorten
from abc import ABC, abstractmethod
from urllib.parse import urlparse
from requests_cache import CachedSession
from emoji import analyze
from hashlib import sha1
import panflute as pf
import re
import logging
import posixpath
import os.path

# Output formats for which TranslateEntitiesFilter performs replacements.
ENTITY_FORMATS = ('html', 'latex')
# Maps an ASCII idiom (looked up by the complete text of a Str element) to the
# inlines that replace it: one raw LaTeX inline and one raw inline holding the
# Unicode arrow, the latter created without an explicit format argument.
ENTITY_TABLE = {
    '->': [
        pf.RawInline(r'$ \rightarrow $', 'latex'),
        pf.RawInline('→')
    ],
    '<-': [
        pf.RawInline(r'$ \leftarrow $', 'latex'),
        pf.RawInline('←')
    ],
    '<->': [
        pf.RawInline(r'$ \leftrightarrow $', 'latex'),
        pf.RawInline('↔')
    ],
}

# Base URL per output format, consumed by the RelativeURLFilter subclasses.
# A 'default' key, if present, is used for formats without their own entry.
BASE_URLS = {
    'html'              : '/life'
}

# Directory handed to ImageCacheFilter for downloaded images.
IMAGE_CACHE = '.image-cache'
# Cache name handed to OEmbedFilter's CachedSession.
OEMBED_CACHE = '.oembed-cache'
OEMBED_YT = 'https://www.youtube.com/oembed?'
# Maps a URL's network location to the oEmbed endpoint queried for it.
OEMBED_PROVIDERS = {
    'youtu.be'          : OEMBED_YT,
    'www.youtube.com'   : OEMBED_YT
}

def is_text(elem):
    """Tell whether ``elem`` is ordinary text in the document body.

    Only exact ``pf.Str`` instances qualify, and a ``Str`` whose parent is a
    ``pf.MetaInlines`` (i.e. metadata text) is excluded.
    """
    if type(elem) != pf.Str:
        return False
    return type(elem.parent) != pf.MetaInlines

class Filter(ABC):
    """Abstract base class shared by every filter in this module.

    Each instance gets a ``log`` attribute: a logger named after the concrete
    class, so log records identify which filter emitted them.
    """

    def __init__(self):
        logger_name = type(self).__name__
        self.log = logging.getLogger(logger_name)

    @abstractmethod
    def filter(self, elem: pf.Element, doc: pf.Doc):
        """Process a single element of ``doc``; subclasses must implement this."""

class BoldTableHeadingsFilter(Filter):
    """Render the text of every table header in bold."""

    def _bold(self, elem, doc):
        """Walk action: wrap a regular text element in ``pf.Strong``."""
        if not is_text(elem):
            return None
        return pf.Strong(elem)

    def filter(self, elem, doc):
        """Walk the head of each table and embolden the text found there."""
        if type(elem) != pf.Table:
            return None

        header = elem.head
        self.log.info("Making bold: %s", pf.stringify(header))
        header.walk(self._bold)

class SuperOrdinalFilter(Filter):
    """Typeset the suffix of ordinal numbers (1st, 2nd, nth, ...) as superscript."""

    # Group 1: the number (or a literal "n"); group 2: the suffix that is valid
    # for that number; group 3: any trailing non-word characters.
    RE_ORDINAL = re.compile(r'^((?:n|[0-9]+))((?:(?<=(?<!1)1)st|(?<=(?<!1)2)nd|(?<=(?<!1)3)rd|(?<=[n04-9])th)|(?<=1[0-9])th)(\W*)$')

    def filter(self, elem, doc):
        """Split an ordinal text element into number, superscript suffix and remainder.

        Returns a list of replacement inlines, or None when ``elem`` is not a
        regular text element matching ``RE_ORDINAL``.
        """
        if not is_text(elem):
            return None

        found = self.RE_ORDINAL.match(elem.text)
        if not found:
            return None

        self.log.info("Superscripting ordinal: %s", elem.text)
        number, suffix, trailing = found.groups()
        pieces = [pf.Str(number), pf.Superscript(pf.Str(suffix))]

        # The remainder is usually just trailing punctuation
        if trailing:
            pieces.append(pf.Str(trailing))

        return pieces

class NotesAfterPunctuationFilter(Filter):
    """Swap a footnote with the punctuation element that directly follows it."""

    def _is_punctuation(self, elem):
        """True for a ``pf.Str`` whose text is not purely alphanumeric."""
        if not isinstance(elem, pf.Str):
            return False
        return not elem.text.isalnum()

    def filter(self, elem, doc):
        """Reorder punctuation to precede footnotes.

        The swap is done in two halves: the note is replaced by the punctuation
        that follows it, and that punctuation is replaced by the note.
        """
        if isinstance(elem, pf.Note):
            if not self._is_punctuation(elem.next):
                return None

            self.log.info("Reordering: %s%s[%s]",
                         pf.stringify(elem.prev),
                         elem.next.text,
                         shorten(pf.stringify(elem), 50))
            return elem.next

        if self._is_punctuation(elem) and isinstance(elem.prev, pf.Note):
            return elem.prev

        return None

class TranslateEntitiesFilter(Filter):
    """Convert common ASCII idioms to their more friendly forms in unicode, etc."""

    def __init__(self, formats, table):
        """
        formats: output formats in which replacement takes place
        table:   mapping from the full text of an element to its replacement
        """
        super().__init__()
        self.table = table
        self.formats = formats

    def filter(self, elem, doc):
        """Return the table entry for a text element, or None if nothing applies."""
        if not is_text(elem):
            return None
        if doc.format not in self.formats:
            return None
        return self.table.get(elem.text)

class TrademarkFilter(Filter):
    """Make (tm) look good in latex"""
    # The plain trade mark sign, and the same sign followed by the
    # emoji-presentation variation selector (U+FE0F).
    SYMBOLS = ('\u2122', '\u2122\ufe0f')

    def filter(self, elem, doc):
        """Replace every trade mark symbol in a text element with a LaTeX command.

        Returns a list of inlines (text pieces interleaved with raw LaTeX), or
        None when the element is not regular text, the output format is not
        latex, or the text contains no symbol.
        """
        if not (is_text(elem) and doc.format == 'latex'):
            return None

        text = elem.text
        # Try the longest symbol first so that the two-character sequence is
        # consumed as a whole instead of leaving a stray variation selector.
        symbols = sorted((s for s in self.SYMBOLS if s), key=len, reverse=True)

        pieces = []
        start = 0
        i = 0
        while i < len(text):
            symbol = next((s for s in symbols if text.startswith(s, i)), None)
            if symbol is None:
                i += 1
                continue

            # Do not emit an empty Str when the symbol opens the text or
            # directly follows another symbol.
            if i > start:
                pieces.append(pf.Str(text[start:i]))
            pieces.append(pf.RawInline(r'\texttrademark{}', 'latex'))

            i += len(symbol)
            start = i

        if pieces and start < len(text):
            pieces.append(pf.Str(text[start:]))

        return pieces or None

class InlineHeadingsFilter(Filter):
    """Make headings 4-6 inline with the first paragraph
    (HTML; the latex template already does this)
    """

    def _shall(self, elem):
        """True when ``elem`` is a header of level 4 or deeper."""
        if not isinstance(elem, pf.Header):
            return False
        return elem.level >= 4

    def filter(self, elem, doc):
        """Wrap a deep heading and the paragraph after it in one div.

        The heading is replaced by a div holding both elements; when the walk
        later reaches the paragraph itself, it is replaced by an empty Plain.
        """
        if doc.format != 'html':
            return None

        if self._shall(elem) and isinstance(elem.next, pf.Para):
            self.log.info(f"Inlining heading (level {elem.level}): {pf.stringify(elem)}")
            return pf.Div(elem, elem.next, classes=['inline-heading'])

        if isinstance(elem, pf.Para) and self._shall(elem.prev):
            self.log.debug("Skipping paragraph")
            return pf.Plain()

        return None

class HashtagFilter(Filter):
    """Format "#tag content" runs as a styled tag followed by its content."""

    RE_HASHTAG = re.compile(r'(#[A-Za-z]+)')

    def __init__(self):
        super().__init__()
        # Parent of the most recently converted hashtag text; its remaining
        # children are blanked out by filter().
        self._skip_block = None

    def _hereafter(self, elem):
        """Yield ``elem`` and then each following sibling in order."""
        node = elem
        while node:
            yield node
            node = node.next

    def _tags(self, elem):
        """Yield (tag, content) pairs parsed from ``elem`` and its following siblings."""
        tag_str = ''.join(pf.stringify(el) for el in self._hereafter(elem))
        # Drop whatever precedes the first tag; what is left alternates
        # between tags and their content.
        parts = [s.strip() for s in self.RE_HASHTAG.split(tag_str)[1:]]
        self.log.debug(f"split={parts}")

        pair_count, leftover = divmod(len(parts), 2)
        for k in range(pair_count):
            yield (parts[2 * k], parts[2 * k + 1])
        if leftover:
            self.log.error(f"Can't unpack hashtag in '{tag_str}'")

    def filter(self, elem, doc):
        """Replace a text element that starts with a hashtag by formatted inlines.

        The first matching text element produces the output for the whole run;
        every other child of the same parent is replaced by an empty element.
        """
        if self._skip_block and self._skip_block == elem.parent:
            if isinstance(elem, pf.Block):
                return pf.Plain()
            return pf.RawInline('')

        if not (is_text(elem) and self.RE_HASHTAG.match(elem.text)):
            return None

        self._skip_block = elem.parent
        html_output = doc.format == 'html'

        rendered = []
        for tag, content in self._tags(elem):
            self.log.info(f"**{tag}** {shorten(content, 70)}")

            rendered.append(pf.RawInline(r'{\color{lightgray}', 'latex'))
            rendered.append(pf.Span(pf.Strong(pf.Code(tag)), classes=['hash-tag']))
            if not html_output:
                rendered.append(pf.Space())
            rendered.append(pf.Span(pf.Code(content), classes=['hash-content']))
            rendered.append(pf.RawInline(r'\color{black}}', 'latex'))
            rendered.append(pf.LineBreak())

        return rendered

class TitleFilter(Filter):
    """Fallback to using the first heading as the document title"""

    def _get_headings(self, elem):
        """Return the headers that are direct children of ``elem``."""
        return [child for child in elem.content if isinstance(child, pf.Header)]

    def filter(self, elem, doc):
        """Set the 'title' and 'title-plain' metadata from the first heading.

        Acts only on the document element, and only when no title is set.
        Headings are looked for at the top level first, then directly inside
        top-level divs.
        """
        if not (elem == doc):
            return None

        title = doc.metadata.content.get('title')
        if title:
            self.log.debug(f"Title already set: {pf.stringify(title)}")
            return None

        candidates = self._get_headings(doc)
        if not candidates:
            candidates = [
                heading
                for div in doc.content if isinstance(div, pf.Div)
                for heading in self._get_headings(div)
            ]

        if not candidates:
            return None

        heading = candidates[0]
        self.log.info(f"Setting page title: {pf.stringify(heading)}")
        doc.metadata['title'] = pf.MetaInlines(*heading.content)
        doc.metadata['title-plain'] = pf.MetaInlines(pf.Str(pf.stringify(heading)))
        return doc

class OutlineFilter(Filter):
    """Add a hyperlinked table of contents to the beginning of the document."""

    def __init__(self, max_heading=3):
        """max_heading: deepest heading level that is listed in the outline."""
        super().__init__()
        self.max_heading = max_heading

    def _link_factory(self, header):
        """Build the outline entry for ``header``: a paragraph holding one link."""
        indent_style = {'style': f'margin-left: {header.level-1}em;'}
        link = pf.Link(*header.content,
                       url=f'#{header.identifier}',
                       classes=['outline-link'],
                       attributes=indent_style)
        return pf.Para(link)

    def _get_headings(self, elem):
        """Return outline entries for the qualifying headers directly inside ``elem``."""
        entries = []
        for child in elem.content:
            if isinstance(child, pf.Header) and child.level <= self.max_heading:
                entries.append(self._link_factory(child))
        return entries

    def filter(self, elem, doc):
        """Insert the outline div as the first block of an HTML document."""
        if not (elem == doc and doc.format == 'html'):
            return None

        links = self._get_headings(doc)
        if not links:
            # The --file-scope flag puts each file inside a div
            links = [
                entry
                for div in doc.content if isinstance(div, pf.Div)
                for entry in self._get_headings(div)
            ]

        self.log.info(f"Added {len(links)} headings to outline.")
        if not links:
            self.log.warning("Outline is empty, didn't find any headings.")

        outline = pf.Div(pf.RawBlock('<details><summary>Outline</summary>'),
                         *links,
                         pf.RawBlock('</details>'),
                         classes=['outline'])
        elem.content.insert(0, outline)

        return elem

class RelativeURLFilter(Filter):
    """Base class for filters to translate relative URLs."""

    def __init__(self, base_urls, object_type=None):
        """
        base_urls:   mapping from output format to base URL; the key 'default'
                     is used for formats without their own entry
        object_type: element type (or tuple of types) to act on; None means
                     every element is passed to ``_filter``
        """
        super().__init__()
        self._base_urls = base_urls
        self._object_type = object_type

    def base_url(self, format):
        """Return the base URL for ``format``, falling back to the 'default' entry."""
        return self._base_urls.get(format) or self._base_urls.get('default')

    def filter(self, elem, doc):
        """Dispatch matching elements to ``_filter``."""
        if not self._object_type or isinstance(elem, self._object_type):
            return self._filter(elem, doc)

    def _filter(self, elem, doc):
        """Join the element's URL onto the base URL when the URL is relative.

        Returns the modified element, or None when nothing was changed.
        """
        base_url = self.base_url(doc.format)
        if not base_url:
            return None

        url_parts = urlparse(elem.url)
        # A URL with a scheme (mailto:, tel:, data:, ...) is not relative even
        # though it has no network location, so it must be left untouched.
        # NOTE(review): any URL with a fragment is skipped as well, including
        # relative paths such as "page.html#section" -- confirm this is intended.
        if url_parts.scheme or url_parts.netloc or url_parts.fragment:
            return None

        new_url = posixpath.join(base_url, elem.url)
        self.log.info(f"URL '{elem.url}' -> '{new_url}'")
        elem.url = new_url
        return elem

class ImageURLFilter(RelativeURLFilter):
    """Replace relative paths in images"""

    def __init__(self, base_urls):
        # Restrict the generic URL rewriting to image elements.
        super().__init__(base_urls, object_type=pf.Image)

class HyperlinkFilter(RelativeURLFilter):
    """Replace relative paths in hyperlinks"""

    def __init__(self, base_urls):
        # Restrict the generic URL rewriting to link elements.
        super().__init__(base_urls, object_type=pf.Link)

class LiquidImageFilter(Filter):
    """Image filter for https://rbuchberger.github.io/jekyll_picture_tag/"""

    def __init__(self, skip_class='raw-image'):
        """skip_class: images carrying this class, or nested inside an element
        that carries it, are left as they are."""
        super().__init__()
        self.skip_class = skip_class

    def _skip(self, elem: pf.Element):
        """True if ``elem`` or any of its ancestors has the skip class."""
        node = elem
        while True:
            if 'classes' in dir(node) and self.skip_class in node.classes:
                self.log.info(f'Skipping raw image: {pf.stringify(node)}')
                return True
            if not node.parent:
                return False
            node = node.parent

    def filter(self, elem, doc):
        """Replace a local image by a ``picture`` liquid tag (HTML output only)."""
        if doc.format != 'html' or not isinstance(elem, pf.Image):
            return None
        if urlparse(elem.url).hostname:
            return None
        if self._skip(elem):
            return None

        self.log.info(f"Inserting liquid tag for {elem.url}")
        # The title argument is emitted without surrounding quotes
        title = elem.title or pf.stringify(elem)
        return pf.RawInline('{%% picture "%s" --title %s %%}' % (elem.url, title))

class ImageCacheFilter(Filter):
    """Download remote images and point the document at the local copies.

    I'm not crazy about this, but I needed to do something to make building this not suck on my phone.
    """
    REQUESTS_CACHE='requests-cache'

    def __init__(self, cache_dir, cache_formats=('latex',)):
        """
        cache_dir:     directory for downloaded images and the HTTP cache
                       (created if it does not exist)
        cache_formats: output formats for which images are downloaded
        """
        super().__init__()
        self.cache_dir = cache_dir
        self.cache_formats = cache_formats

        os.makedirs(cache_dir, exist_ok=True)
        self._session = CachedSession(
            cache_name=os.path.join(cache_dir, self.REQUESTS_CACHE),
            cache_control=True,
        )

    def filter(self, elem, doc):
        """Fetch an image that has a hostname in its URL and rewrite the URL.

        The file is stored under the SHA-1 of its content plus the text after
        the last dot of the URL's final path component.

        Raises Exception when the HTTP response is falsy.
        """
        if doc.format not in self.cache_formats or not isinstance(elem, pf.Image):
            return None

        parsed = urlparse(elem.url)
        if not parsed.hostname:
            return None

        resp = self._session.get(elem.url)
        if not resp:
            raise Exception(f"Error getting image: {elem.url}")

        extension = os.path.basename(parsed.path).rsplit('.', 1)[-1]
        digest = sha1(resp.content).hexdigest()
        path = os.path.join(self.cache_dir, f"{digest}.{extension}")

        with open(path, 'wb') as out:
            out.write(resp.content)

        self.log.info(f"Image '{elem.url}' cached at '{path}'")
        elem.url = path
        return elem

class EmptyHyperlinkFilter(Filter):
    """Replace empty hyperlinks with their URL"""

    def filter(self, elem, doc):
        """Give a link that has a URL but no content its own URL as link text."""
        if not isinstance(elem, pf.Link):
            return None
        if len(elem.content) != 0 or not elem.url:
            return None

        self.log.info(f"Null hyperlink: {elem.url}")
        elem.content = [pf.Str(elem.url)]
        return elem

class TableDivFilter(Filter):
    """Wrap tables inside a div"""

    def __init__(self, classes=None):
        """classes: list of classes for the wrapping div; defaults to
        ['table-container'] when omitted."""
        super().__init__()
        # A fresh list per instance instead of a mutable default argument
        # shared by every instance.
        self.classes = classes if classes is not None else ['table-container']

    def filter(self, elem, doc):
        """Return a div containing the table, or None for any other element."""
        if isinstance(elem, pf.Table):
            return pf.Div(elem, classes=self.classes)

class UnbalancedDelimiterFilter(Filter):
    """Print a warning if it looks like there might be mismatched bold/italic/underline delimiters"""
    DELIMITERS = ('*', '_')

    def filter(self, elem, doc):
        """Warn when a text element starts or ends with a delimiter character.

        The element is never modified; the method always returns None.
        """
        if not isinstance(elem, pf.Str):
            return None

        s = pf.stringify(elem).strip()
        # startswith/endswith are safe on an empty string, whereas indexing
        # s[0] raised IndexError for empty or whitespace-only text.
        delimiters = tuple(self.DELIMITERS)
        if s.startswith(delimiters) or s.endswith(delimiters):
            self.log.warning(f"WARNING! POSSIBLE MISMATCHED DELIMITERS: {s}")

        return None

class MinTableWidthFilter(Filter):
    """Finds the widest column, and narrows it if necessary to make all columns at least min_width.

    I kinda hate this, but it works well enough with all the actual tables I have."""

    def __init__(self, min_width=0.22):
        """min_width: smallest width allowed for a column with a float width."""
        super().__init__()
        self.min_width = min_width

    def filter(self, elem, doc):
        """Widen too-narrow columns of a table and take the space from the widest one.

        Only acts on tables in latex output and only on columns whose width is
        a float. The table is modified in place; the method returns None.
        """
        if not (isinstance(elem, pf.Table) and doc.format == 'latex'):
            return None

        def change_width(index, new_width):
            # Keep the first item of the column spec and replace the width.
            elem.colspec[index] = (elem.colspec[index][0], new_width)

        self.log.debug(f"Table: {pf.stringify(elem.head)}\n\tTable spec: {elem.colspec}")

        adjust = 0
        widest_pct = 0
        widest_index = None

        for i, spec in enumerate(elem.colspec):
            width = spec[1]
            if not isinstance(width, float):
                continue

            if width < self.min_width:
                self.log.warning(f"Column {i} is narrower than {self.min_width} minimum ({width})")
                adjust += self.min_width - width
                change_width(i, self.min_width)
            elif width > widest_pct:
                widest_pct = width
                widest_index = i

        if adjust > 0:
            # Compare against None explicitly: index 0 is a valid column and
            # must not be mistaken for "no widest column found".
            if widest_index is not None:
                self.log.warning(f"Resizing column {widest_index} by {adjust}")
                change_width(widest_index, widest_pct - adjust)
            else:
                self.log.warning(f"All columns narrower than minimum width! {pf.stringify(elem.head)}")

        return None

class EmojiFilter(Filter):
    """Wrap emoji in an emoji font switch for latex output."""

    def filter(self, elem, doc):
        """Split a Str into plain text pieces and raw LaTeX emoji inlines.

        Returns the list of replacement inlines, or None when the output
        format is not latex, the element is not a Str, or no emoji was found.
        """
        if doc.format != 'latex' or not isinstance(elem, pf.Str):
            return None

        text = elem.text
        pieces = []
        cursor = 0

        for token in analyze(text):
            start, end = token.value.start, token.value.end
            # Skip the empty piece in front of a leading or adjacent emoji.
            if start > cursor:
                pieces.append(pf.Str(text[cursor:start]))
            pieces.append(pf.RawInline(r'{\emojifont %s}' % token.chars, 'latex'))
            cursor = end

        # Keep whatever follows the last emoji; it was previously dropped.
        if pieces and cursor < len(text):
            pieces.append(pf.Str(text[cursor:]))

        return pieces or None

class ImplicitHyperlinkedFigures(Filter):
    """Like the built-in implicit_figures extension, except it also works if the image is a hyperlink"""

    def _search(self, elem: pf.Element):
        """Return (link, image) when ``elem`` is a paragraph holding only a link
        that holds only an image; otherwise (None, None)."""
        not_found = (None, None)

        if not isinstance(elem, pf.Para) or len(elem.content) != 1:
            return not_found

        link = elem.content[0]
        if not isinstance(link, pf.Link) or len(link.content) != 1:
            return not_found

        image = link.content[0]
        if not isinstance(image, pf.Image):
            return not_found

        return link, image

    def filter(self, elem, doc):
        """Turn a paragraph consisting of a linked image into a figure.

        The image's own content becomes the caption and its stringified form
        becomes the image title.
        """
        link, image = self._search(elem)
        if not image:
            return None

        self.log.info(f"Image -> Figure: {elem}")

        # Build the caption before the image content is emptied below.
        caption = pf.Caption(pf.Plain(*image.content))
        image.title = pf.stringify(image)
        image.content.clear()

        return pf.Figure(pf.Plain(link), caption=caption)

class ImageWidthFilter(Filter):
    """Give every image a fixed width attribute in the listed output formats."""
    FORMATS = ('latex',)
    WIDTH = '80%'

    def filter(self, elem, doc):
        """Set the 'width' attribute of an image and return it; None otherwise."""
        if not isinstance(elem, pf.Image):
            return None
        if doc.format not in self.FORMATS:
            return None

        elem.attributes['width'] = self.WIDTH
        return elem

class ParaCommand(Filter):
    """Intercept paragraphs that begin with a special command string

    Commands:
        Accept the content of the matching Para object without its first two
            elements (the command and the space after it)
        Return a panflute Block, an iterable of Blocks, or None to suppress output

    Hooks:
        start-block: Invoked before the first occurrence of a command following normal content
        end-block: Invoked when the first block that is not a command is reached
            after one or more commands; its output is placed before that block
    """
    HOOK_TYPES = ('start-block', 'end-block')

    def __init__(self):
        super().__init__()
        self.doc = None
        # True from the first command of a run until the next ordinary block.
        self._in_block = False
        self._commands = {}
        self._hooks = {}
        for hook_type in self.HOOK_TYPES:
            self._hooks[hook_type] = []

    def _call_hooks(self, hook):
        """Run every function registered for ``hook`` and collect the results in one list."""
        return [
            item
            for callback in self._hooks[hook]
            for item in self.normalize(callback())
        ]

    @staticmethod
    def normalize(input):
        """Coerce a command or hook result to an iterable.

        Iterables are returned unchanged, any other truthy value is wrapped in
        a list, and falsy values give an empty list.
        """
        if isinstance(input, Iterable):
            return input
        return [ input ] if input else []

    def add_command(self, cmd: str, func):
        """Register ``func`` as the handler for paragraphs whose first word is ``cmd``."""
        self._commands[cmd] = func

    def add_hook(self, hook: str, func):
        """Append ``func`` to the functions run for ``hook`` (one of HOOK_TYPES)."""
        self._hooks[hook].append(func)

    def filter(self, elem, doc):
        """Dispatch command paragraphs and fire the block hooks around them."""
        self.doc = doc

        if type(elem) == pf.Para:
            words = pf.stringify(elem).split()
            handler = self._commands.get(words[0]) if words else None

            if handler:
                output = []
                if not self._in_block:
                    self._in_block = True
                    output.extend(self._call_hooks('start-block'))

                # Skip command and space
                arguments = elem.content[2:]
                output.extend(self.normalize(handler(arguments)))
                # An empty result still has to replace the command paragraph
                return output or pf.Plain()

        if self._in_block and isinstance(elem, pf.Block):
            self._in_block = False
            closing = self._call_hooks('end-block')
            return [*closing, elem]

class BarQuoteFilter(ParaCommand):
    """LaTeX documents will use the barquote environment, while other formats
    should simply render this as a blockquote."""

    def __init__(self):
        super().__init__()
        # Paragraphs collected for non-LaTeX output until the block ends.
        self.buffer = []

        self.add_hook('start-block', self._begin)
        self.add_hook('end-block', self._end)
        self.add_command('!barquote', self._cmd_barquote)

    def _begin(self):
        """start-block hook: open the LaTeX environment."""
        return pf.RawBlock(r'\begin{barquote}', 'latex')

    def _end(self):
        """end-block hook: emit the buffered paragraphs as one blockquote (if
        any) followed by the end of the LaTeX environment."""
        closing = pf.RawBlock(r'\end{barquote}', 'latex')
        if len(self.buffer) == 0:
            return [closing]

        quote = pf.BlockQuote(*self.buffer)
        self.buffer.clear()
        return [quote, closing]

    def _cmd_barquote(self, els):
        """Command handler: return the paragraph for LaTeX, buffer it otherwise."""
        self.log.debug("Processing !barquote")
        para = pf.Para(*els)

        if self.doc.format != 'latex':
            self.buffer.append(para)
            return None
        return para

class OEmbedFilter(ParaCommand):
    """Use the industry-standard OEmbed API to get embed code and thumbnails
    from sites like YouTube"""

    def __init__(self, providers, cache_file):
        """
        providers:  mapping from a URL's network location to the OEmbed
                    endpoint to query
        cache_file: cache name for the HTTP session
        """
        super().__init__()
        self.providers = providers
        self._formatters = {
            'html': self._to_html,
            'latex': self._to_latex
        }
        self._session = CachedSession(cache_name=cache_file, cache_control=True)

        self.add_command('!embed', self._cmd_embed)

    @property
    def formatter(self):
        """Formatter for the current document's output format, or None."""
        return self._formatters.get(self.doc.format)

    def _cmd_embed(self, els):
        """Command handler for ``!embed <url>``.

        Returns the formatted embed, or None when the arguments are not a
        single Str, no formatter exists for the output format, or the query
        fails.
        """
        if len(els) != 1:
            self.log.error(f"Expected URL, got {pf.stringify(els)}")
            return None

        url_pf = els[0]
        if not isinstance(url_pf, pf.Str):
            return None

        url = url_pf.text
        self.log.info(f"Embedding URL: {url}")

        if not self.formatter:
            self.log.warning(f"No formatter found for {self.doc.format}")
            return None

        resp = self._get_oembed(url)
        if not resp:
            self.log.error(f"Error querying the OEmbed API for {url}")
            return None

        return self.formatter(resp.json(), url)

    def _get_oembed(self, url):
        """Query the provider registered for the URL's network location.

        Returns the HTTP response, or None (after logging an error) when
        ``url`` has no network location or no provider is registered for it.
        """
        parsed = urlparse(url)
        if not parsed.netloc:
            self.log.error(f"Not a URL: {url}")
            return None

        query_url = self.providers.get(parsed.netloc)
        if not query_url:
            self.log.error(f"No OEmbed provider found: {url}")
            return None

        self.log.debug(f"Doing OEmbed query with base URL {query_url}")
        query = {
            'url': url,
            'format': 'json'
        }
        return self._session.get(query_url, params=query)

    def _to_html(self, resp, raw_url):
        """Wrap the provider's embed code in a div."""
        code = resp['html']
        self.log.debug(f"Pasting embed code for {raw_url}: {code}")
        return pf.Div(pf.RawBlock(code), classes=['youtube'])

    def _to_latex(self, resp, raw_url):
        """Build a figure: the linked thumbnail, captioned with the title and URL."""
        thumb_url = resp['thumbnail_url']
        title = pf.Str(resp['title'])

        text_hyperlink = pf.Link(pf.Str(raw_url), url=raw_url)
        caption = pf.Caption(pf.Plain(title, pf.Space(), pf.Str('('), text_hyperlink, pf.Str(')')))

        thumbnail = pf.Image(title, url=thumb_url)
        linked_thumbnail = pf.Link(thumbnail, url=raw_url)
        return pf.Figure(pf.Plain(linked_thumbnail), caption=caption)

class BlockHyperlinkFilter(ParaCommand):
    """Turn a ``!blocklink <url> <text...>`` paragraph into a hyperlinked blockquote."""

    def __init__(self):
        super().__init__()
        self.add_command('!blocklink', self._cmd)

    def _cmd(self, els):
        """Command handler: the first element is the URL, the rest is the link content."""
        target = els.pop(0)
        url = pf.stringify(target)
        self.log.info(f"Hyperlinking paragraph: {url}")

        link = pf.Link(*els, url=url)
        return pf.BlockQuote(pf.Plain(link))

class MessageBubbleFilter(ParaCommand):
    """Universal message bubbles
    Currently supported output formats: HTML and LaTeX (others should be ignored)

    Commands (use at the start of a paragraph):

    ![l|r]bubble
        Put the following text in a left or right-aligned bubble
    ![l|r]bubbleis
        Set the bottom info text for left or right bubbles
    ![l|r]bubble {Saturday, May 09 2020 07:31AM}
        Optional timestamp, etc.
    """
    def __init__(self):
        super().__init__()

        # True while a LaTeX conversation environment is open.
        self.conversing = False
        # Info text per side ('left'/'right'), as set by ![l|r]bubbleis.
        self._bubbleinfo = {}

        self._formatters = {
            'html': self._to_html,
            'latex': self._to_latex,
        }

        self.add_command('!lbubble', lambda els: self._cmd_bubble(els, 'left'))
        self.add_command('!rbubble', lambda els: self._cmd_bubble(els, 'right'))
        self.add_command('!lbubbleis', lambda els: self._cmd_bubbleis(els, 'left'))
        self.add_command('!rbubbleis', lambda els: self._cmd_bubbleis(els, 'right'))
        self.add_hook('end-block', self._end_conversation)

    @property
    def formatter(self):
        """Formatter for the current document's output format, or None."""
        return self._formatters.get(self.doc.format)

    def _get_bubbleinfo(self, side):
        """Return the info elements for ``side``.

        Falls back to the document metadata key 'default-lbubbleis' or
        'default-rbubbleis', and to an empty list if neither is set.
        """
        r = self._bubbleinfo.get(side)
        if not r:
            default = self.doc.metadata.content.get(f'default-{side[0]}bubbleis')
            if default:
                r = default.content

        return r or []

    def _set_bubbleinfo(self, side, info):
        """Remember ``info`` as the info text for ``side``."""
        self._bubbleinfo[side] = info

    def _start_conversation(self):
        """Mark the conversation as open and return the LaTeX begin block."""
        self.conversing = True
        return pf.RawBlock(r'\begin{messagebubbleconv}', 'latex')

    def _end_conversation(self):
        """end-block hook: close the LaTeX environment if one is open."""
        if self.conversing:
            self.conversing = False
            return pf.RawBlock(r'\end{messagebubbleconv}', 'latex')

    def _cmd_bubbleis(self, els, side):
        """Command handler for ![l|r]bubbleis; produces no output."""
        self.log.debug(f"Processing !xbubbleis ({side})")
        self._set_bubbleinfo(side, els)

    def _get_timestamp(self, els: list[pf.Element]):
        """Split a leading ``{...}`` timestamp off ``els``.

        Returns (timestamp, els): ``timestamp`` is the list of elements found
        between the braces, or None when ``els`` does not start with '{'.
        The timestamp elements are removed from ``els`` in place.

        Raises Exception when an opening brace is never closed.
        """
        if not els:
            return (None, els)

        first = els[0]
        if not (isinstance(first, pf.Str) and first.text.startswith('{')):
            return (None, els)

        els.pop(0)
        opening = first.text[1:]  # Drop {

        # The whole timestamp may sit in a single token, e.g. "{now}".
        if opening.endswith('}'):
            return ([pf.Str(opening[:-1])], els)  # Drop }

        timestamp = [pf.Str(opening)]
        while els:
            el = els.pop(0)
            if isinstance(el, pf.Str) and el.text.endswith('}'):
                timestamp.append(pf.Str(el.text[:-1]))  # Drop }
                return (timestamp, els)
            timestamp.append(el)

        # Only reached when the input ran out before a closing brace was
        # seen; a timestamp with nothing after it is not an error.
        raise Exception("Unmatched '{'")

    def _cmd_bubble(self, els, side):
        """Command handler for ![l|r]bubble: format one message bubble.

        Opens the conversation first if none is open. The info line is the
        side's info text and the optional timestamp, joined by ' | ' when
        both are present.
        """
        self.log.debug(f"Processing !xbubble ({side})")
        out = []

        if not self.conversing:
            out.extend(self.normalize(self._start_conversation()))

        timestamp, els = self._get_timestamp(els)
        info = [*self._get_bubbleinfo(side)]
        if info and timestamp:
            info.extend([
                pf.Space(),
                pf.Str('|'),
                pf.Space()
            ])
        if timestamp:
            info.append(pf.Emph(*timestamp))

        if self.formatter:
            out.extend(self.normalize(self.formatter(els, info, side)))
        else:
            self.log.warning(f"No formatter found for type {self.doc.format}")

        return out

    def _filter_els(self, els):
        """Make blockquotes work inside bubbles

        Yields one block per line of ``els`` (lines are separated by line or
        soft breaks): a blockquote when the line starts with '>', otherwise a
        paragraph. ``els`` is consumed in the process.
        """
        if not els:
            return

        buffer = []

        factory = lambda *els: pf.Para(*els)
        if isinstance(els[0], pf.Str) and els[0].text == '>':
            factory = lambda *els: pf.BlockQuote(pf.Para(*els))
            els.pop(0)

        while els:
            el = els.pop(0)
            if isinstance(el, (pf.LineBreak, pf.SoftBreak)):
                break

            buffer.append(el)

        if buffer:
            yield factory(*buffer)
        if els:
            yield from self._filter_els(els)

    def _to_latex(self, els, info, side):
        """Format a bubble as a paragraph calling the side's LaTeX bubble macro."""
        def raw_latex(t): return pf.RawInline(t, 'latex')

        # Undo image width filter for message bubbles. I kinda hate this.
        for el in els:
            if isinstance(el, pf.Image):
                el.attributes.pop('width', None)

        return pf.Para(
            raw_latex('\\messagebubble%s{' % side),
            *info,
            raw_latex('}{'),
            *els,
            raw_latex('}')
        )

    def _to_html(self, els, info, side):
        """Format a bubble as nested divs: content first, then the optional info."""
        els = list(self._filter_els(els))    #TODO Doesn't work for latex
        r = [ pf.Div(*els, classes=[f'{side}-bubble-content']) ]

        if info:
            r.append(pf.Div(pf.Para(*info), classes=[f'{side}-bubble-info']))

        return pf.Div(*r, classes=[f'{side}-bubble'])


if __name__ == '__main__':
    # Only warnings and above are reported; each record names the filter
    # class, method and line that produced it.
    logging.basicConfig(
        level=logging.WARNING,
        format='%(levelname)s:%(lineno)d:%(name)s.%(funcName)s():%(message)s',
    )

    # The filters are handed to panflute in this order; the ordering
    # constraints noted below must be kept when adding or moving entries.
    pipeline = [
        BoldTableHeadingsFilter(),
        SuperOrdinalFilter(),
        NotesAfterPunctuationFilter(),
        TranslateEntitiesFilter(ENTITY_FORMATS, ENTITY_TABLE),
        TrademarkFilter(),
        InlineHeadingsFilter(),
        HashtagFilter(),
        TitleFilter(),
        OutlineFilter(),
        ImageURLFilter(BASE_URLS),
        HyperlinkFilter(BASE_URLS),
        LiquidImageFilter(),            # Needs to happen after ImageURLFilter
        EmptyHyperlinkFilter(),
        TableDivFilter(),
        UnbalancedDelimiterFilter(),
        MinTableWidthFilter(),
        EmojiFilter(),
        ImplicitHyperlinkedFigures(),
        OEmbedFilter(OEMBED_PROVIDERS, OEMBED_CACHE),
        BlockHyperlinkFilter(),
        ImageCacheFilter(IMAGE_CACHE),  # Needs to happen after OEmbedFilter for best results
        ImageWidthFilter(),             # Needs to happen after images and before bubbles
        MessageBubbleFilter(),
        BarQuoteFilter(),
    ]

    pf.run_filters([stage.filter for stage in pipeline])
