Skip to content

preprocessors

PRE-PROCESSORS.

Preprocessors work on source text before we start doing anything too complicated.

Classes

Preprocessor(markdown_instance=None)

Bases: Processor

Preprocessors are run after the text is broken into lines.

Each preprocessor implements a "run" method that takes a pointer to a list of lines of the document, modifies it as necessary and returns either the same pointer or a pointer to a new list.

Preprocessors must extend markdown.Preprocessor.

Source code in pyrevitlib/pyrevit/coreutils/markdown/util.py
def __init__(self, markdown_instance=None):
    """Remember the Markdown instance this processor belongs to.

    Args:
        markdown_instance: object stored as ``self.markdown``. When it is
            omitted (or otherwise falsy) no attribute is set.
    """
    if not markdown_instance:
        return
    self.markdown = markdown_instance

Attributes

markdown = markdown_instance instance-attribute

Functions

run(lines)

Main preprocessor task.

Each subclass of Preprocessor should override the run method, which takes the document as a list of strings split by newlines and returns the (possibly modified) list of lines.

Source code in pyrevitlib/pyrevit/coreutils/markdown/preprocessors.py
def run(self, lines):
    """Process the document lines; intended to be overridden.

    Subclasses of Preprocessor provide their own `run`, which receives
    the document as a list of strings split by newlines and hands back
    the list of lines, modified or not. This base version does nothing
    and returns None.

    """
    return None  # pragma: no cover

NormalizeWhitespace(markdown_instance=None)

Bases: Preprocessor

Normalize whitespace for consistent parsing.

Source code in pyrevitlib/pyrevit/coreutils/markdown/util.py
def __init__(self, markdown_instance=None):
    """Remember the Markdown instance this processor belongs to.

    Args:
        markdown_instance: object stored as ``self.markdown``. When it is
            omitted (or otherwise falsy) no attribute is set.
    """
    if not markdown_instance:
        return
    self.markdown = markdown_instance

Attributes

markdown = markdown_instance instance-attribute

Functions

run(lines)
Source code in pyrevitlib/pyrevit/coreutils/markdown/preprocessors.py
def run(self, lines):
    """Return the document lines with whitespace normalized.

    The lines are joined into one string, the ``util.STX``/``util.ETX``
    marker characters are removed, all line endings become ``\\n``, two
    trailing newlines are appended, tabs are expanded to
    ``self.markdown.tab_length`` columns, and lines consisting only of
    spaces are emptied. The result is split back into a list of lines.
    """
    text = '\n'.join(lines)
    # Drop the reserved marker characters from the input.
    for marker in (util.STX, util.ETX):
        text = text.replace(marker, "")
    # Unify line endings: CRLF first so a lone CR pass cannot double up.
    text = text.replace("\r\n", "\n")
    text = text.replace("\r", "\n")
    text += "\n\n"
    text = text.expandtabs(self.markdown.tab_length)
    # A line made only of spaces becomes a truly empty line.
    text = re.sub(r'(?<=\n) +\n', '\n', text)
    return text.split('\n')

HtmlBlockPreprocessor(markdown_instance=None)

Bases: Preprocessor

Remove html blocks from the text and store them for later retrieval.

Source code in pyrevitlib/pyrevit/coreutils/markdown/util.py
def __init__(self, markdown_instance=None):
    """Remember the Markdown instance this processor belongs to.

    Args:
        markdown_instance: object stored as ``self.markdown``. When it is
            omitted (or otherwise falsy) no attribute is set.
    """
    if not markdown_instance:
        return
    self.markdown = markdown_instance

Attributes

markdown = markdown_instance instance-attribute
right_tag_patterns = ['</%s>', '%s>'] class-attribute instance-attribute
attrs_pattern = '\n \\s+(?P<attr>[^>"\'/= ]+)=(?P<q>[\'"])(?P<value>.*?)(?P=q) # attr="value"\n | # OR\n \\s+(?P<attr1>[^>"\'/= ]+)=(?P<value1>[^> ]+) # attr=value\n | # OR\n \\s+(?P<attr2>[^>"\'/= ]+) # attr\n ' class-attribute instance-attribute
left_tag_pattern = '^\\<(?P<tag>[^> ]+)(?P<attrs>(%s)*)\\s*\\/?\\>?' % attrs_pattern class-attribute instance-attribute
attrs_re = re.compile(attrs_pattern, re.VERBOSE) class-attribute instance-attribute
left_tag_re = re.compile(left_tag_pattern, re.VERBOSE) class-attribute instance-attribute
markdown_in_raw = False class-attribute instance-attribute

Functions

run(lines)
Source code in pyrevitlib/pyrevit/coreutils/markdown/preprocessors.py
def run(self, lines):
    """Replace raw HTML blocks in the document with stash placeholders.

    The lines are joined and split into blocks on blank lines ("\\n\\n").
    Blocks are consumed one at a time from the front of the queue. A block
    that starts with "<" is inspected: depending on its opening tag and
    whether a matching closing tag is found, it is passed through
    unchanged, stored in ``self.markdown.htmlStash`` (the value returned
    by the stash call is appended to the output), or accumulated in
    ``items`` together with the following blocks until the closing tag
    appears. Any other block is passed through unchanged.

    Args:
        lines: the document as a list of strings.

    Returns:
        The processed document, re-split into a list of lines.
    """
    text = "\n".join(lines)
    new_blocks = []
    # No maxsplit is given, so this yields the same pieces as split().
    text = text.rsplit("\n\n")
    # Blocks of an HTML element that spans several blank-line-separated
    # blocks; only populated while in_tag is True.
    items = []
    left_tag = ''
    right_tag = ''
    in_tag = False  # flag

    while text:
        # Take the next block off the front of the queue.
        block = text[0]
        if block.startswith("\n"):
            block = block[1:]
        text = text[1:]

        # Strip a second leading newline if one is still present.
        if block.startswith("\n"):
            block = block[1:]

        if not in_tag:
            if block.startswith("<") and len(block.strip()) > 1:

                if block[1:4] == "!--":
                    # is a comment block
                    left_tag, left_index, attrs = "--", 2, {}
                else:
                    left_tag, left_index, attrs = self._get_left_tag(block)
                # NOTE(review): helper not visible here; presumably returns
                # the closing tag found and the index just past it - confirm.
                right_tag, data_index = self._get_right_tag(left_tag,
                                                            left_index,
                                                            block)
                # keep checking conditions below and maybe just append

                # Text after data_index is pushed back onto the queue so it
                # is processed as its own block on the next iteration.
                if data_index < len(block) and (
                    util.isBlockLevel(left_tag) or left_tag == '--'):
                    text.insert(0, block[data_index:])
                    block = block[:data_index]

                # Not block-level and not a "<!", "<?", "<@" or "<%"
                # construct: leave the block untouched.
                if not (util.isBlockLevel(left_tag) or block[1] in ["!", "?", "@", "%"]):
                    new_blocks.append(block)
                    continue

                if self._is_oneliner(left_tag):
                    new_blocks.append(block.strip())
                    continue

                # The block ends with ">" and the tags pair up: the whole
                # element is contained in this one block.
                if block.rstrip().endswith(">") \
                        and self._equal_tags(left_tag, right_tag):
                    if self.markdown_in_raw and 'markdown' in attrs.keys():
                        # Stash only the tag; keep the inner text as a
                        # regular block in the output.
                        block = block[left_index:-len(right_tag) - 2]
                        new_blocks.append(self.markdown.htmlStash.
                                          store_tag(left_tag, attrs, 0, 2))
                        new_blocks.extend([block])
                    else:
                        new_blocks.append(
                            self.markdown.htmlStash.store(block.strip()))
                    continue
                else:
                    # if is block level tag and is not complete
                    if (not self._equal_tags(left_tag, right_tag)) and \
                       (util.isBlockLevel(left_tag) or left_tag == "--"):
                        # Start accumulating; following blocks are handled
                        # by the in_tag branch below.
                        items.append(block.strip())
                        in_tag = True
                    else:
                        new_blocks.append(
                            self.markdown.htmlStash.store(block.strip())
                        )
                    continue

            else:
                # Not an HTML candidate: pass through unchanged.
                new_blocks.append(block)

        else:
            items.append(block)

            # Need to evaluate all items so we can calculate relative to the left index.
            right_tag, data_index = self._get_right_tag(left_tag, left_index, ''.join(items))
            # Adjust data_index: relative to items -> relative to last block
            prev_block_length = 0
            for item in items[:-1]:
                prev_block_length += len(item)
            data_index -= prev_block_length

            if self._equal_tags(left_tag, right_tag):
                # if find closing tag

                if data_index < len(block):
                    # we have more text after right_tag
                    items[-1] = block[:data_index]
                    text.insert(0, block[data_index:])

                in_tag = False
                if self.markdown_in_raw and 'markdown' in attrs.keys():
                    # Trim the opening tag from the first item and the
                    # closing tag from the last one before stashing the tag.
                    items[0] = items[0][left_index:]
                    items[-1] = items[-1][:-len(right_tag) - 2]
                    if items[len(items) - 1]:  # not a newline/empty string
                        right_index = len(items) + 3
                    else:
                        right_index = len(items) + 2
                    new_blocks.append(self.markdown.htmlStash.store_tag(
                        left_tag, attrs, 0, right_index))
                    # Count how many tag entries the nested pass adds so
                    # the right_index of the entry stored above can be
                    # adjusted by that amount.
                    placeholderslen = len(self.markdown.htmlStash.tag_data)
                    new_blocks.extend(
                        self._nested_markdown_in_html(items))
                    nests = len(self.markdown.htmlStash.tag_data) - \
                        placeholderslen
                    self.markdown.htmlStash.tag_data[-1 - nests][
                        'right_index'] += nests - 2
                else:
                    new_blocks.append(
                        self.markdown.htmlStash.store('\n\n'.join(items)))
                items = []

    # The input ended while still accumulating an element (no closing tag
    # was found): flush what was collected, same handling as above.
    if items:
        if self.markdown_in_raw and 'markdown' in attrs.keys():
            items[0] = items[0][left_index:]
            items[-1] = items[-1][:-len(right_tag) - 2]
            if items[len(items) - 1]:  # not a newline/empty string
                right_index = len(items) + 3
            else:
                right_index = len(items) + 2
            new_blocks.append(
                self.markdown.htmlStash.store_tag(
                    left_tag, attrs, 0, right_index))
            placeholderslen = len(self.markdown.htmlStash.tag_data)
            new_blocks.extend(self._nested_markdown_in_html(items))
            nests = len(self.markdown.htmlStash.tag_data) - placeholderslen
            self.markdown.htmlStash.tag_data[-1 - nests][
                'right_index'] += nests - 2
        else:
            new_blocks.append(
                self.markdown.htmlStash.store('\n\n'.join(items)))
        new_blocks.append('\n')

    # Re-assemble the blocks with blank lines and return a list of lines.
    new_text = "\n\n".join(new_blocks)
    return new_text.split("\n")

ReferencePreprocessor(markdown_instance=None)

Bases: Preprocessor

Remove reference definitions from text and store for later use.

Source code in pyrevitlib/pyrevit/coreutils/markdown/util.py
def __init__(self, markdown_instance=None):
    """Remember the Markdown instance this processor belongs to.

    Args:
        markdown_instance: object stored as ``self.markdown``. When it is
            omitted (or otherwise falsy) no attribute is set.
    """
    if not markdown_instance:
        return
    self.markdown = markdown_instance

Attributes

markdown = markdown_instance instance-attribute
TITLE = '[ ]*(\\"(.*)\\"|\\\'(.*)\\\'|\\((.*)\\))[ ]*' class-attribute instance-attribute
RE = re.compile('^[ ]{0,3}\\[([^\\]]*)\\]:\\s*([^ ]*)[ ]*(%s)?$' % TITLE, re.DOTALL) class-attribute instance-attribute
TITLE_RE = re.compile('^%s$' % TITLE) class-attribute instance-attribute

Functions

run(lines)
Source code in pyrevitlib/pyrevit/coreutils/markdown/preprocessors.py
def run(self, lines):
    """Strip reference definitions from the document and record them.

    Every line matching ``self.RE`` is removed from the output and stored
    in ``self.markdown.references`` under its lower-cased, stripped id as
    a ``(link, title)`` tuple. Surrounding ``<``/``>`` are stripped from
    the link. When the definition line carries no title, the following
    line (if there is one) is checked against ``self.TITLE_RE`` and, on a
    match, consumed as the title. The title is ``None`` when none is found.

    The input list is read by index and left untouched.

    Args:
        lines: the document as a list of strings.

    Returns:
        A new list holding the lines that are not part of a reference
        definition, in their original order.
    """
    new_text = []
    total = len(lines)
    pos = 0
    while pos < total:
        line = lines[pos]
        pos += 1
        m = self.RE.match(line)
        if not m:
            new_text.append(line)
            continue
        ref_id = m.group(1).strip().lower()
        link = m.group(2).lstrip('<').rstrip('>')
        title = m.group(5) or m.group(6) or m.group(7)
        # Only look ahead for a title when a next line exists; a definition
        # on the very last line used to raise IndexError here.
        if not title and pos < total:
            tm = self.TITLE_RE.match(lines[pos])
            if tm:
                pos += 1  # the title line belongs to the definition
                title = tm.group(2) or tm.group(3) or tm.group(4)
        self.markdown.references[ref_id] = (link, title)

    return new_text

Functions

build_preprocessors(md_instance, **kwargs)

Build the default set of preprocessors used by Markdown.

Source code in pyrevitlib/pyrevit/coreutils/markdown/preprocessors.py
def build_preprocessors(md_instance, **kwargs):
    """Assemble Markdown's default preprocessors in execution order.

    Whitespace normalization is registered first and reference extraction
    last. The HTML block preprocessor sits between them and is left out
    when ``md_instance.safeMode`` equals ``'escape'``.

    Args:
        md_instance: the Markdown instance passed to each preprocessor.
        **kwargs: accepted but not used by this function.

    Returns:
        An ``odict.OrderedDict`` mapping preprocessor names to instances.
    """
    registry = odict.OrderedDict()

    def _register(key, processor_cls):
        # Instantiate against the shared Markdown instance and record it.
        registry[key] = processor_cls(md_instance)

    _register('normalize_whitespace', NormalizeWhitespace)
    if md_instance.safeMode != 'escape':
        _register("html_block", HtmlBlockPreprocessor)
    _register("reference", ReferencePreprocessor)
    return registry