Hatta Devel

changeset 633:6d47f7177445

don't use sorted dicts in markup patterns, use list of tuples instead
author sheep@ghostwheel
date Fri Dec 04 21:55:48 2009 +0100 (2009-12-04)
parents 422d30e693d5
children 541b9b77637c
files hatta.py tests/test_parser.txt
line diff
     1.1 --- a/hatta.py	Fri Dec 04 14:19:49 2009 +0100
     1.2 +++ b/hatta.py	Fri Dec 04 21:55:48 2009 +0100
     1.3 @@ -658,19 +658,19 @@
     1.4      bullets_pat = ur"^\s*[*]+\s+"
     1.5      heading_pat = ur"^\s*=+"
     1.6      quote_pat = ur"^[>]+\s+"
     1.7 -    block = {
     1.8 -        "bullets": bullets_pat,
     1.9 -        "code": ur"^[{][{][{]+\s*$",
    1.10 -        "conflict": ur"^<<<<<<< local\s*$",
    1.11 -        "empty": ur"^\s*$",
    1.12 -        "heading": heading_pat,
    1.13 -        "indent": ur"^[ \t]+",
    1.14 -        "macro": ur"^<<\w+\s*$",
    1.15 -        "quote": quote_pat,
    1.16 -        "rule": ur"^\s*---+\s*$",
    1.17 -        "syntax": ur"^\{\{\{\#![\w+#.-]+\s*$",
    1.18 -        "table": ur"^\|",
    1.19 -    } # note that the priority is alphabetical
    1.20 +    block = [
    1.21 +        ("bullets", bullets_pat),
    1.22 +        ("code", ur"^[{][{][{]+\s*$"),
    1.23 +        ("conflict", ur"^<<<<<<< local\s*$"),
    1.24 +        ("empty", ur"^\s*$"),
    1.25 +        ("heading", heading_pat),
    1.26 +        ("indent", ur"^[ \t]+"),
    1.27 +        ("macro", ur"^<<\w+\s*$"),
    1.28 +        ("quote", quote_pat),
    1.29 +        ("rule", ur"^\s*---+\s*$"),
    1.30 +        ("syntax", ur"^\{\{\{\#![\w+#.-]+\s*$"),
    1.31 +        ("table", ur"^\|"),
    1.32 +    ]
    1.33      image_pat = (ur"\{\{(?P<image_target>([^|}]|}[^|}])*)"
    1.34                   ur"(\|(?P<image_text>([^}]|}[^}])*))?}}")
    1.35      smilies = {
    1.36 @@ -700,28 +700,28 @@
    1.37      camel_link = ur"\w+[%s]\w+" % re.escape(
    1.38          u''.join(unichr(i) for i in xrange(sys.maxunicode)
    1.39          if unicodedata.category(unichr(i))=='Lu'))
    1.40 -    markup = {
    1.41 -        "bold": ur"[*][*]",
    1.42 -        "camel_link": camel_link,
    1.43 -        "camel_nolink": ur"[!~](?P<camel_text>%s)" % camel_link,
    1.44 -        "code": ur"[{][{][{](?P<code_text>([^}]|[^}][}]|[^}][}][}])"
    1.45 -                ur"*[}]*)[}][}][}]",
    1.46 -        "free_link": ur"""(http|https|ftp)://\S+[^\s.,:;!?()'"=+<>-]""",
    1.47 -        "italic": ur"//",
    1.48 -        "link": ur"\[\[(?P<link_target>([^|\]]|\][^|\]])+)"
    1.49 -                ur"(\|(?P<link_text>([^\]]|\][^\]])+))?\]\]",
    1.50 -        "image": image_pat,
    1.51 -        "linebreak": ur"\\\\",
    1.52 -        "macro": ur"[<][<](?P<macro_name>\w+)\s+"
    1.53 -                 ur"(?P<macro_text>([^>]|[^>][>])+)[>][>]",
    1.54 -        "mail": ur"""(mailto:)?\S+@\S+(\.[^\s.,:;!?()'"/=+<>-]+)+""",
    1.55 -        "math": ur"\$\$(?P<math_text>[^$]+)\$\$",
    1.56 -        "newline": ur"\n",
    1.57 -        "punct": (ur'(^|\b|(?<=\s))(%s)((?=[\s.,:;!?)/&=+])|\b|$)' %
    1.58 -                  ur"|".join(re.escape(k) for k in punct)),
    1.59 -        "table": ur"=?\|=?",
    1.60 -        "text": ur".+?",
    1.61 -    } # note that the priority is alphabetical
    1.62 +    markup = [
    1.63 +        ("bold", ur"[*][*]"),
    1.64 +        ("camel_link", camel_link),
    1.65 +        ("camel_nolink", ur"[!~](?P<camel_text>%s)" % camel_link),
    1.66 +        ("code", ur"[{][{][{](?P<code_text>([^}]|[^}][}]|[^}][}][}])"
    1.67 +                ur"*[}]*)[}][}][}]"),
    1.68 +        ("free_link", ur"""(http|https|ftp)://\S+[^\s.,:;!?()'"=+<>-]"""),
    1.69 +        ("italic", ur"//"),
    1.70 +        ("link", ur"\[\[(?P<link_target>([^|\]]|\][^|\]])+)"
    1.71 +                ur"(\|(?P<link_text>([^\]]|\][^\]])+))?\]\]"),
    1.72 +        ("image", image_pat),
    1.73 +        ("linebreak", ur"\\\\"),
    1.74 +        ("macro", ur"[<][<](?P<macro_name>\w+)\s+"
    1.75 +                 ur"(?P<macro_text>([^>]|[^>][>])+)[>][>]"),
    1.76 +        ("mail", ur"""(mailto:)?\S+@\S+(\.[^\s.,:;!?()'"/=+<>-]+)+"""),
    1.77 +        ("math", ur"\$\$(?P<math_text>[^$]+)\$\$"),
    1.78 +        ("newline", ur"\n"),
    1.79 +        ("punct", (ur'(^|\b|(?<=\s))(%s)((?=[\s.,:;!?)/&=+])|\b|$)' %
    1.80 +                  ur"|".join(re.escape(k) for k in punct))),
    1.81 +        ("table", ur"=?\|=?"),
    1.82 +        ("text", ur".+?"),
    1.83 +    ]
    1.84  
    1.85  
    1.86      def __init__(self, lines, wiki_link, wiki_image,
    1.87 @@ -743,19 +743,21 @@
    1.88          self.heading_re = re.compile(self.heading_pat, re.U)
    1.89          self.bullets_re = re.compile(self.bullets_pat, re.U)
    1.90          self.block_re = re.compile(ur"|".join("(?P<%s>%s)" % kv
    1.91 -                                   for kv in sorted(self.block.iteritems())))
    1.92 +                                   for kv in self.block))
    1.93          self.code_close_re = re.compile(ur"^\}\}\}\s*$", re.U)
    1.94          self.macro_close_re = re.compile(ur"^>>\s*$", re.U)
    1.95          self.conflict_close_re = re.compile(ur"^>>>>>>> other\s*$", re.U)
    1.96          self.conflict_sep_re = re.compile(ur"^=======\s*$", re.U)
    1.97          self.image_re = re.compile(self.image_pat, re.U)
    1.98 -        self.markup['smiley'] = (ur"(^|\b|(?<=\s))"
    1.99 +        self.markup = [(name, pattern) for (name, pattern) in self.markup
   1.100 +                       if name != 'smiley']
   1.101 +        self.markup.insert(-2, ('smiley', ur"(^|\b|(?<=\s))"
   1.102                                   ur"(?P<smiley_face>%s)"
   1.103                                   ur"((?=[\s.,:;!?)/&=+-])|$)"
   1.104                                   % ur"|".join(re.escape(k)
   1.105 -                                              for k in self.smilies))
   1.106 +                                              for k in self.smilies)))
   1.107          self.markup_re = re.compile(ur"|".join("(?P<%s>%s)" % kv
   1.108 -                                    for kv in sorted(self.markup.iteritems())))
   1.109 +                                    for kv in self.markup))
   1.110  
   1.111      def __iter__(self):
   1.112          return self.parse()
   1.113 @@ -840,7 +842,7 @@
   1.114  
   1.115      def _line_smiley(self, groups):
   1.116          smiley = groups["smiley_face"]
   1.117 -        return self.wiki_image(self.smilies[smiley], alt=smiley,
   1.118 +        return self.wiki_image(self.smilies[smiley], smiley,
   1.119                                 class_="smiley")
   1.120  
   1.121      def _line_bold(self, groups):
     2.1 --- a/tests/test_parser.txt	Fri Dec 04 14:19:49 2009 +0100
     2.2 +++ b/tests/test_parser.txt	Fri Dec 04 21:55:48 2009 +0100
     2.3 @@ -201,3 +201,6 @@
     2.4  
     2.5  >>> parse(u'no !wikiWord here, ~wikiWord')
     2.6  <p id="line_0">no wikiWord here, wikiWord</p>
     2.7 +
     2.8 +>>> parse(u'lol:)')
     2.9 +<p id="line_0">lol<img src="smile.png" alt=":)"></p>