Hatta Devel
changeset 633:6d47f7177445
don't use sorted dicts in markup patterns, use list of tuples instead
| author | sheep@ghostwheel |
|---|---|
| date | Fri Dec 04 21:55:48 2009 +0100 (2009-12-04) |
| parents | 422d30e693d5 |
| children | 541b9b77637c |
| files | hatta.py tests/test_parser.txt |
line diff
1.1 --- a/hatta.py Fri Dec 04 14:19:49 2009 +0100 1.2 +++ b/hatta.py Fri Dec 04 21:55:48 2009 +0100 1.3 @@ -658,19 +658,19 @@ 1.4 bullets_pat = ur"^\s*[*]+\s+" 1.5 heading_pat = ur"^\s*=+" 1.6 quote_pat = ur"^[>]+\s+" 1.7 - block = { 1.8 - "bullets": bullets_pat, 1.9 - "code": ur"^[{][{][{]+\s*$", 1.10 - "conflict": ur"^<<<<<<< local\s*$", 1.11 - "empty": ur"^\s*$", 1.12 - "heading": heading_pat, 1.13 - "indent": ur"^[ \t]+", 1.14 - "macro": ur"^<<\w+\s*$", 1.15 - "quote": quote_pat, 1.16 - "rule": ur"^\s*---+\s*$", 1.17 - "syntax": ur"^\{\{\{\#![\w+#.-]+\s*$", 1.18 - "table": ur"^\|", 1.19 - } # note that the priority is alphabetical 1.20 + block = [ 1.21 + ("bullets", bullets_pat), 1.22 + ("code", ur"^[{][{][{]+\s*$"), 1.23 + ("conflict", ur"^<<<<<<< local\s*$"), 1.24 + ("empty", ur"^\s*$"), 1.25 + ("heading", heading_pat), 1.26 + ("indent", ur"^[ \t]+"), 1.27 + ("macro", ur"^<<\w+\s*$"), 1.28 + ("quote", quote_pat), 1.29 + ("rule", ur"^\s*---+\s*$"), 1.30 + ("syntax", ur"^\{\{\{\#![\w+#.-]+\s*$"), 1.31 + ("table", ur"^\|"), 1.32 + ] 1.33 image_pat = (ur"\{\{(?P<image_target>([^|}]|}[^|}])*)" 1.34 ur"(\|(?P<image_text>([^}]|}[^}])*))?}}") 1.35 smilies = { 1.36 @@ -700,28 +700,28 @@ 1.37 camel_link = ur"\w+[%s]\w+" % re.escape( 1.38 u''.join(unichr(i) for i in xrange(sys.maxunicode) 1.39 if unicodedata.category(unichr(i))=='Lu')) 1.40 - markup = { 1.41 - "bold": ur"[*][*]", 1.42 - "camel_link": camel_link, 1.43 - "camel_nolink": ur"[!~](?P<camel_text>%s)" % camel_link, 1.44 - "code": ur"[{][{][{](?P<code_text>([^}]|[^}][}]|[^}][}][}])" 1.45 - ur"*[}]*)[}][}][}]", 1.46 - "free_link": ur"""(http|https|ftp)://\S+[^\s.,:;!?()'"=+<>-]""", 1.47 - "italic": ur"//", 1.48 - "link": ur"\[\[(?P<link_target>([^|\]]|\][^|\]])+)" 1.49 - ur"(\|(?P<link_text>([^\]]|\][^\]])+))?\]\]", 1.50 - "image": image_pat, 1.51 - "linebreak": ur"\\\\", 1.52 - "macro": ur"[<][<](?P<macro_name>\w+)\s+" 1.53 - ur"(?P<macro_text>([^>]|[^>][>])+)[>][>]", 1.54 - "mail": 
ur"""(mailto:)?\S+@\S+(\.[^\s.,:;!?()'"/=+<>-]+)+""", 1.55 - "math": ur"\$\$(?P<math_text>[^$]+)\$\$", 1.56 - "newline": ur"\n", 1.57 - "punct": (ur'(^|\b|(?<=\s))(%s)((?=[\s.,:;!?)/&=+])|\b|$)' % 1.58 - ur"|".join(re.escape(k) for k in punct)), 1.59 - "table": ur"=?\|=?", 1.60 - "text": ur".+?", 1.61 - } # note that the priority is alphabetical 1.62 + markup = [ 1.63 + ("bold", ur"[*][*]"), 1.64 + ("camel_link", camel_link), 1.65 + ("camel_nolink", ur"[!~](?P<camel_text>%s)" % camel_link), 1.66 + ("code", ur"[{][{][{](?P<code_text>([^}]|[^}][}]|[^}][}][}])" 1.67 + ur"*[}]*)[}][}][}]"), 1.68 + ("free_link", ur"""(http|https|ftp)://\S+[^\s.,:;!?()'"=+<>-]"""), 1.69 + ("italic", ur"//"), 1.70 + ("link", ur"\[\[(?P<link_target>([^|\]]|\][^|\]])+)" 1.71 + ur"(\|(?P<link_text>([^\]]|\][^\]])+))?\]\]"), 1.72 + ("image", image_pat), 1.73 + ("linebreak", ur"\\\\"), 1.74 + ("macro", ur"[<][<](?P<macro_name>\w+)\s+" 1.75 + ur"(?P<macro_text>([^>]|[^>][>])+)[>][>]"), 1.76 + ("mail", ur"""(mailto:)?\S+@\S+(\.[^\s.,:;!?()'"/=+<>-]+)+"""), 1.77 + ("math", ur"\$\$(?P<math_text>[^$]+)\$\$"), 1.78 + ("newline", ur"\n"), 1.79 + ("punct", (ur'(^|\b|(?<=\s))(%s)((?=[\s.,:;!?)/&=+])|\b|$)' % 1.80 + ur"|".join(re.escape(k) for k in punct))), 1.81 + ("table", ur"=?\|=?"), 1.82 + ("text", ur".+?"), 1.83 + ] 1.84 1.85 1.86 def __init__(self, lines, wiki_link, wiki_image, 1.87 @@ -743,19 +743,21 @@ 1.88 self.heading_re = re.compile(self.heading_pat, re.U) 1.89 self.bullets_re = re.compile(self.bullets_pat, re.U) 1.90 self.block_re = re.compile(ur"|".join("(?P<%s>%s)" % kv 1.91 - for kv in sorted(self.block.iteritems()))) 1.92 + for kv in self.block)) 1.93 self.code_close_re = re.compile(ur"^\}\}\}\s*$", re.U) 1.94 self.macro_close_re = re.compile(ur"^>>\s*$", re.U) 1.95 self.conflict_close_re = re.compile(ur"^>>>>>>> other\s*$", re.U) 1.96 self.conflict_sep_re = re.compile(ur"^=======\s*$", re.U) 1.97 self.image_re = re.compile(self.image_pat, re.U) 1.98 - self.markup['smiley'] = 
(ur"(^|\b|(?<=\s))" 1.99 + self.markup = [(name, pattern) for (name, pattern) in self.markup 1.100 + if name != 'smiley'] 1.101 + self.markup.insert(-2, ('smiley', ur"(^|\b|(?<=\s))" 1.102 ur"(?P<smiley_face>%s)" 1.103 ur"((?=[\s.,:;!?)/&=+-])|$)" 1.104 % ur"|".join(re.escape(k) 1.105 - for k in self.smilies)) 1.106 + for k in self.smilies))) 1.107 self.markup_re = re.compile(ur"|".join("(?P<%s>%s)" % kv 1.108 - for kv in sorted(self.markup.iteritems()))) 1.109 + for kv in self.markup)) 1.110 1.111 def __iter__(self): 1.112 return self.parse() 1.113 @@ -840,7 +842,7 @@ 1.114 1.115 def _line_smiley(self, groups): 1.116 smiley = groups["smiley_face"] 1.117 - return self.wiki_image(self.smilies[smiley], alt=smiley, 1.118 + return self.wiki_image(self.smilies[smiley], smiley, 1.119 class_="smiley") 1.120 1.121 def _line_bold(self, groups):
2.1 --- a/tests/test_parser.txt Fri Dec 04 14:19:49 2009 +0100 2.2 +++ b/tests/test_parser.txt Fri Dec 04 21:55:48 2009 +0100 2.3 @@ -201,3 +201,6 @@ 2.4 2.5 >>> parse(u'no !wikiWord here, ~wikiWord') 2.6 <p id="line_0">no wikiWord here, wikiWord</p> 2.7 + 2.8 +>>> parse(u'lol:)') 2.9 +<p id="line_0">lol<img src="smile.png" alt=":)"></p>
