Planet
エンティティの変換で不完全な部分が目立つので、Hotfixを書いてみた。
hotfix.py
1 import re
2
# Matches one markup tag such as ``<a href="...">``.  The capturing group
# makes ``splitter.split()`` return the tags themselves between the text runs.
splitter = re.compile(r'(<.+?>)')

# Escaping rules, each a ``(compiled pattern, replacement)`` pair.
#
# ``amp`` rewrites a bare ``&`` but skips one that already starts a complete
# character reference (``&name;``, ``&#123;`` or ``&#x1F;``), so text that is
# already escaped is not escaped a second time (``&lt;`` must not turn into
# ``&amp;lt;``).
amp = re.compile(
    r'&(?!(?:[A-Za-z][A-Za-z0-9]*|#[0-9]+|#[xX][0-9A-Fa-f]+);)'), '&amp;'
# The remaining rules rewrite the raw character to its named entity.
lt = re.compile(r'<'), '&lt;'
gt = re.compile(r'>'), '&gt;'
quot = re.compile(r'"'), '&quot;'
8
def safe_entity(s):
    """Return ``s`` after running every entity rule over it.

    The rules are the module-level ``amp``, ``lt``, ``gt`` and ``quot``
    pairs, each a ``(compiled pattern, replacement)`` tuple.  They are
    applied one after another in that order, each on the result of the
    previous one.
    """
    rules = (amp, lt, gt, quot)
    for pattern, replacement in rules:
        s = pattern.sub(replacement, s)
    return s
13
def filter(doc):
    """Return ``doc`` with its text runs passed through ``safe_entity``.

    ``doc`` is cut into pieces with ``splitter.split``.  Pieces that
    ``splitter`` matches (the tags) are copied through unchanged; every
    other piece is rewritten by ``safe_entity``.  The pieces are then
    joined back together in their original order.

    NOTE: the name shadows the ``filter`` builtin inside this module; it is
    kept because planet.py calls this function as ``hotfix.filter``.
    """
    def render(piece):
        # A piece that starts with a tag match is markup: leave it alone.
        if splitter.match(piece):
            return piece
        return safe_entity(piece)

    return ''.join([render(piece) for piece in splitter.split(doc)])
planet.pyのパッチ
--- /home/bravo/planet-nightly/planet.py 2006-01-04 09:29:43.000000000 +0900
+++ planet.py 2006-02-28 13:44:04.000000000 +0900
@@ -20,6 +20,7 @@
import locale
import planet
+import hotfix
from ConfigParser import ConfigParser
@@ -235,15 +236,15 @@
output_fd = open(output_file, "w")
if encoding.lower() in ("utf-8", "utf8"):
# UTF-8 output is the default because we use that internally
- output_fd.write(tp.process(template))
+ output_fd.write(hotfix.filter(tp.process(template)))
elif encoding.lower() in ("xml", "html", "sgml"):
# Magic for Python 2.3 users
output = tp.process(template).decode("utf-8")
- output_fd.write(output.encode("ascii", "xmlcharrefreplace"))
+ output_fd.write(hotfix.filter(output.encode("ascii", "xmlcharrefreplace")))
else:
# Must be a "known" encoding
output = tp.process(template).decode("utf-8")
- output_fd.write(output.encode(encoding, "replace"))
+ output_fd.write(hotfix.filter(output.encode(encoding, "replace")))
output_fd.close()
except KeyboardInterrupt:
raise