Planet

エンティティの変換で不完全な部分が目立つので、Hotfixを書いてみた。

hotfix.py

   1 import re
   2 
   3 splitter = re.compile('(<.+?>)')
   4 amp = re.compile('&amp;|&(?!#)'), '&amp;'
   5 lt = re.compile('&lt;|<'), '&lt;'
   6 gt = re.compile('&gt;|>'), '&gt;'
   7 quot = re.compile('&quot;|"'), '&quot;'
   8 
   9 def safe_entity(s):
  10     for src, dest in amp, lt, gt, quot:
  11         s = src.sub(dest, s)
  12     return s
  13 
  14 def filter(doc):
  15     build = []
  16     for tok in splitter.split(doc):
  17         if splitter.match(tok):
  18             build.append(tok)
  19         else:
  20             build.append(safe_entity(tok))
  21     return ''.join(build)

planet.pyのパッチ

--- /home/bravo/planet-nightly/planet.py        2006-01-04 09:29:43.000000000 +0900
+++ planet.py   2006-02-28 13:44:04.000000000 +0900
@@ -20,6 +20,7 @@
 import locale

 import planet
+import hotfix

 from ConfigParser import ConfigParser

@@ -235,15 +236,15 @@
             output_fd = open(output_file, "w")
             if encoding.lower() in ("utf-8", "utf8"):
                 # UTF-8 output is the default because we use that internally
-                output_fd.write(tp.process(template))
+                output_fd.write(hotfix.filter(tp.process(template)))
             elif encoding.lower() in ("xml", "html", "sgml"):
                 # Magic for Python 2.3 users
                 output = tp.process(template).decode("utf-8")
-                output_fd.write(output.encode("ascii", "xmlcharrefreplace"))
+                output_fd.write(hotfix.filter(output.encode("ascii", "xmlcharrefreplace")))
             else:
                 # Must be a "known" encoding
                 output = tp.process(template).decode("utf-8")
-                output_fd.write(output.encode(encoding, "replace"))
+                output_fd.write(hotfix.filter(output.encode(encoding, "replace")))
             output_fd.close()
         except KeyboardInterrupt:
             raise

Python/Planet (last edited 2006-02-28 09:27:06 by KeisukeUrago)