import py |
import sys, os, traceback |
import re |
|
def _stdout_is_tty():
    """ return True if sys.stdout is attached to a terminal.

        A stdout object without a fileno() method, or one whose fileno()
        fails (closed file, in-memory replacement stream), counts as
        "not a terminal" instead of breaking the import of this module.
    """
    fileno = getattr(sys.stdout, 'fileno', None)
    if fileno is None:
        return False
    try:
        return os.isatty(fileno())
    except (AttributeError, ValueError, EnvironmentError):
        # ValueError: closed file; EnvironmentError (IOError/OSError):
        # stream has no underlying file descriptor
        return False

# log() is chosen once, at import time: chatty on a terminal, silent otherwise
if _stdout_is_tty():
    def log(msg):
        """ print msg to stdout """
        print(msg)
else:
    def log(msg):
        """ discard msg (stdout is not a terminal) """
        pass
|
def convert_rest_html(source, source_path, stylesheet=None, encoding='latin1'):
    """ return the html document for the given ReST input.

        source       a ReST-string
        source_path  path of the source; the conversion runs with the
                     directory of this path as current working directory
                     (where to look for includes, basically)
        stylesheet   stylesheet reference passed on to docutils
                     (to be used if any)
        encoding     output encoding of the returned document
                     (latin1 by default)

        The previous working directory is restored afterwards, also when
        the conversion raises.
    """
    # imported inside the function, so these packages are only required
    # when a conversion is actually requested
    from py.__.rest import directive
    from docutils.core import publish_string
    directive.set_backend_and_register_directives("html")
    settings = {
        'stylesheet' : stylesheet,
        'stylesheet_path': None,
        'traceback' : 1,
        'embed_stylesheet': 0,
        'output_encoding' : encoding,
        # docutils halt_level 2 == "warning": warnings already abort
        # the conversion instead of being rendered into the output
        'halt_level' : 2,
    }

    source_path = os.path.abspath(str(source_path))
    prevdir = os.getcwd()
    try:
        # run in the source's directory so relative references resolve
        os.chdir(os.path.dirname(source_path))
        return publish_string(source, source_path, writer_name='html',
                              settings_overrides=settings)
    finally:
        os.chdir(prevdir)
|
def process(txtpath, encoding='latin1'):
    """ convert the ReST file txtpath into an html file next to it.

        txtpath   path object of a '.txt' file (an svn working copy
                  path is replaced by its local path)
        encoding  encoding used to read the text and to write the html

        If a 'style.css' exists in the same directory, the generated
        document refers to it by its basename.
    """
    log("processing %s" % txtpath)
    assert txtpath.check(ext='.txt')
    if isinstance(txtpath, py.path.svnwc):
        txtpath = txtpath.localpath
    target = txtpath.new(ext='.html')

    # only reference a stylesheet that really exists beside the source
    stylesheet = None
    css = txtpath.dirpath('style.css')
    if css.check():
        stylesheet = css.basename

    text = unicode(txtpath.read(), encoding)
    html = convert_rest_html(text, txtpath,
                             stylesheet=stylesheet, encoding=encoding)
    target.write(html)
|
|
|
|
|
# DOTALL lets '.' span newlines, so each pattern captures a multi-line
# element body.  MULTILINE has no effect here (the patterns contain no
# ^/$ anchors); it is kept so the compiled flags stay as they were.
rex1 = re.compile(u'.*<body>(.*)</body>.*', re.MULTILINE | re.DOTALL)
rex2 = re.compile(u'.*<div class="document">(.*)</div>.*', re.MULTILINE | re.DOTALL)

def strip_html_header(string, encoding='utf8'):
    """ return the content of the body-tag as a unicode string.

        string    html document; an encoded string is decoded with
                  `encoding`, already decoded text is used as is
        encoding  encoding of `string` (utf8 by default)

        The content of <body> is extracted first and then, inside it,
        the content of <div class="document">.  As soon as one of the
        two is not found, the text extracted so far is returned (the
        whole decoded input if there is no <body> at all).
    """
    text_type = type(u'')
    if isinstance(string, text_type):
        # decoding text a second time would raise a TypeError
        uni = string
    else:
        uni = text_type(string, encoding)
    for rex in rex1, rex2:
        match = rex.search(uni)
        if not match:
            break
        uni = match.group(1)
    return uni
|