import os import urllib from urlparse import urlparse import lxml.html def analyze(filename): try: d = lxml.html.fromstring(open(filename).read()) for l in d.iterlinks(): target = l[0].attrib[l[1]] if target == "#": continue parsed = urlparse(target) if parsed.scheme: continue if parsed.fragment: target = target.split('#')[0] target_filename = os.path.abspath(os.path.join(os.path.dirname(filename), urllib.unquote(target))) if not os.path.exists(target_filename): print "In %s broken link: " % filename, target except Exception as exc: print "Error with:", filename, exc for task in os.popen('doit list --all', 'r').readlines(): task = task.strip() if task.split(':')[0] in ( 'render_tags', 'render_archive', 'render_galleries', 'render_indexes', 'render_pages', 'render_site') and '.html' in task: analyze(task.split(":")[-1])