bin/old/rss.py (view raw)
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
#!/usr/bin/env python3
"""Generate the blog RSS feed.

Renders every Markdown article found in ``pages/blog/``, turns each one into
an RSS ``<item>`` element, appends the items (newest first) to the ``channel``
element of ``templates/feed.xml`` and writes the result to
``build/blog/feed.xml``. All paths are relative to the current working
directory.
"""

import html
import operator
import os
import re
import sys
import warnings
from collections import namedtuple

import arrow
from lxml import etree as ET
from myrkdown import markdown

PREFIX_URL = "https://icyphox.sh/blog/"

# Captures the file name (without the ".md" suffix) that follows the last "/".
link_extractor = re.compile(r"\/([^\/]*)\.md$")

# URL matcher handed to the "link-patterns" extra; compiled once at import
# time instead of on every parse_article() call.
_URL_PATTERN = re.compile(
    r"((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(:[0-9]+)?|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)"
)


def convert_date(d):
    """Reformat a ``YYYY-MM-DD`` date string for use in ``<pubDate>``.

    Args:
        d: Date string in ``YYYY-MM-DD`` form.

    Returns:
        The date formatted with the arrow pattern
        ``ddd, DD MMM YYYY HH:mm:ss Z``.
    """
    return arrow.get(d, "YYYY-MM-DD").format("ddd, DD MMM YYYY HH:mm:ss Z")


def generate_node(rendered, path):
    """Build the RSS ``<item>`` element for one rendered article.

    Args:
        rendered: Rendered article as returned by ``markdown()`` with the
            ``metadata`` extra enabled. Its ``metadata`` mapping must contain
            ``"title"`` and ``"date"``; ``str(rendered)`` becomes the item
            description.
        path: Source path of the article. It must end in ``/<name>.md``;
            ``<name>`` is appended to ``PREFIX_URL`` to form the item URL.

    Returns:
        The populated ``item`` element with ``title``, ``description``,
        ``link``, ``pubDate`` and ``guid`` children, in that order.
    """
    item = ET.Element("item")

    title = ET.SubElement(item, "title")
    title.text = rendered.metadata["title"]

    # The rendered markup is wrapped in CDATA rather than entity-escaped.
    description = ET.SubElement(item, "description")
    description.text = ET.CDATA(str(rendered))

    # The link and the guid are the same URL, so derive it only once.
    url = PREFIX_URL + link_extractor.search(path).group(1)

    link = ET.SubElement(item, "link")
    link.text = url

    pub_date = ET.SubElement(item, "pubDate")
    pub_date.text = convert_date(rendered.metadata["date"])

    guid = ET.SubElement(item, "guid")
    guid.text = url

    return item


def parse_article(path):
    """Read and render one Markdown article.

    Args:
        path: Path of the Markdown file to render.

    Returns:
        A ``(date, rendered, path)`` tuple where ``date`` is the article's
        ``date`` metadata parsed by ``arrow.get`` (used as the sort key),
        ``rendered`` is the object returned by ``markdown()`` and ``path`` is
        the argument unchanged.
    """
    # Read as UTF-8 explicitly so the result does not depend on the locale.
    with open(path, encoding="utf-8") as f:
        rendered = markdown(
            f.read(),
            extras=[
                "metadata",
                "fenced-code-blocks",
                "header-ids",
                "footnotes",
                "smarty-pants",
                "link-patterns",
            ],
            # NOTE(review): r"\1" presumably makes each matched URL link to
            # itself -- confirm against the link-patterns extra.
            link_patterns=[(_URL_PATTERN, r"\1")],
        )
    return (arrow.get(rendered.metadata["date"]), rendered, path)


tree = ET.parse(os.path.join("templates", "feed.xml"))

# Only "*.md" files are articles. Anything else in the directory (such as a
# generated feed.xml) cannot yield an item URL and is skipped, as is the
# section index page.
articles = [
    parse_article(os.path.join("pages/blog", name))
    for name in os.listdir("pages/blog/")
    if name.endswith(".md") and name != "_index.md"
]

# Newest article first.
articles.sort(key=operator.itemgetter(0), reverse=True)

chan = tree.find("channel")
for _date, rendered_article, article_path in articles:
    chan.append(generate_node(rendered_article, article_path))

out = ET.tostring(tree, encoding="unicode")

# The serialized text has no XML declaration naming an encoding, so write it
# as UTF-8 (what XML readers assume by default) instead of the locale's
# encoding.
with open("build/blog/feed.xml", "w", encoding="utf-8") as f:
    f.write(out)