#!/usr/bin/env python3
# bin/old/rss.py
# generate an rss item
import html
from myrkdown import markdown
import sys
import os
from collections import namedtuple
import re
import arrow
import operator
import warnings
# Disabled: when enabled, this would silence arrow's ArrowParseWarning.
# warnings.simplefilter("ignore", arrow.factory.ArrowParseWarning)
# NOTE(review): items_raw is never read or modified in the visible part of
# this file; it looks like a leftover -- confirm before removing.
items_raw = []
from lxml import etree as ET
def convert_date(d):
    """Reformat a ``YYYY-MM-DD`` date into ``ddd, DD MMM YYYY HH:mm:ss Z``.

    The input is parsed by ``arrow`` with the explicit ``YYYY-MM-DD`` format,
    and the result is rendered with arrow's formatting tokens.
    """
    parsed = arrow.get(d, "YYYY-MM-DD")
    return parsed.format("ddd, DD MMM YYYY HH:mm:ss Z")
# Base URL that every article link and guid is built from.
PREFIX_URL = "https://icyphox.sh/blog/"

# Captures the final path component of a ``.md`` path, without the extension
# (e.g. "pages/blog/foo.md" -> "foo"). Written as a raw string so the regex
# escapes are not treated as (invalid) Python string escapes.
link_extractor = re.compile(r"\/([^\/]*)\.md$")
def generate_node(rendered, path):
    """Build the RSS ``<item>`` element for one rendered article.

    ``rendered`` provides ``metadata["title"]`` and ``metadata["date"]``; its
    ``str()`` form becomes the CDATA-wrapped description. ``path`` is the
    article's source path: the name captured by ``link_extractor`` is appended
    to ``PREFIX_URL`` and used for both the link and the guid.

    Returns the new ``item`` element with children in the order
    title, description, link, pubDate, guid.
    """
    meta = rendered.metadata
    item = ET.Element("item")

    ET.SubElement(item, "title").text = meta["title"]

    # The rendered body goes into CDATA so its markup is not escaped.
    ET.SubElement(item, "description").text = ET.CDATA(str(rendered))

    # The link and the guid carry the same URL, so derive it once.
    url = PREFIX_URL + link_extractor.search(path).group(1)
    ET.SubElement(item, "link").text = url

    pub_date = ET.SubElement(item, "pubDate")
    pub_date.text = convert_date(meta["date"])

    ET.SubElement(item, "guid").text = url
    return item
# Pattern passed to the "link-patterns" extra. It matches scheme-prefixed
# URLs, "www." hosts and user@host forms, optionally followed by a path,
# query and fragment. Compiled once here rather than on every call.
_AUTOLINK_PATTERN = re.compile(
    r"((([A-Za-z]{3,9}:(?:\/\/)?)(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(:[0-9]+)?|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)((?:\/[\+~%\/\.\w\-_]*)?\??(?:[\-\+=&;%@\.\w_]*)#?(?:[\.\!\/\\\w]*))?)"
)


def parse_article(path):
    """Render one markdown article and return its sort key with the result.

    The file at ``path`` is read as UTF-8 (explicitly, so the result does not
    depend on the machine's locale) and passed to ``markdown`` with the
    extras listed below.

    Returns a 3-tuple ``(date, rendered, path)`` where ``date`` is
    ``arrow.get(rendered.metadata["date"])``, ``rendered`` is whatever
    ``markdown`` returned, and ``path`` is the argument unchanged.
    """
    with open(path, encoding="utf-8") as f:
        source = f.read()

    rendered = markdown(
        source,
        extras=[
            "metadata",
            "fenced-code-blocks",
            "header-ids",
            "footnotes",
            "smarty-pants",
            "link-patterns",
        ],
        # Replacement r"\1" is the whole match -- presumably this links each
        # matched URL to itself; confirm against the markdown implementation.
        link_patterns=[(_AUTOLINK_PATTERN, r"\1")],
    )
    return (arrow.get(rendered.metadata["date"]), rendered, path)
# Load the feed template; one <item> per article is appended to its <channel>.
tree = ET.parse(os.path.join("templates", "feed.xml"))

# Parse every entry in pages/blog/ except the index page and the feed itself.
articles = [
    parse_article(os.path.join("pages/blog", f))
    for f in os.listdir("pages/blog/")
    if f not in ("_index.md", "feed.xml")
]

# Each entry is (date, rendered, path); sort newest first by date.
articles.sort(key=operator.itemgetter(0), reverse=True)

chan = tree.find("channel")
for article in articles:
    chan.append(generate_node(article[1], article[2]))

# tostring(..., encoding="unicode") returns a str, so the encoding of the
# file on disk is decided here; write UTF-8 explicitly instead of relying on
# the locale default.
out = ET.tostring(tree, encoding="unicode")
with open("build/blog/feed.xml", "w", encoding="utf-8") as f:
    f.write(out)
|