
Parsing Bible References with Python
I like reading the Bible and writing about it, but I don’t like manually copying and pasting Bible verses. So I write little parsers that take a Bible reference (e.g. “Genesis 1:1 (ESV)”) and convert it into the referenced verses.
To make the parsing easy, I use parsy, which is a lovely parser combinator library. Here is how I do it:
Note
This code is pulled directly from the source file that powers bible references on this blog, so it may change over time.
from typing import TypeVar, Optional, Iterable, Callable
import xml.etree.ElementTree as ET
from dataclasses import dataclass
from pathlib import Path
import parsy
# Canonical book names in canonical order. A book's number is its position
# in this list plus one, so the order must not change.
books = [
    # Law
    "Genesis", "Exodus", "Leviticus", "Numbers", "Deuteronomy",
    # History
    "Joshua", "Judges", "Ruth",
    "1 Samuel", "2 Samuel",
    "1 Kings", "2 Kings",
    "1 Chronicles", "2 Chronicles",
    "Ezra", "Nehemiah", "Esther",
    # Poetry and wisdom
    "Job", "Psalms", "Proverbs", "Ecclesiastes", "Song of Songs",
    # Major prophets
    "Isaiah", "Jeremiah", "Lamentations", "Ezekiel", "Daniel",
    # Minor prophets
    "Hosea", "Joel", "Amos", "Obadiah", "Jonah", "Micah",
    "Nahum", "Habakkuk", "Zephaniah", "Haggai", "Zechariah", "Malachi",
    # Gospels and Acts
    "Matthew", "Mark", "Luke", "John", "Acts",
    # Letters
    "Romans",
    "1 Corinthians", "2 Corinthians",
    "Galatians", "Ephesians", "Philippians", "Colossians",
    "1 Thessalonians", "2 Thessalonians",
    "1 Timothy", "2 Timothy",
    "Titus", "Philemon", "Hebrews", "James",
    "1 Peter", "2 Peter",
    "1 John", "2 John", "3 John",
    "Jude",
    # Apocalypse
    "Revelation",
]
# Matches exactly one of the canonical book names, tried in list order.
book = parsy.alt(*(parsy.string(name) for name in books))

# One or more decimal digits, converted to an int.
number = parsy.regex('[0-9]+').map(int)

# Chapters and single verses are both plain numbers.
chapter = number
verse_single = number


def _inclusive_range(parts: dict) -> range:
    """Turn the parsed ``start``/``end`` pair into a range that includes ``end``."""
    return range(parts['start'], parts['end'] + 1)


# "3-7" style verse span.
verse_range = parsy.seq(
    start=number,
    _dash=parsy.string('-'),
    end=number,
).map(_inclusive_range)

# Try the span first; a bare number is the fallback.
verse = verse_range | verse_single
def surrounded_by(start: str, inner: parsy.Parser, end: str) -> parsy.Parser:
    """Build a parser for ``inner`` wrapped in literal delimiters.

    The resulting parser consumes ``start``, then ``inner``, then ``end``,
    and yields only the value produced by ``inner``.
    """
    opening = parsy.string(start)
    closing = parsy.string(end)
    # ``>>`` discards the left result, ``<<`` discards the right one.
    return opening >> inner << closing
# Translation tag in parentheses, e.g. "(ESV)". At least one letter is
# required: an empty tag "()" would otherwise parse to version '' and make
# the lookup open the file "bible-translations/.xml".
version = surrounded_by('(', parsy.regex('[a-zA-Z]+'), ')')
@dataclass
class ResolvedVerse:
    """A single verse that has been looked up in a translation."""

    # Verse number within its chapter.
    number: int
    # Text of the verse.
    content: str
@dataclass
class ResolvedChapter:
    """The verses selected from one chapter of one translation."""

    # Translation identifier the verses were read from.
    version: str
    # Name of the book.
    book: str
    # Chapter number within the book.
    number: int
    # The selected verses.
    verses: list[ResolvedVerse]
T = TypeVar('T')


def find(f: Callable[[T], bool], iter: Iterable[T]) -> Optional[T]:
    """Return the first item of ``iter`` for which ``f`` is truthy.

    Iteration stops at the first match. Returns ``None`` when nothing
    matches, including when ``iter`` is empty.
    """
    for candidate in iter:
        if f(candidate):
            return candidate
    return None
@dataclass
class BibleReference:
    """A parsed reference such as "Genesis 1:1 (ESV)".

    Holds the pieces of the reference and can resolve them to verse text
    from a translation's XML file.
    """

    # Book name; must be an entry of ``books`` for ``book_number`` to work.
    book: str
    # Chapter number.
    chapter: int
    # A single verse number, or a range of verse numbers.
    verse: int | range
    # Translation identifier; used as the stem of the XML file name.
    version: str

    def resolve(self) -> ResolvedChapter:
        """Look up the referenced verse(s) in the translation's XML file.

        Reads ``bible-translations/<version>.xml`` (a relative path) and
        selects elements by their ``number`` attribute: ``book``, then
        ``chapter``, then ``verse``.

        Returns:
            A ``ResolvedChapter`` with the matched verses. For a verse range,
            verses that are absent from the chapter are skipped, so the
            result may hold fewer verses than the range (possibly none).

        Raises:
            LookupError: if the book, the chapter, or a single referenced
                verse is not present in the translation.
            ValueError: if ``self.book`` is not in ``books``.

        Errors from opening or parsing the XML file propagate unchanged.
        """
        tree = ET.parse(Path('bible-translations', f'{self.version}.xml'))
        bible = tree.getroot()

        # Explicit raises rather than ``assert``: assertions are stripped
        # under ``python -O``, which would let a missing element slip
        # through as ``None``.
        book_num = str(self.book_number())
        book = find(lambda x: x.attrib['number'] == book_num, bible.iter('book'))
        if book is None:
            raise LookupError(
                f'book {self.book!r} (number {book_num}) not found in version {self.version!r}'
            )

        chapter_num = str(self.chapter)
        chapter = find(lambda x: x.attrib['number'] == chapter_num, book.iter('chapter'))
        if chapter is None:
            raise LookupError(
                f'{self.book} chapter {self.chapter} not found in version {self.version!r}'
            )

        if isinstance(self.verse, int):
            verse = find(lambda x: int(x.attrib['number']) == self.verse, chapter.iter('verse'))
            if verse is None:
                raise LookupError(
                    f'{self.book} {self.chapter}:{self.verse} not found in version {self.version!r}'
                )
            verses = [verse]
        else:
            verses = [x for x in chapter.iter('verse') if int(x.attrib['number']) in self.verse]

        return ResolvedChapter(
            version=self.version,
            book=self.book,
            number=self.chapter,
            # ``Element.text`` is None for an empty element; use '' rather
            # than letting ``str(None)`` produce the text 'None'.
            verses=[ResolvedVerse(int(x.attrib['number']), x.text or '') for x in verses],
        )

    def book_number(self) -> int:
        """Return the 1-based position of ``self.book`` in ``books``.

        Raises:
            ValueError: if ``self.book`` is not in ``books``.
        """
        return books.index(self.book) + 1
def _to_reference(parts: dict) -> BibleReference:
    """Build a ``BibleReference`` from the named results of the sequence."""
    return BibleReference(
        parts['book'],
        parts['chapter'],
        parts['verse'],
        parts['version'],
    )


# Full reference: "<book> <chapter>:<verse> (<version>)", with exactly one
# space after the book name and one before the version tag.
bible_reference = parsy.seq(
    book=book,
    _ws1=parsy.string(' '),
    chapter=chapter,
    _colon=parsy.string(':'),
    verse=verse,
    _ws2=parsy.string(' '),
    version=version,
).map(_to_reference)
def parse(input: str) -> BibleReference:
    """Parse a reference string such as 'Genesis 1:1 (CSB)'.

    Delegates to ``bible_reference.parse``; any error it raises for text
    that does not match propagates to the caller.
    """
    reference = bible_reference.parse(input)
    return reference
Example usage:
>>> import bible_ref_parser
>>> bible_ref_parser.parse('Genesis 1:1 (CSB)')
BibleReference(book='Genesis', chapter=1, verse=1, version='CSB')
Putting Agency Back in My Design Process