Skip to content

Resources Module

The bead.resources module provides lexicons, templates, and adapters for external linguistic databases.

This guide walks through the EXACT workflow from gallery/eng/argument_structure/generate_lexicons.py, showing how to extract VerbNet verbs, get morphological forms, and load controlled lexicons from CSV.

Complete Lexicon Generation Workflow

The gallery example generates 7 lexicons. Here's the complete workflow, limited to the first 3 verbs (verb-class pairs) extracted from VerbNet:

from pathlib import Path

from bead.resources.adapters.cache import AdapterCache
from bead.resources.lexicon import Lexicon
from bead.resources.loaders import from_csv

# Directory layout (adapted from generate_lexicons.py lines 27-33).
# The doc tests chdir into the fixtures directory, so every path below is
# resolved relative to tests/fixtures/api_docs/.
base_dir = Path(".")
resources_dir = base_dir / "resources"
lexicons_dir = base_dir / "lexicons"

# Create the lexicons directory if it is missing; existing directory is fine
lexicons_dir.mkdir(exist_ok=True)

# Single adapter cache for VerbNet and UniMorph
cache = AdapterCache()

Extracting VerbNet Verbs

Use the gallery's VerbNetExtractor to fetch verbs with frame information:

from pathlib import Path

from utils.morphology import MorphologyExtractor
from utils.verbnet_parser import VerbNetExtractor

from bead.resources.adapters.cache import AdapterCache
from bead.resources.lexical_item import LexicalItem

# Adapter cache and output location
cache = AdapterCache()
base_dir = Path(".")
lexicons_dir = base_dir / "lexicons"

# Both extractors are built around the same cache
# (generate_lexicons.py lines 36-38)
verbnet = VerbNetExtractor(cache=cache)
morph = MorphologyExtractor(cache=cache)

# Pull every verb out of VerbNet (generate_lexicons.py line 47)
base_verbs = verbnet.extract_all_verbs()
print(f"Found {len(base_verbs)} verb-class pairs from VerbNet")

# Keep only the first 3 entries for testing (generate_lexicons.py lines 52-54)
base_verbs = base_verbs[:3]
print(f"Using first {len(base_verbs)} verbs")

# Inflected forms keyed by stringified item id
# (generate_lexicons.py lines 58-78)
verb_items_dict: dict[str, LexicalItem] = {}

for base_verb in base_verbs:
    lemma = base_verb.lemma
    print(f"Processing {lemma}...")

    # Forms returned: base, 3sg, past, progressive, past participle
    for form_item in morph.get_all_required_forms(lemma):
        # Copy the VerbNet metadata of the source verb onto this form
        verbnet_metadata = {
            "verbnet_class": base_verb.features.get("verbnet_class", ""),
            "themroles": base_verb.features.get("themroles", []),
            "frame_count": base_verb.features.get("frame_count", 0),
        }
        form_item.features.update(verbnet_metadata)
        verb_items_dict[str(form_item.id)] = form_item

print(f"Created {len(verb_items_dict)} verb form entries")

Creating and Saving Lexicons

Create Lexicon objects and save to JSONL:

from pathlib import Path

from utils.morphology import MorphologyExtractor
from utils.verbnet_parser import VerbNetExtractor

from bead.resources.adapters.cache import AdapterCache
from bead.resources.lexicon import Lexicon

# Set up cache and paths
cache = AdapterCache()
base_dir = Path(".")
lexicons_dir = base_dir / "lexicons"

# Make sure the output directory exists so this snippet can run on its own
# (the JSONL file is written into it at the end)
lexicons_dir.mkdir(parents=True, exist_ok=True)

# Extract and process verbs (abbreviated version)
verbnet = VerbNetExtractor(cache=cache)
morph = MorphologyExtractor(cache=cache)
base_verbs = verbnet.extract_all_verbs()[:3]

# Inflected forms keyed by stringified item id
verb_items_dict = {}
for base_verb in base_verbs:
    forms = morph.get_all_required_forms(base_verb.lemma)
    for form_item in forms:
        # Attach the source verb's VerbNet metadata to each inflected form
        form_item.features.update(
            {
                "verbnet_class": base_verb.features.get("verbnet_class", ""),
                "themroles": base_verb.features.get("themroles", []),
                "frame_count": base_verb.features.get("frame_count", 0),
            }
        )
        verb_items_dict[str(form_item.id)] = form_item

# Create VerbNet verbs lexicon (generate_lexicons.py lines 82-91)
verb_lexicon = Lexicon(
    name="verbnet_verbs",
    description="All VerbNet verbs with inflected forms",
    language_code="eng",
    items=verb_items_dict,
)

# Serialize the lexicon to JSONL inside the lexicons directory
output_path = lexicons_dir / "verbnet_verbs.jsonl"
verb_lexicon.to_jsonl(str(output_path))
print(f"Saved to {output_path}")

Loading Lexicons from CSV

Load controlled lexicons from CSV files (generate_lexicons.py lines 100-114):

from pathlib import Path

from bead.resources.loaders import from_csv

# Set up paths
base_dir = Path(".")
lexicons_dir = base_dir / "lexicons"
resources_dir = base_dir / "resources"

# Load bleached nouns from CSV
csv_path = resources_dir / "bleached_nouns.csv"

# The listed CSV columns are passed as feature columns; every item is
# given the NOUN part of speech
noun_lexicon = from_csv(
    path=csv_path,
    name="bleached_nouns",
    feature_columns=["number", "countability", "semantic_class"],
    language_code="eng",
    description="Controlled noun inventory for templates",
    pos="NOUN",
)

print(f"Loaded {len(noun_lexicon.items)} bleached nouns")

# Save to JSONL, creating the output directory first so this snippet can
# run on its own
lexicons_dir.mkdir(parents=True, exist_ok=True)
output_path = lexicons_dir / "bleached_nouns.jsonl"
noun_lexicon.to_jsonl(str(output_path))

CSV format (resources/bleached_nouns.csv):

word,number,countability,semantic_class
person,singular,count,animate
people,plural,count,animate
thing,singular,count,inanimate
things,plural,count,inanimate
place,singular,count,location

The gallery's MorphologyExtractor wraps UniMorphAdapter to get all required verb forms. The example below retrieves the forms for a single lemma:

from utils.morphology import MorphologyExtractor

from bead.resources.adapters.cache import AdapterCache

# Build the morphology extractor around an adapter cache
cache = AdapterCache()
morph = MorphologyExtractor(cache=cache)

# Request every required form of one lemma:
# base, 3sg present, past, progressive, past participle
forms = morph.get_all_required_forms("annihilate")

# Print each surface form next to its features
for inflected in forms:
    print(f"{inflected.form} - {inflected.features}")

# Output:
# annihilate - {'pos': 'V', 'tense': 'base'}
# annihilates - {'pos': 'V', 'tense': 'present', 'person': '3', 'number': 'sg'}
# annihilated - {'pos': 'V', 'tense': 'past'}
# annihilating - {'pos': 'V', 'aspect': 'progressive'}
# annihilated - {'pos': 'V', 'aspect': 'perfect'}

See gallery/eng/argument_structure/utils/morphology.py for implementation.

Loading Existing Lexicons

Load previously generated lexicons from JSONL:

from pathlib import Path

from bead.resources.lexicon import Lexicon

# Set up paths
base_dir = Path(".")
lexicons_dir = base_dir / "lexicons"

# Load VerbNet verbs lexicon from its JSONL file
verb_lexicon_path = lexicons_dir / "verbnet_verbs.jsonl"
verb_lexicon = Lexicon.from_jsonl(verb_lexicon_path, "verbnet_verbs")

print(f"Loaded {len(verb_lexicon.items)} verb forms")

# Preview the first three entries; only the items are used, not their ids
for item in list(verb_lexicon.items.values())[:3]:
    # Separate lemma and form so the two values do not run together
    print(f"{item.lemma}: {item.form}")
    print(f"  VerbNet class: {item.features.get('verbnet_class')}")
    print(f"  Thematic roles: {item.features.get('themroles', [])}")

Next Steps

For template generation and filling, see:

- Templates module: Generate VerbNet templates and fill with lexicons
- Gallery example: Full working script