In this example, we’ll create a dataset to track companies, their founders, and investors, then enrich it with data from various sources.

Step 1: Define Your Schema

First, we need to create a dataset with tables for companies, founders, and investors, plus relationships between them.
from structify import Structify
from structify.types.table import Table, Property
from structify.types.dataset_descriptor import Relationship, RelationshipProperty
from structify.types.property_type import Enum

# Client used for every API call in this walkthrough
client = Structify()

# Allowed values for the enum-typed properties
industry_type = Enum(Enum=["Technology", "Healthcare", "Finance", "Consumer", "B2B Software", "Other"])
investor_type = Enum(Enum=["VC", "Angel", "Corporate", "PE"])

# One Table per kind of entity. Properties that omit prop_type use whatever
# default the SDK applies.
company_table = Table(
    name="company",
    description="a private company that is interested in raising capital",
    properties=[
        Property(name="name", description="The name of the company"),
        Property(name="description", description="What the company does"),
        Property(name="website", description="Company website", prop_type="URL"),
        Property(name="founded_year", description="Year company was founded", prop_type="Integer"),
        Property(name="location", description="Company headquarters location"),
        Property(name="industry", description="Primary industry", prop_type=industry_type),
    ],
)

founder_table = Table(
    name="founder",
    description="person who founded a company",
    properties=[
        Property(name="name", description="Full name of the founder"),
        Property(name="bio", description="Professional background"),
        Property(name="linkedin", description="LinkedIn profile URL", prop_type="URL"),
        Property(name="previous_companies", description="Companies previously founded or worked at"),
    ],
)

investor_table = Table(
    name="investor",
    description="venture capital firm or angel investor",
    properties=[
        Property(name="name", description="Name of the investor or firm"),
        Property(name="type", description="Type of investor", prop_type=investor_type),
        Property(name="portfolio_size", description="Number of portfolio companies", prop_type="Integer"),
        Property(name="website", description="Investor website", prop_type="URL"),
    ],
)

# Table list handed to the dataset when it is created
tables = [company_table, founder_table, investor_table]

# Allowed values for the funding round recorded on an investment
funding_round_type = Enum(Enum=["Seed", "Series A", "Series B", "Series C+"])

# Edge from a company to each of its founders
founded_by = Relationship(
    name="founded_by",
    description="connects a company to its founders",
    source_table="company",
    target_table="founder",
    properties=[
        RelationshipProperty(name="role", description="Current role at company"),
        RelationshipProperty(name="equity_percentage", description="Ownership percentage", prop_type="Float"),
    ],
)

# Edge from an investor to a company it invested in
invested_in = Relationship(
    name="invested_in",
    description="connects an investor to a company they invested in",
    source_table="investor",
    target_table="company",
    properties=[
        RelationshipProperty(name="amount", description="Investment amount", prop_type="Money"),
        RelationshipProperty(name="date", description="Investment date", prop_type="Date"),
        RelationshipProperty(name="round", description="Funding round", prop_type=funding_round_type),
    ],
)

relationships = [founded_by, invested_in]

# Create the dataset from the tables and relationships defined above;
# the later steps refer to it by the name "startup_ecosystem"
client.datasets.create(
    name="startup_ecosystem",
    description="Track startups, their founders, and investors",
    tables=tables,
    relationships=relationships,
)

Step 2: Add Initial Entities

Start with some seed companies that we want to track:
from structify.types import KnowledgeGraphParam, EntityParam

# Seed companies, each described by the properties known up front
companies = [
    {"name": "OpenAI", "website": "https://openai.com"},
    {"name": "Anthropic", "website": "https://anthropic.com"},
    {"name": "Stripe", "website": "https://stripe.com"},
]

for company in companies:
    # Each company is added in its own call, as a graph holding one entity
    seed_entity = EntityParam(id=0, type="company", properties=company)
    seed_graph = KnowledgeGraphParam(entities=[seed_entity])
    client.entities.add(dataset="startup_ecosystem", kg=seed_graph)

Step 3: Enrich with Web Data

Use Structify’s AI agents to find and extract information about these companies:
from structify.types import SourceWeb, SourceWebWeb

# Properties to enhance for every row of the company table
properties_to_enhance = ("description", "founded_year")

for company in client.datasets.view_table(dataset="startup_ecosystem", table="company"):
    # Enhance each listed property of this company, in order
    for property_name in properties_to_enhance:
        client.structure.enhance_property(
            entity_id=company.id,
            property_name=property_name,
        )

    # Find founders via the founded_by relationship
    client.structure.enhance_relationship(
        entity_id=company.id,
        relationship_name="founded_by",
    )

Step 4: Upload Documents for Processing

If you have pitch decks or reports, you can extract structured data from them:
# Instructions describing what to pull out of the document
extraction_prompt = """
    Extract:
    - Company name and description
    - Founder names and backgrounds
    - Investor names if mentioned
    - Funding amounts and dates
    """

# Upload the pitch deck into the dataset
doc = client.documents.upload(
    file_path="acme_corp_pitch_deck.pdf",
    dataset_name="startup_ecosystem",
)

# Start structuring the uploaded document using the prompt above
job = client.documents.structure(
    document_id=doc.id,
    dataset_name="startup_ecosystem",
    extraction_prompt=extraction_prompt,
)

# Look the job up once and report its current status
status = client.jobs.get(job_id=job.id)
print(f"Extraction status: {status.status}")

Step 5: Query Your Dataset

Once populated, you can query your dataset in various ways:
# Search for companies
results = client.entities.search(
    dataset_name="startup_ecosystem",
    query="AI companies founded after 2020",
    limit=10
)

# Get the network of the first match. The search can come back empty (for
# example before enrichment has finished), so guard the results[0] lookup
# instead of letting it raise IndexError.
if results:
    top_match = results[0]
    company_id = top_match.id
    subgraph = client.entities.get_local_subgraph(
        entity_id=company_id,
        radius=2  # Get entities within 2 hops
    )

    print(f"Company: {top_match.properties['name']}")
    print(f"Connected entities: {len(subgraph.entities)}")
else:
    print("No companies matched the search")

# Export to CSV for analysis; the file is opened in binary mode and the
# export is written to it unchanged
csv_data = client.datasets.export_to_csv(name="startup_ecosystem")
with open("startups.csv", "wb") as csv_file:
    csv_file.write(csv_data)

Step 6: Set Up Monitoring

Track changes and updates to your dataset:
# URLs the web source starts from
news_urls = [
    "https://techcrunch.com/",
    "https://venturebeat.com/",
]
web_source = SourceWeb(web=SourceWebWeb(starting_urls=news_urls))

# Entry passed as save_requirement: the "name" property of the "company" table
company_name_requirement = {"table_name": "company", "property_name": "name"}

# Start a run_async job on the dataset using the web source above
client.structure.run_async(
    dataset="startup_ecosystem",
    source=web_source,
    save_requirement=[company_name_requirement],
)

Complete Code

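Here’s a skeleton that organizes the steps above into one script; fill in each placeholder function with the code from the corresponding step: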
from structify import Structify
from structify.types import *
from structify.types.table import Table, Property
from structify.types.dataset_descriptor import Relationship, RelationshipProperty
from structify.types.property_type import Enum

def create_startup_tracker():
    """Run every stage of the startup tracker, in order, with one shared client."""
    client = Structify()

    # Order matters: schema first, then seed data, enrichment, and export
    stages = (
        setup_dataset,
        add_seed_companies,
        enrich_companies,
        export_results,
    )
    for stage in stages:
        stage(client)

def setup_dataset(client):
    """Placeholder for the stage that creates the dataset schema.

    Currently does nothing with ``client`` and returns None.
    """
    # The schema definition goes here

def add_seed_companies(client):
    """Placeholder for the stage that adds the initial companies to track.

    Currently does nothing with ``client`` and returns None.
    """
    # The entity creation code goes here

def enrich_companies(client):
    """Placeholder for the stage that enhances entities with web data.

    Currently does nothing with ``client`` and returns None.
    """
    # The enhancement logic goes here

def export_results(client):
    """Placeholder for the stage that queries and exports the dataset.

    Currently does nothing with ``client`` and returns None.
    """
    # The export logic goes here

# Run the tracker only when this file is executed as a script, not when it is imported
if __name__ == "__main__":
    create_startup_tracker()

Next Steps

  • Add more entity types (products, competitors, news articles)
  • Set up scheduled enrichment to keep data fresh
  • Build visualizations of the company network
  • Create alerts for new investments or founder changes