Company Intelligence

In this example, we’ll create a dataset to track companies, their founders, and investors, then enrich it with data from various sources.

Step 1: Define Your Schema

First, we need to create a dataset with tables for companies, founders, and investors, plus relationships between them.

from structify import Structify
from structify.types.table import Table, Property
from structify.types.dataset_descriptor import Relationship, RelationshipProperty
from structify.types.property_type import Enum

# Client used for every API call in this walkthrough
client = Structify()

# Each table is declared on its own and then collected into `tables`.
# Enum-typed properties get their allowed values from a named Enum first.
industry_type = Enum(Enum=["Technology", "Healthcare", "Finance", "Consumer", "B2B Software", "Other"])

company_table = Table(
    name="company",
    description="a private company that is interested in raising capital",
    properties=[
        Property(name="name", description="The name of the company"),
        Property(name="description", description="What the company does"),
        Property(name="website", description="Company website", prop_type="URL"),
        Property(name="founded_year", description="Year company was founded", prop_type="Integer"),
        Property(name="location", description="Company headquarters location"),
        Property(name="industry", description="Primary industry", prop_type=industry_type),
    ],
)

founder_table = Table(
    name="founder",
    description="person who founded a company",
    properties=[
        Property(name="name", description="Full name of the founder"),
        Property(name="bio", description="Professional background"),
        Property(name="linkedin", description="LinkedIn profile URL", prop_type="URL"),
        Property(name="previous_companies", description="Companies previously founded or worked at"),
    ],
)

investor_type = Enum(Enum=["VC", "Angel", "Corporate", "PE"])

investor_table = Table(
    name="investor",
    description="venture capital firm or angel investor",
    properties=[
        Property(name="name", description="Name of the investor or firm"),
        Property(name="type", description="Type of investor", prop_type=investor_type),
        Property(name="portfolio_size", description="Number of portfolio companies", prop_type="Integer"),
        Property(name="website", description="Investor website", prop_type="URL"),
    ],
)

# Order matches the original listing: company, founder, investor
tables = [company_table, founder_table, investor_table]

# Relationships are directed edges between two tables and may carry
# their own properties (for example, the role a founder holds).

# company -> founder
founded_by = Relationship(
    name="founded_by",
    description="connects a company to its founders",
    source_table="company",
    target_table="founder",
    properties=[
        RelationshipProperty(name="role", description="Current role at company"),
        RelationshipProperty(name="equity_percentage", description="Ownership percentage", prop_type="Float"),
    ],
)

# investor -> company
funding_round_type = Enum(Enum=["Seed", "Series A", "Series B", "Series C+"])
invested_in = Relationship(
    name="invested_in",
    description="connects an investor to a company they invested in",
    source_table="investor",
    target_table="company",
    properties=[
        RelationshipProperty(name="amount", description="Investment amount", prop_type="Money"),
        RelationshipProperty(name="date", description="Investment date", prop_type="Date"),
        RelationshipProperty(name="round", description="Funding round", prop_type=funding_round_type),
    ],
)

relationships = [founded_by, invested_in]

# Register the dataset together with the tables and relationships above
dataset_name = "startup_ecosystem"
client.datasets.create(
    name=dataset_name,
    description="Track startups, their founders, and investors",
    relationships=relationships,
    tables=tables,
)

Step 2: Add Initial Entities

Start with some seed companies that we want to track:

from structify.types import KnowledgeGraphParam, EntityParam

# Seed rows: only the name and website are supplied for each company
companies = [
    {"name": "OpenAI", "website": "https://openai.com"},
    {"name": "Anthropic", "website": "https://anthropic.com"},
    {"name": "Stripe", "website": "https://stripe.com"},
]


def _single_company_graph(properties):
    """Wrap one company's properties in a knowledge graph holding a single entity."""
    # id=0 is reused for every company because each graph contains one entity
    entity = EntityParam(id=0, type="company", properties=properties)
    return KnowledgeGraphParam(entities=[entity])


# One add call per company, each carrying its own one-entity graph
for seed in companies:
    client.entities.add(dataset="startup_ecosystem", kg=_single_company_graph(seed))

Step 3: Enrich with Web Data

Use Structify’s AI agents to find and extract information about these companies:

from structify.types import SourceWeb, SourceWebWeb

# Properties to request for every company, in the order they are requested
properties_to_enhance = ("description", "founded_year")

company_rows = client.datasets.view_table(dataset="startup_ecosystem", table="company")
for row in company_rows:
    # Ask Structify to fill in each of the listed properties for this company
    for property_name in properties_to_enhance:
        client.structure.enhance_property(
            entity_id=row.id,
            property_name=property_name,
        )

    # Then look for founders through the founded_by relationship
    client.structure.enhance_relationship(
        entity_id=row.id,
        relationship_name="founded_by",
    )

Step 4: Upload Documents for Processing

If you have pitch decks or reports, you can extract structured data from them:

# What the extraction job is asked to pull out of the document
extraction_prompt = """
    Extract:
    - Company name and description
    - Founder names and backgrounds
    - Investor names if mentioned
    - Funding amounts and dates
    """

# Upload the pitch deck into the dataset
doc = client.documents.upload(
    dataset_name="startup_ecosystem",
    file_path="acme_corp_pitch_deck.pdf",
)

# Start extraction on the uploaded document using the prompt above
job = client.documents.structure(
    document_id=doc.id,
    dataset_name="startup_ecosystem",
    extraction_prompt=extraction_prompt,
)

# Look the job up by id and report its current status
job_state = client.jobs.get(job_id=job.id)
print(f"Extraction status: {job_state.status}")

Step 5: Query Your Dataset

Once populated, you can query your dataset in various ways:

# Free-text search over the dataset, capped at ten results
results = client.entities.search(
    query="AI companies founded after 2020",
    dataset_name="startup_ecosystem",
    limit=10,
)

# NOTE(review): indexing assumes the search returned at least one result;
# an empty result set would fail here — confirm and guard if needed.
top_hit = results[0]
company_id = top_hit.id

# Fetch everything within 2 hops of the top result
subgraph = client.entities.get_local_subgraph(entity_id=company_id, radius=2)

company_name = top_hit.properties['name']
connected_count = len(subgraph.entities)
print(f"Company: {company_name}")
print(f"Connected entities: {connected_count}")

# Dump the dataset as CSV and save it locally for analysis.
# NOTE(review): the file is opened in binary mode, so csv_data is presumably
# bytes — confirm against the SDK's return type.
output_path = "startups.csv"
csv_data = client.datasets.export_to_csv(name="startup_ecosystem")
with open(output_path, "wb") as csv_file:
    csv_file.write(csv_data)

Step 6: Set Up Monitoring

Track changes and updates to your dataset:

# Pages the web source starts from
news_sites = [
    "https://techcrunch.com/",
    "https://venturebeat.com/",
]
web_source = SourceWeb(web=SourceWebWeb(starting_urls=news_sites))

# The only save requirement listed is the company table's name property
save_requirement = [
    {"table_name": "company", "property_name": "name"},
]

# Start a structuring run against the dataset from the sources above.
# NOTE(review): this issues a single run_async call; whether it repeats on a
# schedule is not visible here — confirm in the Structify docs.
client.structure.run_async(
    dataset="startup_ecosystem",
    source=web_source,
    save_requirement=save_requirement,
)

Complete Code

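Here’s a skeleton that ties the steps together in one script. Each helper function is a placeholder; fill it in with the code from the corresponding step above: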

from structify import Structify
from structify.types import *
from structify.types.table import Table, Property
from structify.types.dataset_descriptor import Relationship, RelationshipProperty
from structify.types.property_type import Enum

def create_startup_tracker():
    """Run every stage of the startup tracker with one shared client.

    Stages run in a fixed order: schema setup, seed companies, web
    enrichment, then query/export. Each stage receives the same client.
    """
    client = Structify()

    pipeline = (
        setup_dataset,        # create dataset with schema
        add_seed_companies,   # add seed companies
        enrich_companies,     # enrich with web data
        export_results,       # query and export
    )
    for stage in pipeline:
        stage(client)

def setup_dataset(client):
    """Create the dataset schema (placeholder).

    Currently does nothing and returns None; `client` is accepted but unused.
    """
    # Schema definition here

def add_seed_companies(client):
    """Add the initial companies to track (placeholder).

    Currently does nothing and returns None; `client` is accepted but unused.
    """
    # Entity creation here

def enrich_companies(client):
    """Enhance entities with web data (placeholder).

    Currently does nothing and returns None; `client` is accepted but unused.
    """
    # Enhancement logic here

def export_results(client):
    """Query and export the dataset (placeholder).

    Currently does nothing and returns None; `client` is accepted but unused.
    """
    # Export logic here

# Run the tracker only when this file is executed as a script, not when imported
if __name__ == "__main__":
    create_startup_tracker()

Next Steps

- Add more entity types (products, competitors, news articles)
- Set up scheduled enrichment to keep data fresh
- Build visualizations of the company network
- Create alerts for new investments or founder changes

Get Started

Tips and Tricks

Connectors

Deployments

Core Concepts

Schema Cookbook

Examples

Company Intelligence

Step 1: Define Your Schema

Step 2: Add Initial Entities

Step 3: Enrich with Web Data

Step 4: Upload Documents for Processing

Step 5: Query Your Dataset

Step 6: Set Up Monitoring

Complete Code

Next Steps

Get Started

Tips and Tricks

Connectors

Deployments

Core Concepts

Schema Cookbook

Examples

​Step 1: Define Your Schema

​Step 2: Add Initial Entities

​Step 3: Enrich with Web Data

​Step 4: Upload Documents for Processing

​Step 5: Query Your Dataset

​Step 6: Set Up Monitoring

​Complete Code

​Next Steps

Step 1: Define Your Schema

Step 2: Add Initial Entities

Step 3: Enrich with Web Data

Step 4: Upload Documents for Processing

Step 5: Query Your Dataset

Step 6: Set Up Monitoring

Complete Code

Next Steps