Step 1: Define Your Schema
First, we need to create a dataset with tables for companies, founders, and investors, plus relationships between them.
from structify import Structify
from structify.types.table import Table, Property
from structify.types.dataset_descriptor import Relationship, RelationshipProperty
from structify.types.property_type import Enum
client = Structify()

# --- Tables -----------------------------------------------------------------
# Each entity kind gets its own table. Properties that do not pass a
# prop_type rely on the library's default property type.
company_table = Table(
    name="company",
    description="a private company that is interested in raising capital",
    properties=[
        Property(name="name", description="The name of the company"),
        Property(name="description", description="What the company does"),
        Property(name="website", description="Company website", prop_type="URL"),
        Property(name="founded_year", description="Year company was founded", prop_type="Integer"),
        Property(name="location", description="Company headquarters location"),
        Property(
            name="industry",
            description="Primary industry",
            prop_type=Enum(Enum=["Technology", "Healthcare", "Finance", "Consumer", "B2B Software", "Other"]),
        ),
    ],
)

founder_table = Table(
    name="founder",
    description="person who founded a company",
    properties=[
        Property(name="name", description="Full name of the founder"),
        Property(name="bio", description="Professional background"),
        Property(name="linkedin", description="LinkedIn profile URL", prop_type="URL"),
        Property(name="previous_companies", description="Companies previously founded or worked at"),
    ],
)

investor_table = Table(
    name="investor",
    description="venture capital firm or angel investor",
    properties=[
        Property(name="name", description="Name of the investor or firm"),
        Property(name="type", description="Type of investor", prop_type=Enum(Enum=["VC", "Angel", "Corporate", "PE"])),
        Property(name="portfolio_size", description="Number of portfolio companies", prop_type="Integer"),
        Property(name="website", description="Investor website", prop_type="URL"),
    ],
)

tables = [company_table, founder_table, investor_table]

# --- Relationships ----------------------------------------------------------
# Relationships are directed (source_table -> target_table) and carry
# their own properties.
founded_by = Relationship(
    name="founded_by",
    description="connects a company to its founders",
    source_table="company",
    target_table="founder",
    properties=[
        RelationshipProperty(name="role", description="Current role at company"),
        RelationshipProperty(name="equity_percentage", description="Ownership percentage", prop_type="Float"),
    ],
)

invested_in = Relationship(
    name="invested_in",
    description="connects an investor to a company they invested in",
    source_table="investor",
    target_table="company",
    properties=[
        RelationshipProperty(name="amount", description="Investment amount", prop_type="Money"),
        RelationshipProperty(name="date", description="Investment date", prop_type="Date"),
        RelationshipProperty(name="round", description="Funding round", prop_type=Enum(Enum=["Seed", "Series A", "Series B", "Series C+"])),
    ],
)

relationships = [founded_by, invested_in]

# --- Dataset ----------------------------------------------------------------
# Register the schema under the name that the later steps refer to.
client.datasets.create(
    name="startup_ecosystem",
    description="Track startups, their founders, and investors",
    tables=tables,
    relationships=relationships,
)
Step 2: Add Initial Entities
Start with some seed companies that we want to track:
from structify.types import KnowledgeGraphParam, EntityParam
# Add some well-known startups. Only the name and website are supplied
# here; the remaining company properties are left unset.
companies = [
    {"name": "OpenAI", "website": "https://openai.com"},
    {"name": "Anthropic", "website": "https://anthropic.com"},
    {"name": "Stripe", "website": "https://stripe.com"},
]

# Each company is submitted in its own call, wrapped in a knowledge graph
# that contains exactly one "company" entity.
for company in companies:
    client.entities.add(
        dataset="startup_ecosystem",
        kg=KnowledgeGraphParam(
            entities=[
                EntityParam(
                    id=0,
                    type="company",
                    properties=company,
                )
            ]
        ),
    )
Step 3: Enrich with Web Data
Use Structify’s AI agents to find and extract information about these companies:
from structify.types import SourceWeb, SourceWebWeb
# For each company stored in the "company" table, request enrichment of
# the properties and relationships that the seed data did not provide.
for company in client.datasets.view_table(dataset="startup_ecosystem", table="company"):
    # Get company details: description first, then founding year
    client.structure.enhance_property(
        entity_id=company.id,
        property_name="description",
    )
    client.structure.enhance_property(
        entity_id=company.id,
        property_name="founded_year",
    )

    # Find founders via the "founded_by" relationship defined in the schema
    client.structure.enhance_relationship(
        entity_id=company.id,
        relationship_name="founded_by",
    )
Step 4: Upload Documents for Processing
If you have pitch decks or reports, you can extract structured data from them:
# Upload a pitch deck and attach it to the dataset.
doc = client.documents.upload(
    dataset_name="startup_ecosystem",
    file_path="acme_corp_pitch_deck.pdf",
)

# Free-text instructions describing what to extract from the document.
extraction_prompt = """
Extract:
- Company name and description
- Founder names and backgrounds
- Investor names if mentioned
- Funding amounts and dates
"""

# Start the extraction for the uploaded document.
job = client.documents.structure(
    document_id=doc.id,
    dataset_name="startup_ecosystem",
    extraction_prompt=extraction_prompt,
)

# Look the job up once and report its current status.
status = client.jobs.get(job_id=job.id)
print(f"Extraction status: {status.status}")
Step 5: Query Your Dataset
Once populated, you can query your dataset in various ways:
# Search for companies
results = client.entities.search(
    dataset_name="startup_ecosystem",
    query="AI companies founded after 2020",
    limit=10,
)

# Get a company's network. The search may return nothing, and indexing
# results[0] on an empty result would raise, so only inspect the top match
# when there is one.
if results:
    top_match = results[0]
    company_id = top_match.id
    subgraph = client.entities.get_local_subgraph(
        entity_id=company_id,
        radius=2,  # Get entities within 2 hops
    )
    print(f"Company: {top_match.properties['name']}")
    print(f"Connected entities: {len(subgraph.entities)}")
else:
    print("No companies matched the search query")

# Export to CSV for analysis. The file is opened in binary mode, so
# csv_data is presumably bytes -- TODO confirm against the SDK.
csv_data = client.datasets.export_to_csv(name="startup_ecosystem")
with open("startups.csv", "wb") as f:
    f.write(csv_data)
Step 6: Set Up Monitoring
Track changes and updates to your dataset:
# Web source: start from two tech-news front pages.
news_source = SourceWeb(
    web=SourceWebWeb(
        starting_urls=[
            "https://techcrunch.com/",
            "https://venturebeat.com/",
        ]
    )
)

# Passed as save_requirement below; presumably only companies with a
# "name" are saved -- TODO confirm against the SDK.
required_fields = [
    {"table_name": "company", "property_name": "name"},
]

# Start the enrichment run against the dataset.
client.structure.run_async(
    dataset="startup_ecosystem",
    source=news_source,
    save_requirement=required_fields,
)
Complete Code
Here’s the full example in one script:
from structify import Structify
from structify.types import *
from structify.types.table import Table, Property
from structify.types.dataset_descriptor import Relationship, RelationshipProperty
from structify.types.property_type import Enum
def create_startup_tracker():
    """Run the startup-tracker pipeline end to end.

    Creates a single Structify client and passes it, in order, to the
    schema-setup, seeding, enrichment, and export helpers.
    """
    client = Structify()

    # Create dataset with schema
    setup_dataset(client)

    # Add seed companies
    add_seed_companies(client)

    # Enrich with web data
    enrich_companies(client)

    # Query and export
    export_results(client)
def setup_dataset(client) -> None:
    """Create the dataset schema.

    Placeholder: the body is intentionally empty. Insert the table,
    relationship, and dataset-creation code from Step 1 here.
    """
    # Schema definition here
    pass
def add_seed_companies(client) -> None:
    """Add initial companies to track.

    Placeholder: the body is intentionally empty. Insert the
    entity-creation loop from Step 2 here.
    """
    # Entity creation here
    pass
def enrich_companies(client) -> None:
    """Enhance entities with web data.

    Placeholder: the body is intentionally empty. Insert the
    property/relationship enhancement loop from Step 3 here.
    """
    # Enhancement logic here
    pass
def export_results(client) -> None:
    """Query and export the dataset.

    Placeholder: the body is intentionally empty. Insert the search and
    CSV-export code from Step 5 here.
    """
    # Export logic here
    pass
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    create_startup_tracker()
Next Steps
- Add more entity types (products, competitors, news articles)
- Set up scheduled enrichment to keep data fresh
- Build visualizations of the company network
- Create alerts for new investments or founder changes