-
Notifications
You must be signed in to change notification settings - Fork 2
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
24 changed files
with
1,721 additions
and
421 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,3 @@ | ||
*venv* | ||
*.ipynb_checkpoints* | ||
.vscode | ||
|
||
node_modules | ||
test-results | ||
.vscode |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
# Use Case Datasets | ||
|
||
This directory includes datasets for use on the "use cases" landing pages for mathesar.org. | ||
|
||
## Loading into Mathesar | ||
|
||
Each dataset has a `schema.sql` and `generated_data.sql` file which can be loaded into Mathesar. Each `schema.sql` file will drop an existing schema with the same name and create a new one. | ||
|
||
Here's an example of loading these into a locally-running Mathesar instance. | ||
|
||
```shell | ||
# (Optional) Generate the data | ||
python {use_case_name}/generate_data.py | ||
|
||
# First load the schema and tables | ||
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < {use_case_name}/schema.sql | ||
# Then the sample data | ||
docker exec -i mathesar_dev_db bash -c 'psql -U mathesar' < {use_case_name}/generated_data.sql | ||
``` | ||
|
||
## Philosophy | ||
|
||
These datasets use a mix of "low fidelity" faker data and more domain-specific hardcoded strings to create fake, but plausible, datasets for various Mathesar use cases. | ||
|
||
Timestamp columns that would be used for auditing, soft deletes, and so on have been omitted to reduce clutter. | ||
|
||
ID columns are always `BIGINT PRIMARY KEY GENERATED ALWAYS AS IDENTITY`. | ||
|
||
## Development | ||
|
||
The only requirement is to install dependencies with `pip install -r requirements.txt`. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
# Bike Shop sample data | ||
|
||
This sample dataset represents a bicycle shop managing their customer service requests. | ||
|
||
```mermaid | ||
%% https://mermaid.js.org/syntax/entityRelationshipDiagram.html | ||
erDiagram | ||
Customers { | ||
BIGINT id PK | ||
TEXT first_name "NOT NULL" | ||
TEXT last_name "NOT NULL" | ||
TEXT email | ||
TEXT phone | ||
} | ||
EquipmentTypes { | ||
BIGINT id PK | ||
TEXT name "NOT NULL" | ||
} | ||
Equipment { | ||
BIGINT id PK | ||
TEXT serial_number "NOT NULL UNIQUE" | ||
TEXT notes | ||
BIGINT type_id FK | ||
} | ||
Mechanics { | ||
BIGINT id PK | ||
TEXT first_name "NOT NULL" | ||
TEXT last_name "NOT NULL" | ||
} | ||
ServiceStatuses { | ||
BIGINT id PK | ||
TEXT name "NOT NULL UNIQUE" | ||
} | ||
ServiceRequests { | ||
BIGINT id PK | ||
BIGINT customer_id FK | ||
BIGINT equipment_id FK | ||
BIGINT mechanic_id FK | ||
TEXT request_description "NOT NULL" | ||
NUMERIC_10_2 cost | ||
TIMESTAMP time_in | ||
TIMESTAMP time_out | ||
} | ||
ServiceMilestones { | ||
BIGINT id PK | ||
BIGINT service_request_id FK | ||
BIGINT status_id FK | ||
TIMESTAMP update_time "DEFAULT NOW()" | ||
TEXT notes | ||
} | ||
Equipment }o--|| EquipmentTypes : "type_id" | ||
ServiceRequests }o--|| Customers : "customer_id" | ||
ServiceRequests }o--|| Equipment : "equipment_id" | ||
ServiceRequests }o--|| Mechanics : "mechanic_id" | ||
ServiceMilestones }o--|| ServiceRequests : "service_request_id" | ||
ServiceMilestones }o--|| ServiceStatuses : "status_id" | ||
``` |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,205 @@ | ||
import os | ||
import random | ||
from datetime import timedelta, datetime | ||
from faker import Faker | ||
|
||
fake = Faker() | ||
|
||
# Helper functions | ||
def clean_value(value):
    """Render a single Python value as one field of a COPY text-format row.

    Args:
        value: The cell value; ``None`` stands for SQL NULL.

    Returns:
        ``\\N`` for ``None``; for strings, the text with backslashes escaped
        and tabs, newlines and carriage returns replaced by single spaces;
        ``str(value)`` for anything else.
    """
    if value is None:
        return r"\N"
    if isinstance(value, str):
        # Backslash is the escape character of COPY's text format. Doubling it
        # first keeps literal backslashes intact and stops values such as
        # "\N" or "\." from being read as NULL / end-of-data.
        escaped = value.replace("\\", "\\\\")
        # Tab is the field delimiter and line breaks end a row, so they are
        # flattened to spaces rather than kept.
        for separator in ("\t", "\n", "\r"):
            escaped = escaped.replace(separator, " ")
        return escaped
    return str(value)
|
||
def write_to_sql_file(output_path, search_path, tables):
    """Write the generated rows to ``output_path`` as an SQL script.

    The script first sets ``search_path`` and then contains one
    ``COPY "<table>" FROM stdin`` section per table, terminated by ``\\.``.
    Prints the output path once the file has been written.

    Args:
        output_path: Path of the SQL file to create (overwritten if present).
        search_path: Schema name placed, double-quoted, in ``SET search_path``.
        tables: Mapping of table name to an iterable of rows. Each row is an
            iterable of values, rendered with ``clean_value`` and joined with
            tabs. Tables are written in the mapping's iteration order.
    """
    # Pin the encoding so the file does not depend on the platform's locale
    # default when generated values contain non-ASCII characters.
    with open(output_path, "w", encoding="utf-8") as f:
        f.write(f'SET search_path="{search_path}";\n\n')
        for table_name, generator in tables.items():
            # No column list is given, so each row supplies its values in the
            # order the generator yields them.
            f.write(f'COPY "{table_name}" FROM stdin;\n')
            for row in generator:
                cleaned_row = "\t".join(map(clean_value, row))
                f.write(f"{cleaned_row}\n")
            # End-of-data marker for COPY ... FROM stdin.
            f.write("\\.\n\n")
    print(f"SQL file generated: {output_path}")
|
||
def get_output_file_path(filename):
    """Return ``filename`` joined onto the directory that holds this script."""
    script_path = os.path.abspath(__file__)
    script_dir, _script_name = os.path.split(script_path)
    return os.path.join(script_dir, filename)
|
||
# Constants | ||
# Number of rows to generate for each randomly populated table.
NUM_CUSTOMERS = 20
NUM_MECHANICS = 5
NUM_EQUIPMENT = 50
NUM_SERVICE_REQUESTS = 30
NUM_SERVICE_MILESTONES = 100

# Lookup values for the "Equipment Types" table; IDs are assigned by position,
# starting at 1.
EQUIPMENT_TYPES = [
    "Mountain Bike",
    "Road Bike",
    "Hybrid Bike",
    "Electric Bike",
    "BMX Bike",
    "Cyclocross Bike",
    "Folding Bike",
    "Touring Bike",
]

# Derived from the list so the range of valid type IDs cannot drift out of
# sync when an equipment type is added or removed.
NUM_EQUIPMENT_TYPES = len(EQUIPMENT_TYPES)
|
||
# Maps a bike part to the notes that can be attached to a service milestone.
# generate_service_milestones picks a random part, then a random note for that
# part, and stores them as "<part>: <note>".
PARTS_AND_NOTES = {
    "Frame": [
        "Small dent on the top tube identified during inspection.",
        "Frame cleaned and polished; customer commented on how shiny it looked.",
        "Noticed a crack near the bottom bracket; recommended a replacement.",
    ],
    "Wheels": [
        "Bent rear rim; trued the wheel successfully.",
        "Replaced a broken spoke on the front wheel.",
        "Customer pleased with how smooth the wheels now spin.",
    ],
    "Tires": [
        "Replaced a worn-out rear tire; customer opted for puncture-resistant model.",
        "Front tire inflated; slow leak detected and patched.",
        "Customer appreciated advice on tire pressure for road biking.",
    ],
    "Brakes": [
        "Adjusted brake pads for better stopping power.",
        "Rear brake cable frayed; replaced with a new one.",
        "Customer remarked how responsive the brakes feel now.",
    ],
    "Gears": [
        "Shifted gears sticking; replaced derailleur hanger.",
        "Customer reported skipping gears; adjusted indexing.",
        "Lubricated drivetrain; customer noticed quieter pedaling.",
    ],
    "Handlebars": [
        "Re-wrapped handlebar tape; customer loved the color choice.",
        "Handlebar alignment corrected; was slightly off-center.",
        "Installed new ergonomic grips; customer was excited about the comfort.",
    ],
    "Pedals": [
        "Left pedal bearings replaced due to grinding noise.",
        "Upgraded pedals to a clipless system; customer very happy.",
        "Mechanic noticed loose threads on right pedal spindle; tightened securely.",
    ],
    "Seat": [
        "Seatpost adjusted for proper height; customer reported better comfort.",
        "Replaced torn saddle with a new gel-padded seat.",
        "Customer commented that the saddle now feels like new.",
    ],
}
|
||
# Pool of request descriptions; generate_service_requests picks one at random
# for every service request, so descriptions can repeat across rows.
REQUEST_DESCRIPTIONS = [
    "Bike makes a clicking noise while pedaling.",
    "Brakes feel soft and don't stop effectively.",
    "Gears are not shifting smoothly.",
    "Rear wheel wobbles; possible rim issue.",
    "Flat tire; needs replacement or repair.",
    "Customer wants a full tune-up before a race.",
    "Looking to upgrade to tubeless tires.",
    "Front fork feels stiff; possible suspension issue.",
    "Customer complained about an uncomfortable saddle.",
    "Handlebars feel loose and need adjustment.",
    "Chain keeps falling off during rides.",
    "Rear derailleur seems bent after a crash.",
    "Customer wants clipless pedals installed.",
    "Headset creaks when turning the handlebars.",
    "Electric bike battery isn't holding charge.",
    "Customer wants help installing accessories (e.g., lights, rack).",
    "Bike feels heavy and sluggish; might need a drivetrain cleaning.",
    "Suspension setup needs adjusting for rider weight.",
    "Customer reported squeaky brakes after riding in wet conditions.",
    "Child seat needs to be installed securely on the frame.",
]
|
||
# Lookup values for the "Service Statuses" table. generate_service_statuses
# assigns IDs by position starting at 1, so reordering this list changes them.
SERVICE_STATUSES = ["Received", "In Progress", "Awaiting Part", "Completed"]
|
||
def generate_customers():
    """Yield ``NUM_CUSTOMERS`` rows of id, first name, last name, email, phone."""
    customer_id = 0
    while customer_id < NUM_CUSTOMERS:
        customer_id += 1
        first_name = fake.first_name()
        last_name = fake.last_name()
        email = fake.email()
        phone = fake.phone_number()
        yield [customer_id, first_name, last_name, email, phone]
|
||
def generate_equipment_types():
    """Yield an ``[id, name]`` row per equipment type, with IDs counting from 1."""
    for position, type_name in enumerate(EQUIPMENT_TYPES):
        yield [position + 1, type_name]
|
||
def generate_mechanics():
    """Yield ``NUM_MECHANICS`` rows of id, first name, last name."""
    mechanic_id = 0
    while mechanic_id < NUM_MECHANICS:
        mechanic_id += 1
        first_name = fake.first_name()
        last_name = fake.last_name()
        yield [mechanic_id, first_name, last_name]
|
||
def generate_service_statuses():
    """Yield an ``[id, name]`` row per service status, with IDs counting from 1."""
    for position, status_name in enumerate(SERVICE_STATUSES):
        yield [position + 1, status_name]
|
||
def generate_equipment(equipment_type_ids):
    """Yield ``NUM_EQUIPMENT`` rows of id, type id, serial number, notes.

    Args:
        equipment_type_ids: Sequence of valid equipment type IDs to pick from.
    """
    # NOTE(review): rows are loaded positionally, so this value order must
    # match the column order of the Equipment table in schema.sql - confirm.
    equipment_id = 0
    while equipment_id < NUM_EQUIPMENT:
        equipment_id += 1
        type_id = random.choice(equipment_type_ids)
        # fake.unique keeps serial numbers distinct across all rows.
        serial_number = fake.unique.ean13()
        # Notes are left as an empty string.
        yield [equipment_id, type_id, serial_number, ""]
|
||
def generate_service_requests(customer_ids, equipment_ids, mechanic_ids):
    """Yield ``NUM_SERVICE_REQUESTS`` service request rows.

    Each row is: id, customer id, equipment id, mechanic id, description,
    cost, time in, time out. ``time out`` is ``None`` for requests that are
    still open and is otherwise later than ``time in``.

    Args:
        customer_ids: Sequence of valid customer IDs to pick from.
        equipment_ids: Sequence of valid equipment IDs to pick from.
        mechanic_ids: Sequence of valid mechanic IDs to pick from.
    """
    for i in range(1, NUM_SERVICE_REQUESTS + 1):
        time_in = fake.date_time_this_year()
        # Derive the finish time from the drop-off time so a request can never
        # end before it started: between one hour and two weeks later.
        finished_at = time_in + timedelta(hours=random.randint(1, 14 * 24))
        # Roughly half of the requests are closed; one whose finish time would
        # lie in the future is left open instead.
        is_closed = random.random() < 0.5 and finished_at <= datetime.now()
        yield [
            i,
            random.choice(customer_ids),
            random.choice(equipment_ids),
            random.choice(mechanic_ids),
            random.choice(REQUEST_DESCRIPTIONS),
            round(random.uniform(20, 500), 2),  # cost, two decimal places
            time_in,
            finished_at if is_closed else None,
        ]
|
||
def generate_service_milestones(service_request_ids, status_ids):
    """Yield ``NUM_SERVICE_MILESTONES`` milestone rows.

    Each row is: id, service request id, status id, update time, notes, where
    the notes read ``"<part>: <note>"`` using an entry of ``PARTS_AND_NOTES``.

    Args:
        service_request_ids: Sequence of valid service request IDs.
        status_ids: Sequence of valid service status IDs.
    """
    parts_with_notes = list(PARTS_AND_NOTES.items())
    milestone_id = 0
    while milestone_id < NUM_SERVICE_MILESTONES:
        milestone_id += 1
        part, candidate_notes = random.choice(parts_with_notes)
        request_id = random.choice(service_request_ids)
        status_id = random.choice(status_ids)
        updated_at = fake.date_time_this_year()
        note = random.choice(candidate_notes)
        yield [milestone_id, request_id, status_id, updated_at, f"{part}: {note}"]
|
||
if __name__ == "__main__":
    # Every table uses consecutive IDs starting at 1, so the valid foreign-key
    # values are simply the ranges implied by the row counts.
    customer_ids = [*range(1, NUM_CUSTOMERS + 1)]
    equipment_type_ids = [*range(1, NUM_EQUIPMENT_TYPES + 1)]
    equipment_ids = [*range(1, NUM_EQUIPMENT + 1)]
    mechanic_ids = [*range(1, NUM_MECHANICS + 1)]
    service_request_ids = [*range(1, NUM_SERVICE_REQUESTS + 1)]
    status_ids = [*range(1, len(SERVICE_STATUSES) + 1)]

    # These three tables are materialized up front; the remaining generators
    # are consumed lazily while the file is written.
    equipment_rows = [*generate_equipment(equipment_type_ids)]
    service_request_rows = [
        *generate_service_requests(customer_ids, equipment_ids, mechanic_ids)
    ]
    service_milestone_rows = [
        *generate_service_milestones(service_request_ids, status_ids)
    ]

    # Insertion order is the order in which the COPY sections are written.
    tables = {}
    tables["Customers"] = generate_customers()
    tables["Equipment Types"] = generate_equipment_types()
    tables["Equipment"] = iter(equipment_rows)
    tables["Mechanics"] = generate_mechanics()
    tables["Service Statuses"] = generate_service_statuses()
    tables["Service Requests"] = iter(service_request_rows)
    tables["Service Milestones"] = iter(service_milestone_rows)

    write_to_sql_file(
        get_output_file_path("generated_data.sql"),
        "Bike Shop",
        tables,
    )
Oops, something went wrong.