Skip to content

Commit

Permalink
checkpoint first development for #50
Browse files (browse the repository at this point in the history)
  • Loading branch information
pekasen committed Apr 24, 2023
1 parent 1ffffd0 commit a1458e7
Show file tree
Hide file tree
Showing 4 changed files with 220 additions and 95 deletions.
8 changes: 6 additions & 2 deletions ponyexpress/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -29,10 +29,14 @@ def cli(ctx):

@cli.command()
@click.argument("config", type=click.Path(path_type=Path, exists=True))
@click.option("--reuse/--create",
default=False,
help="Create a new job or reuse an existing one. [REUSE]"
)
@click.pass_context
def start(ctx: click.Context, config: Path):
def start(ctx: click.Context, config: Path, reuse: bool):
"""start a job"""
ctx.obj.start(config)
ctx.obj.start(config, reuse=reuse)


@cli.command()
Expand Down
22 changes: 16 additions & 6 deletions ponyexpress/model.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -3,6 +3,7 @@
It keeps track of the crawled data, the configuration and the current state of the crawler.
"""
import datetime
import uuid
from typing import Any, Callable, Dict, List, Tuple, Type

import sqlalchemy as sql
Expand Down Expand Up @@ -39,13 +40,15 @@ class AppMetaData(Base):
id: orm.Mapped[str] = orm.mapped_column(primary_key=True, index=True)
version: orm.Mapped[int] = orm.mapped_column()
iteration: orm.Mapped[int] = orm.mapped_column()
created_at: orm.Mapped[datetime.datetime] = orm.mapped_column()


class SeedList(Base):
"""Table of seed nodes for each iteration."""

__tablename__ = "seed_list"

job_id: orm.Mapped[str] = orm.mapped_column(primary_key=True, index=True)
id: orm.Mapped[str] = orm.mapped_column(primary_key=True, index=True)
status: orm.Mapped[str] = orm.mapped_column()
iteration: orm.Mapped[int] = orm.mapped_column()
Expand All @@ -57,7 +60,8 @@ class TaskList(Base):

__tablename__ = "task_list"

id: orm.Mapped[int] = orm.mapped_column(primary_key=True, autoincrement=True)
job_id: orm.Mapped[str] = orm.mapped_column(primary_key=True, index=True, unique=False)
id: orm.Mapped[str] = orm.mapped_column(primary_key=True, default=lambda: str(uuid.uuid4()))
node_id: orm.Mapped[str] = orm.mapped_column()
status: orm.Mapped[str] = orm.mapped_column()
connector: orm.Mapped[str] = orm.mapped_column()
Expand All @@ -76,7 +80,7 @@ def _(data: Dict[str, Any]) -> Type[Any]:
**{
key: data.get(key)
for key in [column.name for column in spec_fixed]
+ list(spec_variadic.keys())
+ list(spec_variadic.keys()) + ["job_id"]
}
)

Expand All @@ -96,7 +100,10 @@ def create_raw_edge_table(
table: the table
"""
spec_fixed = [
sql.Column("id", sql.Integer, primary_key=True, index=True, autoincrement=True),
sql.Column("job_id", sql.Text, primary_key=True, index=True),
sql.Column("id", sql.Text, primary_key=True, index=True, default=lambda: str(uuid.uuid4(

))),
sql.Column("source", sql.Text, index=True, unique=False),
sql.Column("target", sql.Text, index=True, unique=False),
sql.Column("iteration", sql.Integer, index=True, unique=False),
Expand Down Expand Up @@ -138,11 +145,13 @@ def create_aggregated_edge_table(
table: the table
"""
spec_fixed = [
sql.Column("job_id", sql.Text, primary_key=True, index=True),
sql.Column("source", sql.Text, primary_key=True, index=True),
sql.Column("target", sql.Text, primary_key=True, index=True),
sql.Column(
"iteration", sql.Integer, primary_key=True, index=True, unique=False
),
sql.Column("is_dense", sql.Boolean, primary_key=True, index=True, unique=False),
# sql.Column(
# "iteration", sql.Integer, primary_key=True, index=True, unique=False
# ),
sql.Column("weight", sql.Integer),
]

Expand Down Expand Up @@ -179,6 +188,7 @@ def create_node_table(
table: the table
"""
spec_fixed = [
sql.Column("job_id", sql.Text, primary_key=True, index=True),
sql.Column("name", sql.Text, primary_key=True, index=True),
sql.Column("iteration", sql.Integer, index=True, unique=False),
]
Expand Down
Loading

0 comments on commit a1458e7

Please sign in to comment.