Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: introduce support for generating observations for circumvention nettests #48

Draft
wants to merge 5 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions oonidata/models/nettests/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
from .signal import Signal
from .telegram import Telegram
from .tor import Tor
from .psiphon import Psiphon
from .vanilla_tor import VanillaTor
from .web_connectivity import WebConnectivity
from .whatsapp import Whatsapp
from .http_invalid_request_line import HTTPInvalidRequestLine
Expand All @@ -16,6 +18,8 @@
WebConnectivity,
Telegram,
Tor,
Psiphon,
VanillaTor,
DNSCheck,
Signal,
Whatsapp,
Expand All @@ -27,6 +31,8 @@
WebConnectivity,
Telegram,
Tor,
Psiphon,
VanillaTor,
DNSCheck,
Signal,
Whatsapp,
Expand Down
35 changes: 35 additions & 0 deletions oonidata/models/nettests/psiphon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
from dataclasses import dataclass
from typing import List, Optional
from oonidata.compat import add_slots
from oonidata.models.base_model import BaseModel
from oonidata.models.dataformats import (
DNSQuery,
Failure,
HTTPTransaction,
NetworkEvent,
TLSHandshake,
)

from .base_measurement import BaseMeasurement


# Test keys (the nettest-specific payload) of a psiphon measurement.
# Every field defaults to None, so a measurement missing any of these keys
# still produces an instance.
@add_slots
@dataclass
class PsiphonTestKeys(BaseModel):
# Failure recorded for the experiment as a whole; None by default.
failure: Failure = None
# NOTE(review): OONI Probe appears to report max_runtime and bootstrap_time
# as fractional seconds — confirm that int (rather than float) is intended.
max_runtime: Optional[int] = None
bootstrap_time: Optional[int] = None

# presumably the address of the local SOCKS proxy exposed by psiphon — TODO confirm
socksproxy: Optional[str] = None
# The four lists below are what PsiphonTransformer passes to the DNS, TLS
# and HTTP observation builders (tls_handshakes together with network_events).
network_events: Optional[List[NetworkEvent]] = None
tls_handshakes: Optional[List[TLSHandshake]] = None
queries: Optional[List[DNSQuery]] = None
requests: Optional[List[HTTPTransaction]] = None


# Measurement model for the psiphon nettest: a BaseMeasurement whose
# test_keys payload is typed as PsiphonTestKeys.
@add_slots
@dataclass
class Psiphon(BaseMeasurement):
# Same string as the "psiphon" key of the transformer registry;
# presumably used to select this model by test name — verify against the loader.
__test_name__ = "psiphon"

# Nettest-specific payload of the measurement.
test_keys: PsiphonTestKeys
38 changes: 38 additions & 0 deletions oonidata/models/nettests/vanilla_tor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
from dataclasses import dataclass
from typing import List, Optional
from oonidata.compat import add_slots
from oonidata.models.base_model import BaseModel
from oonidata.models.dataformats import (
DNSQuery,
Failure,
HTTPTransaction,
NetworkEvent,
TLSHandshake,
)

from .base_measurement import BaseMeasurement


# Test keys (the nettest-specific payload) of a vanilla_tor measurement.
# Every field defaults to None, so a measurement missing any of these keys
# still produces an instance.
@add_slots
@dataclass
class VanillaTorTestKeys(BaseModel):
# Outcome of the experiment; VanillaTorTransformer is meant to copy these
# into the tor_* columns of CircumventionToolObservation.
failure: Failure = None
error: Optional[str] = None
success: Optional[bool] = None
# NOTE(review): OONI Probe appears to report bootstrap_time and timeout as
# fractional seconds — confirm that int (rather than float) is intended.
bootstrap_time: Optional[int] = None
timeout: Optional[int] = None

# Diagnostics about the tor process itself.
tor_logs: Optional[List[str]] = None
# presumably the last bootstrap percentage reached, with its tag and
# human-readable summary — TODO confirm against the nettest specification
tor_progress: Optional[int] = None
tor_progress_tag: Optional[str] = None
tor_progress_summary: Optional[str] = None
tor_version: Optional[str] = None
# Stored as tor_transport_name on the observation.
transport_name: Optional[str] = None


@add_slots
@dataclass
class VanillaTor(BaseMeasurement):
    """Measurement model for the ``vanilla_tor`` nettest.

    A ``BaseMeasurement`` whose ``test_keys`` payload is typed as
    ``VanillaTorTestKeys``.
    """

    # Must be spelled exactly like the nettest name, which is also the key
    # under which VanillaTorTransformer is registered ("vanilla_tor").
    # The previous value "vanila_tor" was a typo that could never match.
    __test_name__ = "vanilla_tor"

    # Nettest-specific payload of the measurement.
    test_keys: VanillaTorTestKeys
31 changes: 31 additions & 0 deletions oonidata/models/observations.py
Original file line number Diff line number Diff line change
Expand Up @@ -378,3 +378,34 @@ class HTTPMiddleboxObservation(MeasurementMeta):
hfm_diff: Optional[str] = None
hfm_failure: Optional[str] = None
hfm_success: Optional[bool] = None


@add_slots
@dataclass
class CircumventionToolObservation(MeasurementMeta):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

While this looks really good, I think we should invest a bit of time in defining this Observation data model so that it's flexible enough for all future circumvention tools.

Ideally it would be something that can adapt nicely to new circumvention tools as we put them out and it's probably worth checking with @ainghazal what his thoughts are on the topic.

Some of the considerations to keep in mind are the following:

  • Schema migrations are a pain, so the less we do the better it is
  • It's easier to add new columns, than it is to change an existing column
  • We should plan for schema evolution so that the model is as future-proof as possible; where we do anticipate changes, they should be made by adding new columns rather than by changing existing ones

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I lack broader context about data design for observations, but in principle, I think I'd go for a generic circumvention observation (with perhaps method family and optional flavor or configuration parameters) rather than a flat observation table that tries to accommodate all of them.

A couple of quick thoughts though:

  1. If schema migrations are painful, wouldn't it be a good idea to spend some effort on a solution that automates them? (I'm thinking of an equivalent of Django's South.) Basically, I guess we'd need a version, a way to convert semantically equivalent data for each field, and the ability to mark a change as backwards-incompatible (NA before a version cut).
  2. One thought I've been entertaining is to draw a "family tree" of circumvention tools (proxy, VPN, onion routing) that captures at least the broad aspects of the protocols and then allows specifying the parameters that vary (for example, Tor over an obfs4 bridge, with endpoint E, where obfs4 has a version that lets us compare breaking changes, etc.). Same for VPNs: is_vpn=True, with proto=wireguard && transport=tcp && obfuscation=foo.

# presumably the destination table and its index columns, consumed by the
# database layer outside this view — TODO confirm
__table_name__ = "obs_circumvention_tool"
__table_index__ = ("measurement_uid", "observation_id", "measurement_start_time")

# VanillaTorTransformer sets observation_id to "<measurement_uid>_0" and
# created_at to the current UTC time truncated to whole seconds.
observation_id: str = ""
bucket_date: Optional[str] = None
created_at: Optional[datetime] = None

# Tool-agnostic column; copied from test_keys.bootstrap_time.
bootstrap_time: Optional[int] = None

# psiphon observation
# NOTE(review): no transformer visible in this change populates the
# psiphon_* columns (PsiphonTransformer only emits web observations).
psiphon_failure: Failure = None
psiphon_max_runtime: Optional[int] = None
psiphon_socksproxy: Optional[str] = None

# vanilla_tor observation
# Filled by VanillaTorTransformer from the same-named VanillaTorTestKeys
# fields (transport_name becomes tor_transport_name).
tor_failure: Failure = None
tor_error: Optional[str] = None
tor_success: Optional[bool] = None
tor_timeout: Optional[int] = None

# Diagnostics about the tor process.
tor_logs: Optional[List[str]] = None
tor_progress: Optional[int] = None
tor_progress_tag: Optional[str] = None
tor_progress_summary: Optional[str] = None
tor_version: Optional[str] = None
tor_transport_name: Optional[str] = None
4 changes: 4 additions & 0 deletions oonidata/transforms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@
from oonidata.transforms.nettests.signal import SignalTransformer
from oonidata.transforms.nettests.telegram import TelegramTransformer
from oonidata.transforms.nettests.tor import TorTransformer
from oonidata.transforms.nettests.psiphon import PsiphonTransformer
from oonidata.transforms.nettests.vanilla_tor import VanillaTorTransformer
from oonidata.transforms.nettests.web_connectivity import WebConnectivityTransformer
from oonidata.transforms.nettests.http_invalid_request_line import (
HTTPInvalidRequestLineTransformer,
Expand All @@ -17,6 +19,8 @@
"signal": SignalTransformer,
"telegram": TelegramTransformer,
"tor": TorTransformer,
"psiphon": PsiphonTransformer,
"vanilla_tor": VanillaTorTransformer,
"http_header_field_manipulation": HTTPHeaderFieldManipulationTransformer,
"http_invalid_request_line": HTTPInvalidRequestLineTransformer,
"web_connectivity": WebConnectivityTransformer,
Expand Down
23 changes: 23 additions & 0 deletions oonidata/transforms/nettests/psiphon.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from typing import List, Tuple
from oonidata.models.nettests import Psiphon
from oonidata.models.observations import WebObservation
from oonidata.transforms.nettests.measurement_transformer import MeasurementTransformer


class PsiphonTransformer(MeasurementTransformer):
    """Turn a psiphon measurement into web observations."""

    def make_observations(self, msmt: Psiphon) -> Tuple[List[WebObservation]]:
        """Build the web observations for one psiphon measurement.

        The DNS queries, TLS handshakes (with their network events) and HTTP
        requests found in the test keys are converted to per-protocol
        observations and then merged by ``consume_web_observations``.

        Args:
            msmt: the parsed psiphon measurement.

        Returns:
            A 1-tuple holding the merged web observations.
        """
        test_keys = msmt.test_keys

        dns_observations = self.make_dns_observations(test_keys.queries)
        tls_observations = self.make_tls_observations(
            test_keys.tls_handshakes,
            test_keys.network_events,
        )
        http_observations = self.make_http_observations(test_keys.requests)

        web_observations = self.consume_web_observations(
            dns_observations=dns_observations,
            tls_observations=tls_observations,
            http_observations=http_observations,
        )

        # The trailing comma is what makes this a tuple. Without it the
        # parentheses are mere grouping and the bare result is returned,
        # contradicting the declared Tuple[...] return type.
        return (web_observations,)
29 changes: 29 additions & 0 deletions oonidata/transforms/nettests/vanilla_tor.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import dataclasses
from datetime import datetime
from typing import List, Tuple

from oonidata.models.nettests import VanillaTor
from oonidata.models.observations import CircumventionToolObservation
from oonidata.transforms.nettests.measurement_transformer import MeasurementTransformer


class VanillaTorTransformer(MeasurementTransformer):
    """Turn a vanilla_tor measurement into a circumvention tool observation."""

    def make_observations(
        self, msmt: VanillaTor
    ) -> Tuple[List[CircumventionToolObservation]]:
        """Build the single observation for one vanilla_tor measurement.

        Args:
            msmt: the parsed vanilla_tor measurement.

        Returns:
            A 1-tuple whose only element is a list containing exactly one
            ``CircumventionToolObservation``.
        """
        test_keys = msmt.test_keys

        ct_obs = CircumventionToolObservation(
            # A measurement yields exactly one observation, hence the fixed
            # "_0" suffix.
            observation_id=f"{msmt.measurement_uid}_0",
            # Naive UTC timestamp truncated to whole seconds.
            created_at=datetime.utcnow().replace(microsecond=0),
            **dataclasses.asdict(self.measurement_meta),
        )

        # No trailing comma here: it would store a 1-tuple instead of the value.
        ct_obs.bootstrap_time = test_keys.bootstrap_time
        ct_obs.tor_failure = test_keys.failure
        ct_obs.tor_error = test_keys.error
        ct_obs.tor_success = test_keys.success
        ct_obs.tor_timeout = test_keys.timeout

        ct_obs.tor_logs = test_keys.tor_logs
        ct_obs.tor_progress = test_keys.tor_progress
        ct_obs.tor_progress_tag = test_keys.tor_progress_tag
        ct_obs.tor_progress_summary = test_keys.tor_progress_summary
        ct_obs.tor_version = test_keys.tor_version
        ct_obs.tor_transport_name = test_keys.transport_name

        return ([ct_obs],)