-
Notifications
You must be signed in to change notification settings - Fork 20
/
Copy pathlists2webkit.py
159 lines (136 loc) · 5.95 KB
/
lists2webkit.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
import json
import os
from settings import config
from publish2rs import publish2rs
from constants import (
DNT_SECTIONS,
PRE_DNT_SECTIONS,
ENTITYLIST_SECTIONS,
WEBKIT_LISTS_DIR,
WEBKIT_BLOCK_ALL,
WEBKIT_BLOCK_COOKIES
)
from utils import (
get_blocked_domains,
add_domain_to_list,
load_json_from_url
)
def process_and_sort_domains(domains):
    """Return the domains accepted by ``add_domain_to_list``, in sorted order.

    Each candidate is offered to ``add_domain_to_list`` together with the
    most recently accepted domain. Only candidates for which the helper
    returns a truthy value are kept.
    """
    accepted = []
    last_accepted = None
    for candidate in sorted(domains):
        # The domain is passed as both the first and second argument; no log
        # file is supplied and the output list is a throwaway.
        # NOTE(review): presumably the helper uses the previous domain to skip
        # adjacent duplicates -- confirm against utils.add_domain_to_list.
        was_added = add_domain_to_list(
            candidate, candidate, last_accepted, None, []
        )
        if not was_added:
            continue
        accepted.append(candidate)
        last_accepted = candidate
    return accepted
def get_tracker_lists(section):
    """Build the processed tracker list for one configuration section.

    Returns a single-entry dict mapping ``section`` to the domains that
    ``process_and_sort_domains`` accepted from the section's blocked domains.
    """
    processed = process_and_sort_domains(get_blocked_domains(config, section))
    print(f"Processing tracker list: {section}")
    return {section: processed}
def get_entity_list(section):
    """Load the entity list for ``section`` and index it by resource.

    The JSON document is fetched via the section's ``entity_url`` option.
    The result maps every entry of each entity's ``resources`` to
    ``{"properties": [...]}`` holding that entity's ``properties`` (an empty
    list when the entity has none). If the same resource appears under
    several entities, the last one encountered wins.
    """
    entity_data = load_json_from_url(config, section, 'entity_url')
    list_name = config.get(section, 'output')
    print(f"Processing entity list: {list_name}")

    by_resource = {}
    for details in entity_data.get("entities", {}).values():
        for res in details.get("resources", []):
            by_resource[res] = {"properties": details.get("properties", [])}
    return by_resource
def build_url_filter(resource):
    """Return the ``url-filter`` regex string for ``resource``.

    The pattern is anchored at the start, accepts ``http`` or ``https``,
    allows an optional subdomain prefix, and then matches ``resource`` with
    each ``.`` escaped. No other character of ``resource`` is escaped.
    """
    scheme_and_subdomain = "^https?://([^/]+\\.)?"
    return scheme_and_subdomain + "\\.".join(resource.split("."))
def find_entity_for_resource(resource, entities):
    """Return the entity whose resource key covers ``resource``, else ``None``.

    A key covers ``resource`` when it is equal to it, equal to its host part
    (everything before the first ``/``), or a parent domain of that host on a
    label boundary (``example.com`` covers ``cdn.example.com``).

    Matching is deliberately NOT a plain substring test: with substring
    matching a key such as ``t.co`` would match ``microsoft.com`` and attach
    an unrelated entity's exemptions to the rule.

    Args:
        resource: Blocked resource, normally a bare domain.
        entities: Mapping of resource key -> entity dict.

    Returns:
        The first matching entity in ``entities`` iteration order, or ``None``.
    """
    host = resource.split("/", 1)[0]
    for key_resource, entity in entities.items():
        if not key_resource:
            # An empty key would otherwise match every resource.
            continue
        if (
            resource == key_resource
            or host == key_resource
            or host.endswith("." + key_resource)
        ):
            return entity
    return None
def build_rule(resource, action_type, entities):
    """Build one WebKit content-blocker rule for ``resource``.

    A WebKit rule is declarative: a ``trigger`` says when the rule fires and
    an ``action`` says what happens when it does.

    Args:
        resource: The resource to match; it is turned into the trigger's
            ``url-filter`` by ``build_url_filter``.
        action_type: Value stored as the action ``type``, e.g. "block" or
            "block-cookies".
        entities: Mapping of resource -> entity dict carrying ``properties``.

    Returns:
        A dict of the form::

            {
                "action": {"type": action_type},
                "trigger": {
                    "url-filter": "<regex>",
                    "load-type": ["third-party"],
                    "unless-domain": ["*trusted.com"],
                },
            }

        ``unless-domain`` lists the domains exempted from the rule. It is
        built from the matching entity's ``properties`` (each prefixed with
        ``*``) and is omitted entirely when there are none.

    The rule is always limited to third-party loads.
    NOTE: First-party support could be added later via ``["first-party"]``.

    For more information, see:
    https://webkit.org/blog/3476/content-blockers-first-look/
    """
    url_filter = build_url_filter(resource)

    # Prefer an exact key match; otherwise fall back to the lookup helper.
    entity = entities.get(resource)
    if not entity:
        entity = find_entity_for_resource(resource, entities)

    unless_domains = []
    if entity and isinstance(entity.get("properties"), list):
        unless_domains = [f"*{domain}" for domain in entity["properties"]]

    trigger = {
        "url-filter": url_filter,
        "load-type": ["third-party"],
    }
    if unless_domains:
        trigger["unless-domain"] = unless_domains

    return {
        "action": {"type": action_type},
        "trigger": trigger,
    }
def generate_content_blocker_list(resources, action_type, entities):
    """Return one content-blocker rule per resource, in input order."""
    rules = []
    for resource in resources:
        rules.append(build_rule(resource, action_type, entities))
    return rules
def write_to_file(content, output_file):
    """Write ``content`` to ``output_file`` as a compact JSON array.

    The array has one element per line: ``[``, then each rule serialised
    without extra whitespace and separated by ``,`` + newline, then ``]``.
    An empty ``content`` produces ``[``, an empty line, and ``]``.

    Args:
        content: Iterable of JSON-serialisable rules (any iterable, including
            a generator).
        output_file: Destination path. Missing parent directories are
            created; a bare file name is written to the current directory.

    Raises:
        TypeError: If a rule is not JSON-serialisable. Serialisation happens
            before the file is opened, so an existing file is left untouched.
        OSError: If the directory or file cannot be created or written.
    """
    # NOTE: This function mimics the behavior of the Swift implementation.
    # We intentionally generate a compact JSON format using separators instead
    # of pretty-printing with json.dumps(indent=2/4). The goal is to create
    # files that are small yet still readable.
    body = ",\n".join(
        json.dumps(rule, separators=(',', ':')) for rule in content
    )

    # os.path.dirname() is "" for a bare file name, and os.makedirs("") raises.
    parent_dir = os.path.dirname(output_file)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(output_file, "w", encoding="utf-8") as f:
        f.write(f"[\n{body}\n]")
def generate_webkit_lists(domains, action_type, name, entities):
    """Build the rules for ``domains`` and write them as a WebKit JSON list.

    The output path is
    ``<WEBKIT_LISTS_DIR>/disconnect-<action_type>-<name>.json``.
    """
    write_to_file(
        generate_content_blocker_list(domains, action_type, entities),
        f"{WEBKIT_LISTS_DIR}/disconnect-{action_type}-{name}.json",
    )
def main():
    """Generate every WebKit list described by the config, then publish.

    Only sections that define ``ios_include_as`` are considered. Tracker
    sections (pre-DNT and DNT) contribute domain lists; entity-list sections
    supply the resource -> entity mapping used for ``unless-domain``.

    Returns:
        ``os.EX_OK`` once all lists are written and ``publish2rs`` has run.
    """
    tracker_lists = {}
    entities = {}

    # First pass: collect tracker domains and the entity mapping.
    for name in config.sections():
        if not config[name].get('ios_include_as'):
            continue

        print(f"Processing section: {name}")

        if name in PRE_DNT_SECTIONS or name in DNT_SECTIONS:
            tracker_lists.update(get_tracker_lists(name))

        if name in ENTITYLIST_SECTIONS:
            # Each matching entity section replaces the previous mapping.
            entities = get_entity_list(name)

    # Second pass: write the lists. This runs after the first pass so that
    # every list is built against the final entity mapping.
    for section_name, domains in tracker_lists.items():
        section = config[section_name]
        list_name = section.get('ios_include_as')

        # Every section with `ios_include_as` gets a block-all list.
        generate_webkit_lists(domains, WEBKIT_BLOCK_ALL, list_name, entities)

        # Sections that enable `ios_block_cookies` also get a
        # block-cookies list.
        if section.getboolean('ios_block_cookies', False):
            generate_webkit_lists(
                domains, WEBKIT_BLOCK_COOKIES, list_name, entities
            )

    print("All content blocker rules have been generated successfully.")

    # Push to remote settings
    publish2rs()
    return os.EX_OK
if __name__ == "__main__":
    # main() returns an exit status; hand it to the interpreter instead of
    # discarding it so callers (shell, CI) see the script's real result.
    raise SystemExit(main())