mdn · wingman-jr-addon · Nov 5, 2020 · Nov 5, 2020 · Nov 5, 2020 · Nov 5, 2020
diff --git a/http-response/README.md b/http-response/README.md
@@ -2,10 +2,26 @@
 
 ## What it does
 
-Listens to HTTP Responses from example.com and changes the body of the response as it comes through. So that the word "Example" on https://example.com becomes "WebExtension Example".
+Listens to HTTP responses from w3.org and changes "Test" to "WebExtension Check" in the contents of the web pages.
 
 ## What it shows
 
-How to use the response parser on bytes.
+A real-world example of WebRequest that shows four important details not always found in beginning examples:
+ - The accumulation of data through multiple calls to `.ondata`.
+ - The decoding of binary data to text in a streaming fashion.
+ - Text decoding that tries to respect the page's reported encoding via Content-Type.
+ - The encoding of replaced data back to `filter.write` in a streaming fashion.
+
+Note that correctly detecting the character encoding and performing streaming replacements are both deeper
+subjects than can be fully covered in a small example. This code nevertheless provides a starting point for
+solving these problems in your own solution.
+
+The domain w3.org is included in the list of domains to allow testing against [this suite of standardized tests](https://www.w3.org/2006/11/mwbp-tests/index.xhtml)
+regarding text encoding. Tests #1-8 pass; test #9 currently fails.
+
+For inspiration about how to make the charset detection more robust, see:
+https://github.com/Rob--W/open-in-browser/commit/a6b926ea9522b35298632e5e6a2c89ddb456c5d9
+
+## Credits
 
 Icon is from: https://www.iconfinder.com/icons/763339/draw_edit_editor_pen_pencil_tool_write_icon#size=128
diff --git a/http-response/background.js b/http-response/background.js
@@ -1,22 +1,118 @@
 function listener(details) {
+  // If the HTTP response code is not OK, just let it flow through normally.
+  if (details.statusCode < 200 || 300 <= details.statusCode) {
+    console.log('HTTP Status Code was '+details.statusCode+' not 2XX for '+details.url+', skipping filtering.');
+    return;
+  }
+
+  // The received data is a stream of bytes. In order to do text-based
+  // modifications, it is necessary to decode the bytes into a string
+  // using the proper character encoding, do any modifications, then
+  // encode back into a stream of bytes.
+  //
+  // In order to use the correct decoding, one needs to detect the charset.
+  // Please note that there are many complex rules to detect the charset,
+  // and no approach with scanning only the response headers will be
+  // fully accurate. The simplified approach here is to find the
+  // Content-Type and extract the charset if found.
+
+  let {responseHeaders} = details;
+
+  // Find the last Content-Type header.
+  let contentTypeHeader = responseHeaders
+        .slice().reverse()
+        .find(h => h.name.toLowerCase() == "content-type");
+
+  // If Content-Type header is not set, the browser is going to do content-sniffing,
+  // and we should also return to avoid trouble (e.g. breaking downloads, PDFs, videos, ...).
+  if (contentTypeHeader === undefined) {
+    console.log('Content-Type header not found for '+details.url+', skipping filtering');
+    return;
+  }
+
+  // If it not a supported content type, we will return rather than guess.
+  let baseType;
+  let contentType = contentTypeHeader.value.trim();
+  if(contentType.startsWith('text/html')) {
+    baseType = 'text/html';
+  } else if (contentType.startsWith('application/xhtml+xml')) {
+    baseType = 'application/xhtml+xml';
+  } else {
+    console.log('Content type '+contentType+' not supported for '+details.url+', skipping filtering.');
+    return;
+  }
+
+  // Set up TextDecoder
+  console.log('Initial checks passed, beginning charset detection for '+details.url);
+  let charset = detectCharset(contentType) || 'utf-8';
+  let decoder = new TextDecoder(charset);
+  console.log('The detected charset was '+charset+' for '+details.url);
+
+  // While TextDecoder supports most charset encodings, TextEncoder does NOT support
+  // other than 'utf-8', so it is necessary to change the Content-Type on the header
+  // to UTF-8. If modifying this block of code, ensure that the tests at
+  // https://www.w3.org/2006/11/mwbp-tests/index.xhtml
+  // pass - current implementation only fails on #9 but this detection ensures
+  // tests #3, 4, 5, and 8 pass.
+  let encoder = new TextEncoder(); 
+  contentTypeHeader.value = baseType+';charset=utf-8';
+
+
+  // Now the actual filtering can begin!
   let filter = browser.webRequest.filterResponseData(details.requestId);
-  let decoder = new TextDecoder("utf-8");
-  let encoder = new TextEncoder();
-
-  filter.ondata = event => {
-    let str = decoder.decode(event.data, {stream: true});
-    // Just change any instance of Example in the HTTP response
-    // to WebExtension Example.
-    str = str.replace(/Example/g, 'WebExtension Example');
-    filter.write(encoder.encode(str));
-    filter.disconnect();
+  let unprocessedStr = '';
+  let searchString = 'Test';
+  let leaveUnprocessedLength = searchString.length - 1;
+
+  filter.ondata = e => {
+    // Note that the event's data may break in the middle of an encoded
+    // character - the stream parameter is critical for success as this
+    // method gets called multiple times.
+    unprocessedStr += decoder.decode(e.data, {stream: true});
+    // Process the received data as far as possible.
+    // Note this replacement is rather naive but demonstrates the idea
+    // If the search string was contained in the replacement string, 
+    // for instance, the repeated replacement like this could be bad.
+    unprocessedStr = unprocessedStr.replace(/Test/g, 'WebExtension Check');
+    if(unprocessedStr.length > leaveUnprocessedLength) {
+      let processedStr = unprocessedStr.substr(0, leaveUnprocessedLength);
+      unprocessedStr = unprocessedStr.substr(leaveUnprocessedLength);
+      filter.write(encoder.encode(processedStr));
+    }
+  }
+
+  filter.onstop = async _ => {
+    // Flush the decoding buffer
+    unprocessedStr += decoder.decode();
+    // Flush our replacement buffer
+    let processedStr = unprocessedStr.replace(/Test/g, 'WebExtension Check');
+    filter.write(encoder.encode(processedStr));
+    filter.close();
   }
 
-  return {};
+  // Because details response headers have been mutated, return them
+  return details;
+}
+
+// Extracts the last charset parameter from a Content-Type value (undefined if none),
+// e.g. 'text/html; charset="UTF-8"; x=y' -> 'utf-8'. Not robust to poorly formed inputs.
+function detectCharset(contentType) {
+  let charsetMarker = "charset=";
+  let lowered = contentType.toLowerCase();
+  let foundIndex = lowered.lastIndexOf(charsetMarker);
+  if (foundIndex === -1) {
+      return undefined;
+  }
+  let [charset] = lowered.substr(foundIndex+charsetMarker.length).split(';');
+  return charset.replace(/"/g, '').trim() || undefined;
 }
 
-browser.webRequest.onBeforeRequest.addListener(
+// Set up the actual webRequest hook. "responseHeaders" is requested so that
+// the listener receives the headers and can rewrite Content-Type.
+browser.webRequest.onHeadersReceived.addListener(
   listener,
-  {urls: ["https://example.com/*"], types: ["main_frame"]},
-  ["blocking"]
-);
+  // Only top-level documents are filtered. W3 is included for testing
+  // charset detection.
+  {types: ["main_frame"], urls: ["https://www.w3.org/*"]},
+  ["blocking", "responseHeaders"]
+);
diff --git a/http-response/manifest.json b/http-response/manifest.json
@@ -10,7 +10,7 @@
   },
 
   "permissions": [
-    "webRequest", "webRequestBlocking", "https://example.com/*"
+    "webRequest", "webRequestBlocking", "https://www.w3.org/*"
   ],
 
   "background": {