From 1f902e20de0354b400e7f48b3de91854219326eb Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 26 Oct 2023 10:55:44 -0400 Subject: [PATCH 1/4] [query] avoid hanging the JVM in Dataproc Non-daemon threads [keep a JVM alive](https://docs.oracle.com/javase/8/docs/api/java/lang/Thread.html): > When a Java Virtual Machine starts up, there is usually a single non-daemon thread (which > typically calls the method named main of some designated class). The Java Virtual Machine > continues to execute threads until either of the following occurs: > > The exit method of class Runtime has been called and the security manager has permitted the exit > operation to take place. > > All threads that are not daemon threads have died, either by returning from the call to the run > method or by throwing an exception that propagates beyond the run method. Spark appears to wait for the JVM to terminate before it considers a job complete. --- .../main/scala/is/hail/backend/BackendServer.scala | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/hail/src/main/scala/is/hail/backend/BackendServer.scala b/hail/src/main/scala/is/hail/backend/BackendServer.scala index d730ca67611..ad6fb5f0a75 100644 --- a/hail/src/main/scala/is/hail/backend/BackendServer.scala +++ b/hail/src/main/scala/is/hail/backend/BackendServer.scala @@ -2,6 +2,7 @@ package is.hail.backend import java.net.InetSocketAddress import java.nio.charset.StandardCharsets +import java.util.concurrent._ import com.sun.net.httpserver.{HttpContext, HttpExchange, HttpHandler, HttpServer} import org.json4s._ @@ -26,12 +27,22 @@ class BackendServer(backend: Backend) { // 0 => let the OS pick an available port private[this] val httpServer = HttpServer.create(new InetSocketAddress(0), 10) private[this] val handler = new BackendHttpHandler(backend) + private[this] val executor = Executors.newFixedThreadPool(1, + new ThreadFactory() { + private[this] val childFactory = Executors.defaultThreadFactory() + + def 
newThread(r: Runnable): Thread = { + val t = childFactory.newThread(r) + t.setDaemon(true) + t + } + }) def port = httpServer.getAddress.getPort def start(): Unit = { httpServer.createContext("/", handler) - httpServer.setExecutor(null) + httpServer.setExecutor(executor) httpServer.start() } From 94ea90d750a493475b4d77a3237194ef1125292b Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 26 Oct 2023 12:46:22 -0400 Subject: [PATCH 2/4] fix changelog --- hail/python/hail/docs/change_log.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hail/python/hail/docs/change_log.md b/hail/python/hail/docs/change_log.md index 70e87e03db8..34d6ad76e5c 100644 --- a/hail/python/hail/docs/change_log.md +++ b/hail/python/hail/docs/change_log.md @@ -54,7 +54,7 @@ critically depend on experimental functionality.** ## Version 0.2.125 -Released 2023-10-25 +Released 2023-10-26 ### New Features @@ -100,6 +100,7 @@ Released 2023-10-25 - (hail#13894) Fix #13837 in which Hail could break a Spark installation if the Hail JAR appears on the classpath before the Scala JARs. +- (hail#13919) Fix #13915 which prevented using a glob pattern in `hl.import_vcf`. ## Version 0.2.124 From 7325edaaf40cf65c90c6bd7fea47deb450c150ae Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 26 Oct 2023 13:31:31 -0400 Subject: [PATCH 3/4] [hailtop] avoid errors on rare transient errors `aiohttp.ClientOSError` inherits from `OSError`, so we can just use `errno` or `strerror` directly. We should not directly use the `args` because one of the subclasses of `ClientOSError` sets them to *its* arguments after initializing its superclasses with the expected arguments: ```python3 class ClientConnectorError(ClientOSError): """Client connector error. Raised in :class:`aiohttp.connector.TCPConnector` if a connection can not be established. 
""" def __init__(self, connection_key: ConnectionKey, os_error: OSError) -> None: self._conn_key = connection_key self._os_error = os_error super().__init__(os_error.errno, os_error.strerror) self.args = (connection_key, os_error) ``` I also tried to remove `e.args` from the `ClientPayloadError` case (the one right above this, and the only one still using `e.args`), but neither that class nor any superclass sets a field with the error message (in fact, no fields are ever set, so we can only use `e.args`). --- hail/python/hailtop/utils/utils.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index aa5deddcfe5..f161cc988cf 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -638,8 +638,7 @@ def is_transient_error(e: BaseException) -> bool: and e.args[0] == "Response payload is not completed"): return True if (isinstance(e, aiohttp.ClientOSError) - and len(e.args) >= 2 - and 'sslv3 alert bad record mac' in e.args[1]): + and 'sslv3 alert bad record mac' in e.strerror): # aiohttp.client_exceptions.ClientOSError: [Errno 1] [SSL: SSLV3_ALERT_BAD_RECORD_MAC] sslv3 alert bad record mac (_ssl.c:2548) # # This appears to be a symptom of Google rate-limiting as of 2023-10-15 From 14b5d3e14f72667a18d35faf6afa59396520202a Mon Sep 17 00:00:00 2001 From: Dan King Date: Thu, 26 Oct 2023 14:16:13 -0400 Subject: [PATCH 4/4] fix hail_event_loop in test --- hail/python/test/hail/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hail/python/test/hail/conftest.py b/hail/python/test/hail/conftest.py index 1320b74bfef..933b522181c 100644 --- a/hail/python/test/hail/conftest.py +++ b/hail/python/test/hail/conftest.py @@ -9,6 +9,7 @@ from hail import current_backend, init, reset_global_randomness from hail.backend.service_backend import ServiceBackend +from hailtop.hail_event_loop import hail_event_loop from hailtop.utils import secret_alnum_string 
from .helpers import hl_init_for_test, hl_stop_for_test