Skip to content

Commit

Permalink
Try refactoring
Browse files Browse the repository at this point in the history
  • Loading branch information
mfisherlevine committed Nov 25, 2024
1 parent 79e787e commit 61d5af2
Showing 1 changed file with 85 additions and 12 deletions.
97 changes: 85 additions & 12 deletions python/lsst/summit/extras/soarScraping.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,14 +20,20 @@
# along with this program. If not, see <https://www.gnu.org/licenses/>.

import logging
import os
import re
import shutil
import tempfile
import time
import traceback
from dataclasses import dataclass
from datetime import datetime

import easyocr
import numpy as np
import pandas as pd
import requests
import tables # noqa: F401 required for HDFStore append mode
from packaging import version

# Check Pillow version to determine the correct resampling filter
Expand Down Expand Up @@ -66,12 +72,52 @@ def __repr__(self):
)


class SoarScraper:
class SoarSeeingMonitor:
    """Placeholder interface for looking up seeing values.

    Neither lookup is implemented yet; both methods raise
    `NotImplementedError` unconditionally.
    """

    def getSeeingAtTime(self, time):
        """Look up the seeing for a given time (not implemented).

        Raises
        ------
        NotImplementedError
            Always raised.
        """
        raise NotImplementedError()

    def getSeeingForDataId(self, dataId):
        """Look up the seeing for a given dataId (not implemented).

        Raises
        ------
        NotImplementedError
            Always raised.
        """
        raise NotImplementedError()


class SoarDatabaseBuiler:
STORE_FILE = "seeing_conditions.h5"
ERROR_FILE = "seeing_errors.log"
FAILED_FILES_DIR = "failed_files"

def __init__(self):
    """Set up the OCR reader and make sure the HDF store file exists."""
    # Only let ERROR-and-above records from the "easyocr" logger through.
    ocrLogger = logging.getLogger("easyocr")
    ocrLogger.setLevel(logging.ERROR)
    self.reader = easyocr.Reader(["en"])

    if os.path.exists(self.STORE_FILE):
        return

    # Opening the store in write mode and closing it again straight away
    # leaves an empty store file on disk.
    with pd.HDFStore(self.STORE_FILE, mode="w"):
        pass

def getCurrentSeeingFromWebsite(self):
    """Download the image at ``SOAR_IMAGE_URL`` and parse the seeing from it.

    The image is streamed into a temporary file, which is then passed to
    ``getSeeingConditionsFromFile``. If parsing raises, the traceback is
    appended to ``ERROR_FILE`` and a copy of the image is kept in
    ``FAILED_FILES_DIR``.

    Returns
    -------
    seeingConditions : `SeeingConditions` or `None`
        The parsed conditions, or `None` if parsing the image failed.

    Raises
    ------
    requests.HTTPError
        Raised by ``raise_for_status`` if the download returns an error
        status. Download errors are not caught here.
    """
    with tempfile.NamedTemporaryFile() as temp_file:
        with requests.get(SOAR_IMAGE_URL, stream=True) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=8192):
                temp_file.write(chunk)

        # The parser reopens the file by name, so push buffered bytes to disk.
        temp_file.flush()
        temp_file.seek(0)

        try:
            return self.getSeeingConditionsFromFile(temp_file.name)
        except Exception:
            with open(self.ERROR_FILE, "a") as f:
                f.write(f"Exception at {datetime.now()}:\n")
                traceback.print_exc(file=f)

            # Make sure the destination exists; without this the copy raises
            # FileNotFoundError from inside this handler the first time a
            # parse fails.
            os.makedirs(self.FAILED_FILES_DIR, exist_ok=True)
            # Keep the offending image, named by the time of failure.
            filename = datetime.now().strftime("%Y-%m-%d_%H:%M:%S")
            shutil.copy(temp_file.name, os.path.join(self.FAILED_FILES_DIR, filename + ".png"))

    return None

@staticmethod
def adjust_coords(coords, image_height):
(x0, y0), (x1, y1) = coords
Expand Down Expand Up @@ -103,17 +149,44 @@ def getSeeingConditionsFromFile(self, filename):

return SeeingConditions(date, seeing, freeAtmSeeing, groundLayer)

def getCurrentSeeing(self):
    """Download the image at ``SOAR_IMAGE_URL`` and parse the seeing from it.

    Returns
    -------
    seeing_conditions : `SeeingConditions`
        The result of ``getSeeingConditionsFromFile`` for the downloaded
        image.

    Raises
    ------
    requests.HTTPError
        Raised by ``raise_for_status`` if the download returns an error
        status.
    """
    with tempfile.NamedTemporaryFile() as temp_file:
        with requests.get(SOAR_IMAGE_URL, stream=True) as response:
            response.raise_for_status()
            for chunk in response.iter_content(chunk_size=8192):
                temp_file.write(chunk)

        # The parser reopens the file by name; flush so the trailing bytes
        # are on disk and it does not read a truncated image.
        temp_file.flush()

        # Parse while the temporary file still exists (it is deleted when
        # the enclosing context exits).
        seeing_conditions = self.getSeeingConditionsFromFile(temp_file.name)

    return seeing_conditions
def get_last_timestamp(self):
    """Return the final index value stored under "data" in the HDFStore.

    Returns
    -------
    timestamp : index value or `None`
        The last index entry of the rows selected with ``start=-1``, or
        `None` if the store has no "/data" key or the selection is empty.
    """
    with pd.HDFStore(self.STORE_FILE, mode="r") as store:
        if "/data" not in store.keys():
            return None

        tail = store.select("data", start=-1)
        return None if tail.empty else tail.index[-1]

def run(self):
    """Poll the website forever, appending each new reading to the store.

    Every 30 seconds the current seeing is fetched; a reading is appended
    to the "data" table only when its timestamp is later than the last one
    recorded. Failed fetches (`None`) are skipped. This method never
    returns.
    """
    latestStored = self.get_last_timestamp()

    while True:
        reading = self.getCurrentSeeingFromWebsite()

        if reading is not None:
            stamp = reading.timestamp

            # Only store readings that are newer than what is already held.
            if latestStored is None or stamp > latestStored:
                columns = {
                    "seeing": [reading.seeing],
                    "freeAtmSeeing": [reading.freeAtmSeeing],
                    "groundLayer": [reading.groundLayer],
                }
                row = pd.DataFrame(columns, index=[stamp])

                with pd.HDFStore(self.STORE_FILE, mode="a") as store:
                    store.append("data", row, format="table", data_columns=True)

                latestStored = stamp

        # Same wait whether or not the fetch produced a reading.
        time.sleep(30)

def _getDateTime(self, dateImage):
dateImage_np = np.array(dateImage)
Expand Down

0 comments on commit 61d5af2

Please sign in to comment.