Skip to content

Commit

Permalink
make xray gather script easier to use
Browse files Browse the repository at this point in the history
  • Loading branch information
dalekreitler-bnl committed Apr 10, 2024
1 parent 5942c46 commit 2ab4b80
Showing 1 changed file with 21 additions and 4 deletions.
25 changes: 21 additions & 4 deletions gather_xray_data.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#!/usr/bin/env python3
#!/nsls2/conda/envs/2023-1.1-py39/bin/python3
# -*- coding: utf-8 -*-
"""
Created on Thu Aug 17 09:00:15 2023
Expand All @@ -9,9 +9,23 @@
import os
import pandas
import subprocess
import datetime

"""
INSTRUCTIONS:
Set DATA_DIRECTORY to the path of the X-ray data directory, e.g.
/nsls2/data/amx/proposals/2023-3/pass-313937/313937-20231105-dtime
Set SAMPLE_NAME to the unique sample id that appears in the master.h5
filename; only entries whose id contains it are included in the dataset.
"""

DATA_DIRECTORY = ""
SAMPLE_NAME = ""

df = pandas.DataFrame()
find_cmd = ["find", "/data", "-maxdepth", "4", "-name", "*summary.csv"]
find_cmd = ["find", f"{DATA_DIRECTORY}", "-maxdepth", "4", "-name", "*summary.csv"]
find_output = subprocess.check_output(find_cmd, universal_newlines=True)
csv_files = find_output.splitlines()

Expand All @@ -32,15 +46,15 @@
for index, row in df.iterrows():
df.at[index, "xtal_id"] = row["Sample_Path"].split("/")[0]

df_filtered = df[df["xtal_id"].str.contains("sample_name")]
df_filtered = df[df["xtal_id"].str.contains(f"{SAMPLE_NAME}")]
final_df = df_filtered.groupby("xtal_id").apply(
lambda group: group.loc[group["Hi"].idxmin()]
)

# get list of all reflection files
find_cmd = [
"find",
"/data",
f"{DATA_DIRECTORY}",
"-name",
"truncate-unique.mtz",
"-o",
Expand All @@ -61,3 +75,6 @@
final_df.at[index, "filepath"] = f

print(final_df)
final_df.to_csv(
f"{SAMPLE_NAME}.{datetime.datetime.now().strftime('%Y%m%d')}.filtered.csv"
)

0 comments on commit 2ab4b80

Please sign in to comment.