diff --git a/.gitignore b/.gitignore
index 09da277..96a430d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -163,4 +163,7 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
+# rialto-airflow
 data/
+.DS_Store
+
diff --git a/rialto_airflow/harvest/dimensions.py b/rialto_airflow/harvest/dimensions.py
index d308b4b..fe69bab 100644
--- a/rialto_airflow/harvest/dimensions.py
+++ b/rialto_airflow/harvest/dimensions.py
@@ -36,7 +36,8 @@ def dois_from_orcid(orcid):
         logging.warning("Truncated results for ORCID %s", orcid)
     for pub in result["publications"]:
         if pub.get("doi"):
-            yield pub["doi"]
+            doi_id = pub["doi"].replace("https://doi.org/", "")
+            yield doi_id
 
 
 def doi_orcids_pickle(authors_csv, pickle_file, limit=None) -> None:
diff --git a/rialto_airflow/harvest/openalex.py b/rialto_airflow/harvest/openalex.py
index 269f3d4..30c3da1 100644
--- a/rialto_airflow/harvest/openalex.py
+++ b/rialto_airflow/harvest/openalex.py
@@ -62,7 +62,8 @@ def dois_from_orcid(orcid: str):
             # not all publications have DOIs
             doi = pub.get("doi")
             if doi:
-                yield doi
+                doi_id = doi.replace("https://doi.org/", "")
+                yield doi_id
 
 
 def works_from_author_id(author_id, limit=None):
diff --git a/rialto_airflow/harvest/sul_pub.py b/rialto_airflow/harvest/sul_pub.py
index 6d0bd2d..02c3e41 100644
--- a/rialto_airflow/harvest/sul_pub.py
+++ b/rialto_airflow/harvest/sul_pub.py
@@ -4,7 +4,7 @@
 import requests
 
 
-sul_pub_fields = [
+SUL_PUB_FIELDS = [
     "authorship",
     "title",
     "abstract",
@@ -35,7 +35,7 @@
 
 def sul_pub_csv(csv_file, host, key, since=None, limit=None):
     with open(csv_file, "w") as csvfile:
-        writer = csv.DictWriter(csvfile, fieldnames=sul_pub_fields)
+        writer = csv.DictWriter(csvfile, fieldnames=SUL_PUB_FIELDS)
         writer.writeheader()
         for row in harvest(host, key, since, limit):
             writer.writerow(row)
@@ -73,7 +73,7 @@ def harvest(host, key, since, limit):
                 more = False
                 break
 
-            pub = {key: record[key] for key in record if key in sul_pub_fields}
+            pub = {key: record[key] for key in record if key in SUL_PUB_FIELDS}
             pub["doi"] = extract_doi(record)
 
             yield pub
@@ -82,5 +82,6 @@ def harvest(host, key, since, limit):
 def extract_doi(record):
+    """Return the record's DOI without the https://doi.org/ prefix, or None."""
-    for id in record.get("identifier"):
-        if id["type"] == "doi":
-            return id["id"]
+    for identifier in record.get("identifier") or []:  # key may be missing or null
+        if identifier["type"] == "doi":
+            return identifier["id"].replace("https://doi.org/", "")
     return None
diff --git a/test/harvest/test_dimensions.py b/test/harvest/test_dimensions.py
index f7007bd..9aff083 100644
--- a/test/harvest/test_dimensions.py
+++ b/test/harvest/test_dimensions.py
@@ -22,6 +22,7 @@ def test_doi_orcids_dict(tmpdir):
 
     assert len(doi_orcids) > 0
     assert doi_orcids["10.1109/lra.2018.2890209"] == ["0000-0002-0770-2940"]
+    assert "https://doi.org/" not in list(doi_orcids.keys())[0], "doi is an ID"
 
 
 def test_publications_from_dois():
diff --git a/test/harvest/test_openalex.py b/test/harvest/test_openalex.py
index 4b0d1bc..2516876 100644
--- a/test/harvest/test_openalex.py
+++ b/test/harvest/test_openalex.py
@@ -30,6 +30,7 @@ def test_doi_orcids_pickle(tmp_path):
     assert len(mapping) > 0
 
     doi = list(mapping.keys())[0]
+    assert "https://doi.org/" not in doi, "doi is an ID"
     assert "/" in doi
 
     orcids = mapping[doi]