diff --git a/examples/tutorial-noaa.ipynb b/examples/tutorial-noaa.ipynb
index e48d2dc7..013bc1f1 100644
--- a/examples/tutorial-noaa.ipynb
+++ b/examples/tutorial-noaa.ipynb
@@ -74,21 +74,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:28:33,466][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.\n"
+ "[2026-03-24 12:41:47,812][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:47,814][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=18316&dataPublisher=NOAA\n"
+ "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=18316&limit=100\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1582.76it/s]\n"
+ "Parsing NOAA studies: 100%|██████████| 1/1 [00:00, ?it/s]\n",
+ "[2026-03-24 12:41:48,750][INFO] - Retrieved 1 studies.\n"
]
},
{
@@ -120,6 +122,7 @@
"
MostRecentYearBP | \n",
" EarliestYearCE | \n",
" MostRecentYearCE | \n",
+ " Coverage [S, N, W, E] | \n",
" StudyNotes | \n",
" ScienceKeywords | \n",
" Investigators | \n",
@@ -139,6 +142,7 @@
" 11 | \n",
" 733 | \n",
" 1939 | \n",
+ " (-79.47, -79.47, -112.13, -112.13) | \n",
" CO2 concentration and Stable Isotopic Composit... | \n",
" [carbon cycle] | \n",
" Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... | \n",
@@ -157,11 +161,14 @@
" DataType EarliestYearBP MostRecentYearBP EarliestYearCE \\\n",
"0 ICE CORES 1217 11 733 \n",
"\n",
- " MostRecentYearCE StudyNotes \\\n",
- "0 1939 CO2 concentration and Stable Isotopic Composit... \n",
+ " MostRecentYearCE Coverage [S, N, W, E] \\\n",
+ "0 1939 (-79.47, -79.47, -112.13, -112.13) \n",
"\n",
- " ScienceKeywords Investigators \\\n",
- "0 [carbon cycle] Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... \n",
+ " StudyNotes ScienceKeywords \\\n",
+ "0 CO2 concentration and Stable Isotopic Composit... [carbon cycle] \n",
+ "\n",
+ " Investigators \\\n",
+ "0 Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... \n",
"\n",
" Publications \\\n",
"0 [{'Author': 'Ahn, J., E. J. Brook, L. Mitchell... \n",
@@ -224,7 +231,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:28:33,981][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n"
+ "[2026-03-24 12:41:48,799][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:48,801][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
@@ -239,8 +247,10 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 24/24 [00:00<00:00, 6960.54it/s]\n",
- "[2025-12-11 10:28:34,807][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n"
+ "Parsing NOAA studies: 100%|██████████| 24/24 [00:00<00:00, 2378.51it/s]\n",
+ "[2026-03-24 12:41:49,974][INFO] - Retrieved 24 studies.\n",
+ "[2026-03-24 12:41:49,991][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:49,995][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
@@ -257,7 +267,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 2/2 [00:00<00:00, 1997.76it/s]"
+ "Parsing NOAA studies: 100%|██████████| 2/2 [00:00<00:00, 1000.19it/s]\n",
+ "[2026-03-24 12:41:50,765][INFO] - Retrieved 2 studies.\n"
]
},
{
@@ -266,13 +277,6 @@
"text": [
"Found 2 studies.\n"
]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
}
],
"source": [
@@ -318,64 +322,22 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:28:35,380][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n"
+ "[2026-03-24 12:41:50,808][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:50,809][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n",
+ "[2026-03-24 12:41:51,435][WARNING] - No studies found for investigator(s): E.R., Wahl. NOAA expects 'LastName, Initials'. Try variations like:\n",
+ " - 'LastName, Initials'\n",
+ " - 'LastName'\n",
+ " - 'Initials'\n",
+ "[2026-03-24 12:41:51,438][INFO] - Retrieved 0 studies.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&limit=100&investigators=E.R.%2C+Wahl\n"
+ "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&limit=100&investigators=E.R.%2C+Wahl\n",
+ "Found 0 studies.\n"
]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:452: UserWarning: No studies found for investigator(s): E.R., Wahl. NOAA expects 'LastName, Initials'. Try variations like:\n",
- " - 'LastName, Initials'\n",
- " - 'LastName'\n",
- " - 'Initials'\n",
- " warnings.warn(\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "Empty DataFrame\n",
- "Columns: []\n",
- "Index: []"
- ]
- },
- "execution_count": null,
- "metadata": {},
- "output_type": "execute_result"
}
],
"source": [
@@ -412,7 +374,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:28:35,835][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n"
+ "[2026-03-24 12:41:51,498][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:51,501][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
@@ -426,9 +389,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 100/100 [00:00<00:00, 4172.69it/s]\n",
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:501: UserWarning: Retrieved 100 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
- " warnings.warn(\n"
+ "Parsing NOAA studies: 100%|██████████| 100/100 [00:00<00:00, 2330.91it/s]\n",
+ "[2026-03-24 12:41:53,526][WARNING] - Retrieved 100 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
+ "[2026-03-24 12:41:53,527][INFO] - Retrieved 100 studies.\n"
]
},
{
@@ -460,6 +423,7 @@
" MostRecentYearBP | \n",
" EarliestYearCE | \n",
" MostRecentYearCE | \n",
+ " Coverage [S, N, W, E] | \n",
" StudyNotes | \n",
" ScienceKeywords | \n",
" Investigators | \n",
@@ -479,9 +443,10 @@
" -45.0 | \n",
" 1000.0 | \n",
" 1995.0 | \n",
+ " (0.0, 90.0, -180.0, 180.0) | \n",
" Calibration ensemble reconstructions of existi... | \n",
" [carbon cycle, sensitivity, Air Temperature Re... | \n",
- " David Frank, Valerie Trouet, Jan Esper, Christ... | \n",
+ " David Frank, Jan Esper, Christoph Raible, Ulf ... | \n",
" [{'Author': 'Frank, D.C., J. Esper, C.C. Raibl... | \n",
" [[{'DataTableID': '19235', 'DataTableName': 'F... | \n",
" [{'fundingAgency': 'Swiss National Science Fou... | \n",
@@ -496,9 +461,10 @@
" -55.0 | \n",
" 1000.0 | \n",
" 2005.0 | \n",
+ " (0.0, 90.0, -180.0, 180.0) | \n",
" None | \n",
" [Atmospheric and Oceanic Circulation Patterns ... | \n",
- " Kai Kornhuber, Ellie Broadman, Valerie Trouet | \n",
+ " Ellie Broadman, Valerie Trouet, Kai Kornhuber | \n",
" [{'Author': 'Broadman, Ellie, Kai Kornhuber, I... | \n",
" [[{'DataTableID': '56946', 'DataTableName': 'W... | \n",
" [{'fundingAgency': 'US National Science Founda... | \n",
@@ -513,9 +479,10 @@
" -5.0 | \n",
" 50.0 | \n",
" 1955.0 | \n",
+ " (-90.0, 90.0, -180.0, 180.0) | \n",
" Reconstruction of a precipitation-based Southe... | \n",
" [Atmospheric and Oceanic Circulation Patterns ... | \n",
- " Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi... | \n",
+ " Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,... | \n",
" [{'Author': 'Yan, H., L. Sun, Y. Wang, W. Huan... | \n",
" [[{'DataTableID': '20526', 'DataTableName': 'S... | \n",
" [{'fundingAgency': 'National Natural Science F... | \n",
@@ -530,6 +497,7 @@
" -50.0 | \n",
" -50.0 | \n",
" 2000.0 | \n",
+ " (-30.0, 30.0, -180.0, 180.0) | \n",
" Composite reconstruction of low latitude rainf... | \n",
" [Precipitation Reconstruction] | \n",
" Franziska Lechleitner, Sebastian Breitenbach, ... | \n",
@@ -547,9 +515,10 @@
" -27.0 | \n",
" -7439.0 | \n",
" 1977.0 | \n",
+ " (-90.0, 90.0, -180.0, 180.0) | \n",
" Records of common production rate of cosmogeni... | \n",
" [Solar Forcing Reconstruction] | \n",
- " Irene Brunner, Marcus Christl, Hubertus Fische... | \n",
+ " Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ... | \n",
" [{'Author': 'Steinhilber, F., J.A. Abreu, J. B... | \n",
" [[{'DataTableID': '21230', 'DataTableName': 'T... | \n",
" [{'fundingAgency': 'Swiss National Science Fou... | \n",
@@ -570,6 +539,7 @@
" ... | \n",
" ... | \n",
" ... | \n",
+ " ... | \n",
" \n",
" \n",
" | 95 | \n",
@@ -581,11 +551,12 @@
" 0.0 | \n",
" -18050.0 | \n",
" 1950.0 | \n",
+ " (-45.5, 78.49, -163.25, 176.73) | \n",
" Lake status determined at 1000-year intervals ... | \n",
" [hydrology, trends] | \n",
" Frances Alayne Street-Perrott, None Marchand, ... | \n",
" [{'Author': 'Street-Perrott, F.A., D.S. Marcha... | \n",
- " [[{'DataTableID': '9036', 'DataTableName': 'Qu... | \n",
+ " [[{'DataTableID': '9005', 'DataTableName': 'Mo... | \n",
" [] | \n",
"
\n",
" \n",
@@ -598,9 +569,10 @@
" | 0.0 | \n",
" -50169.0 | \n",
" 1950.0 | \n",
+ " (-80.0, 90.0, -180.0, 180.0) | \n",
" None | \n",
" None | \n",
- " William Gray, Sophia Hines, Andrea Burke, Kass... | \n",
+ " Patrick Rafter, William Gray, Sophia Hines, An... | \n",
" [{'Author': 'Rafter, Patrick A., William R. Gr... | \n",
" [[{'DataTableID': '49382', 'DataTableName': 'G... | \n",
" [{'fundingAgency': 'US National Science Founda... | \n",
@@ -615,6 +587,7 @@
" -41.0 | \n",
" 1957.0 | \n",
" 1991.0 | \n",
+ " (-90.0, 90.0, -180.0, 180.0) | \n",
" | \n",
" None | \n",
" Kazimierz Rozanski, Luis Araguás-Araguás, Robe... | \n",
@@ -632,6 +605,7 @@
" 0.0 | \n",
" -23050.0 | \n",
" 1950.0 | \n",
+ " (-80.0, 90.0, -180.0, 180.0) | \n",
" Transient simulation of ocean carbonate chemis... | \n",
" [carbon cycle] | \n",
" Jun Shao, Lowell Stott, William Gray, Rosanna ... | \n",
@@ -649,6 +623,7 @@
" 18000.0 | \n",
" -98050.0 | \n",
" -16050.0 | \n",
+ " (-90.0, 90.0, -180.0, 180.0) | \n",
" Tables and Table Notes \\nTable S1. Change i... | \n",
" [biogeochemical cycles] | \n",
" Karen Kohfeld, Corinne Le Quéré, Sandy Harriso... | \n",
@@ -658,7 +633,7 @@
"
\n",
" \n",
"\n",
- "100 rows × 14 columns
\n",
+ "100 rows × 15 columns
\n",
""
],
"text/plain": [
@@ -688,18 +663,31 @@
"98 PALEOCLIMATIC MODELING 25000.0 0.0 -23050.0 \n",
"99 PALEOCEANOGRAPHY 100000.0 18000.0 -98050.0 \n",
"\n",
- " MostRecentYearCE StudyNotes \\\n",
- "0 1995.0 Calibration ensemble reconstructions of existi... \n",
- "1 2005.0 None \n",
- "2 1955.0 Reconstruction of a precipitation-based Southe... \n",
- "3 2000.0 Composite reconstruction of low latitude rainf... \n",
- "4 1977.0 Records of common production rate of cosmogeni... \n",
- ".. ... ... \n",
- "95 1950.0 Lake status determined at 1000-year intervals ... \n",
- "96 1950.0 None \n",
- "97 1991.0 \n",
- "98 1950.0 Transient simulation of ocean carbonate chemis... \n",
- "99 -16050.0 Tables and Table Notes \\nTable S1. Change i... \n",
+ " MostRecentYearCE Coverage [S, N, W, E] \\\n",
+ "0 1995.0 (0.0, 90.0, -180.0, 180.0) \n",
+ "1 2005.0 (0.0, 90.0, -180.0, 180.0) \n",
+ "2 1955.0 (-90.0, 90.0, -180.0, 180.0) \n",
+ "3 2000.0 (-30.0, 30.0, -180.0, 180.0) \n",
+ "4 1977.0 (-90.0, 90.0, -180.0, 180.0) \n",
+ ".. ... ... \n",
+ "95 1950.0 (-45.5, 78.49, -163.25, 176.73) \n",
+ "96 1950.0 (-80.0, 90.0, -180.0, 180.0) \n",
+ "97 1991.0 (-90.0, 90.0, -180.0, 180.0) \n",
+ "98 1950.0 (-80.0, 90.0, -180.0, 180.0) \n",
+ "99 -16050.0 (-90.0, 90.0, -180.0, 180.0) \n",
+ "\n",
+ " StudyNotes \\\n",
+ "0 Calibration ensemble reconstructions of existi... \n",
+ "1 None \n",
+ "2 Reconstruction of a precipitation-based Southe... \n",
+ "3 Composite reconstruction of low latitude rainf... \n",
+ "4 Records of common production rate of cosmogeni... \n",
+ ".. ... \n",
+ "95 Lake status determined at 1000-year intervals ... \n",
+ "96 None \n",
+ "97 \n",
+ "98 Transient simulation of ocean carbonate chemis... \n",
+ "99 Tables and Table Notes \\nTable S1. Change i... \n",
"\n",
" ScienceKeywords \\\n",
"0 [carbon cycle, sensitivity, Air Temperature Re... \n",
@@ -715,14 +703,14 @@
"99 [biogeochemical cycles] \n",
"\n",
" Investigators \\\n",
- "0 David Frank, Valerie Trouet, Jan Esper, Christ... \n",
- "1 Kai Kornhuber, Ellie Broadman, Valerie Trouet \n",
- "2 Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi... \n",
+ "0 David Frank, Jan Esper, Christoph Raible, Ulf ... \n",
+ "1 Ellie Broadman, Valerie Trouet, Kai Kornhuber \n",
+ "2 Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,... \n",
"3 Franziska Lechleitner, Sebastian Breitenbach, ... \n",
- "4 Irene Brunner, Marcus Christl, Hubertus Fische... \n",
+ "4 Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ... \n",
".. ... \n",
"95 Frances Alayne Street-Perrott, None Marchand, ... \n",
- "96 William Gray, Sophia Hines, Andrea Burke, Kass... \n",
+ "96 Patrick Rafter, William Gray, Sophia Hines, An... \n",
"97 Kazimierz Rozanski, Luis Araguás-Araguás, Robe... \n",
"98 Jun Shao, Lowell Stott, William Gray, Rosanna ... \n",
"99 Karen Kohfeld, Corinne Le Quéré, Sandy Harriso... \n",
@@ -747,7 +735,7 @@
"3 [[{'DataTableID': '33444', 'DataTableName': 'L... \n",
"4 [[{'DataTableID': '21230', 'DataTableName': 'T... \n",
".. ... \n",
- "95 [[{'DataTableID': '9036', 'DataTableName': 'Qu... \n",
+ "95 [[{'DataTableID': '9005', 'DataTableName': 'Mo... \n",
"96 [[{'DataTableID': '49382', 'DataTableName': 'G... \n",
"97 [[{'DataTableID': '32472', 'DataTableName': 'G... \n",
"98 [[{'DataTableID': '44097', 'DataTableName': 'S... \n",
@@ -766,7 +754,7 @@
"98 [{'fundingAgency': 'US National Science Founda... \n",
"99 [] \n",
"\n",
- "[100 rows x 14 columns]"
+ "[100 rows x 15 columns]"
]
},
"execution_count": null,
@@ -829,21 +817,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:28:37,851][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.\n"
+ "[2026-03-24 12:41:53,910][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:53,912][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=18316&dataPublisher=NOAA\n"
+ "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=18316&limit=100\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1730.32it/s]\n"
+ "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1000.79it/s]\n",
+ "[2026-03-24 12:41:54,527][INFO] - Retrieved 1 studies.\n"
]
},
{
@@ -875,6 +865,7 @@
" MostRecentYearBP | \n",
" EarliestYearCE | \n",
" MostRecentYearCE | \n",
+ " Coverage [S, N, W, E] | \n",
" StudyNotes | \n",
" ScienceKeywords | \n",
" Investigators | \n",
@@ -894,6 +885,7 @@
" 11 | \n",
" 733 | \n",
" 1939 | \n",
+ " (-79.47, -79.47, -112.13, -112.13) | \n",
" CO2 concentration and Stable Isotopic Composit... | \n",
" [carbon cycle] | \n",
" Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... | \n",
@@ -912,11 +904,14 @@
" DataType EarliestYearBP MostRecentYearBP EarliestYearCE \\\n",
"0 ICE CORES 1217 11 733 \n",
"\n",
- " MostRecentYearCE StudyNotes \\\n",
- "0 1939 CO2 concentration and Stable Isotopic Composit... \n",
+ " MostRecentYearCE Coverage [S, N, W, E] \\\n",
+ "0 1939 (-79.47, -79.47, -112.13, -112.13) \n",
+ "\n",
+ " StudyNotes ScienceKeywords \\\n",
+ "0 CO2 concentration and Stable Isotopic Composit... [carbon cycle] \n",
"\n",
- " ScienceKeywords Investigators \\\n",
- "0 [carbon cycle] Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... \n",
+ " Investigators \\\n",
+ "0 Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... \n",
"\n",
" Publications \\\n",
"0 [{'Author': 'Ahn, J., E. J. Brook, L. Mitchell... \n",
@@ -1253,8 +1248,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:598: UserWarning: No path specified. Saving BibTeX to: bibtex_20251211_1028.bib\n",
- " warnings.warn(f\"No path specified. Saving BibTeX to: {path}\")\n"
+ "[2026-03-24 12:41:54,639][WARNING] - No path specified. Saving BibTeX to: bibtex_20260324_1241.bib\n"
]
},
{
@@ -1315,6 +1309,276 @@
"dataset.get_publications(save=True)\n"
]
},
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8d1912e2",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " StudyID | \n",
+ " DataType | \n",
+ " SiteID | \n",
+ " SiteName | \n",
+ " LocationName | \n",
+ " Latitude | \n",
+ " Longitude | \n",
+ " MinElevation | \n",
+ " MaxElevation | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 12402 | \n",
+ " OTHER COLLECTIONS | \n",
+ " 22723 | \n",
+ " Global | \n",
+ " Geographic Region>Global | \n",
+ " -90 | \n",
+ " 90 | \n",
+ " None | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 8610 | \n",
+ " ICE CORES | \n",
+ " 20633 | \n",
+ " ACT2 | \n",
+ " Continent>North America>Greenland | \n",
+ " 66.0119 | \n",
+ " -45.158 | \n",
+ " 2410 | \n",
+ " 2410 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 8610 | \n",
+ " ICE CORES | \n",
+ " 22899 | \n",
+ " ACT1 | \n",
+ " Continent>North America>Greenland | \n",
+ " 66.0039 | \n",
+ " -46.5511 | \n",
+ " 2410 | \n",
+ " 2410 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 8610 | \n",
+ " ICE CORES | \n",
+ " 22900 | \n",
+ " ACT3 | \n",
+ " Continent>North America>Greenland | \n",
+ " 65.995 | \n",
+ " -43.6069 | \n",
+ " 2410 | \n",
+ " 2410 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 8610 | \n",
+ " ICE CORES | \n",
+ " 22901 | \n",
+ " ACT4 | \n",
+ " Continent>North America>Greenland | \n",
+ " 65.9811 | \n",
+ " -42.7889 | \n",
+ " 2410 | \n",
+ " 2410 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 6177 | \n",
+ " ICE CORES | \n",
+ " 20633 | \n",
+ " ACT2 | \n",
+ " Continent>North America>Greenland | \n",
+ " 66.0119 | \n",
+ " -45.158 | \n",
+ " 2410 | \n",
+ " 2410 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 16279 | \n",
+ " PALEOLIMNOLOGY | \n",
+ " 55837 | \n",
+ " Lake Haukadalsvatn | \n",
+ " Continent>Europe>Northern Europe>Iceland | \n",
+ " 65.055846 | \n",
+ " -21.625757 | \n",
+ " 38 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 16279 | \n",
+ " PALEOLIMNOLOGY | \n",
+ " 55838 | \n",
+ " Lake Hvítárvatn | \n",
+ " Continent>Europe>Northern Europe>Iceland | \n",
+ " 64.613723 | \n",
+ " -19.843769 | \n",
+ " 38 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 23930 | \n",
+ " TREE RING | \n",
+ " 57630 | \n",
+ " Brooks Range Upland | \n",
+ " Continent>North America>United States Of Ameri... | \n",
+ " 68.375 | \n",
+ " -149.295 | \n",
+ " 910 | \n",
+ " 910 | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 23931 | \n",
+ " TREE RING | \n",
+ " 57631 | \n",
+ " Inigok Riparian | \n",
+ " Continent>North America>United States Of Ameri... | \n",
+ " 69.99 | \n",
+ " -153.04 | \n",
+ " 35 | \n",
+ " 35 | \n",
+ "
\n",
+ " \n",
+ " | 10 | \n",
+ " 23932 | \n",
+ " TREE RING | \n",
+ " 57632 | \n",
+ " Inigok Upland | \n",
+ " Continent>North America>United States Of Ameri... | \n",
+ " 69.99 | \n",
+ " -153.04 | \n",
+ " 38 | \n",
+ " 38 | \n",
+ "
\n",
+ " \n",
+ " | 11 | \n",
+ " 23933 | \n",
+ " TREE RING | \n",
+ " 57633 | \n",
+ " Itkillik Upland | \n",
+ " Continent>North America>United States Of Ameri... | \n",
+ " 68.641 | \n",
+ " -149.614 | \n",
+ " 739 | \n",
+ " 739 | \n",
+ "
\n",
+ " \n",
+ " | 12 | \n",
+ " 22053 | \n",
+ " TREE RING | \n",
+ " 57178 | \n",
+ " Kuparuk Riparian | \n",
+ " Continent>North America>United States Of Ameri... | \n",
+ " 68.662 | \n",
+ " -149.43 | \n",
+ " 717 | \n",
+ " 717 | \n",
+ "
\n",
+ " \n",
+ " | 13 | \n",
+ " 22054 | \n",
+ " TREE RING | \n",
+ " 57179 | \n",
+ " Kuparuk Upland | \n",
+ " Continent>North America>United States Of Ameri... | \n",
+ " 68.662 | \n",
+ " -149.428 | \n",
+ " 720 | \n",
+ " 720 | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " StudyID DataType SiteID SiteName \\\n",
+ "0 12402 OTHER COLLECTIONS 22723 Global \n",
+ "1 8610 ICE CORES 20633 ACT2 \n",
+ "2 8610 ICE CORES 22899 ACT1 \n",
+ "3 8610 ICE CORES 22900 ACT3 \n",
+ "4 8610 ICE CORES 22901 ACT4 \n",
+ "5 6177 ICE CORES 20633 ACT2 \n",
+ "6 16279 PALEOLIMNOLOGY 55837 Lake Haukadalsvatn \n",
+ "7 16279 PALEOLIMNOLOGY 55838 Lake Hvítárvatn \n",
+ "8 23930 TREE RING 57630 Brooks Range Upland \n",
+ "9 23931 TREE RING 57631 Inigok Riparian \n",
+ "10 23932 TREE RING 57632 Inigok Upland \n",
+ "11 23933 TREE RING 57633 Itkillik Upland \n",
+ "12 22053 TREE RING 57178 Kuparuk Riparian \n",
+ "13 22054 TREE RING 57179 Kuparuk Upland \n",
+ "\n",
+ " LocationName Latitude Longitude \\\n",
+ "0 Geographic Region>Global -90 90 \n",
+ "1 Continent>North America>Greenland 66.0119 -45.158 \n",
+ "2 Continent>North America>Greenland 66.0039 -46.5511 \n",
+ "3 Continent>North America>Greenland 65.995 -43.6069 \n",
+ "4 Continent>North America>Greenland 65.9811 -42.7889 \n",
+ "5 Continent>North America>Greenland 66.0119 -45.158 \n",
+ "6 Continent>Europe>Northern Europe>Iceland 65.055846 -21.625757 \n",
+ "7 Continent>Europe>Northern Europe>Iceland 64.613723 -19.843769 \n",
+ "8 Continent>North America>United States Of Ameri... 68.375 -149.295 \n",
+ "9 Continent>North America>United States Of Ameri... 69.99 -153.04 \n",
+ "10 Continent>North America>United States Of Ameri... 69.99 -153.04 \n",
+ "11 Continent>North America>United States Of Ameri... 68.641 -149.614 \n",
+ "12 Continent>North America>United States Of Ameri... 68.662 -149.43 \n",
+ "13 Continent>North America>United States Of Ameri... 68.662 -149.428 \n",
+ "\n",
+ " MinElevation MaxElevation \n",
+ "0 None None \n",
+ "1 2410 2410 \n",
+ "2 2410 2410 \n",
+ "3 2410 2410 \n",
+ "4 2410 2410 \n",
+ "5 2410 2410 \n",
+ "6 38 38 \n",
+ "7 38 38 \n",
+ "8 910 910 \n",
+ "9 35 35 \n",
+ "10 38 38 \n",
+ "11 739 739 \n",
+ "12 717 717 \n",
+ "13 720 720 "
+ ]
+ },
+ "execution_count": null,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset.get_geo()"
+ ]
+ },
{
"cell_type": "markdown",
"id": "30bfdb9d",
@@ -1348,21 +1612,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:28:38,629][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.\n"
+ "[2026-03-24 12:41:54,705][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:54,706][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=18316&dataPublisher=NOAA\n"
+ "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=18316&limit=100\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 2832.08it/s]\n"
+ "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 491.54it/s]\n",
+ "[2026-03-24 12:41:55,514][INFO] - Retrieved 1 studies.\n"
]
},
{
@@ -1903,21 +2169,23 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:28:43,889][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.\n"
+ "[2026-03-24 12:41:59,610][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n",
+ "[2026-03-24 12:41:59,613][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=9957&dataPublisher=NOAA\n"
+ "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=9957&limit=100\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1269.08it/s]\n"
+ "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 486.47it/s]\n",
+ "[2026-03-24 12:42:30,274][INFO] - Retrieved 1 studies.\n"
]
},
{
@@ -1949,6 +2217,7 @@
" MostRecentYearBP | \n",
" EarliestYearCE | \n",
" MostRecentYearCE | \n",
+ " Coverage [S, N, W, E] | \n",
" StudyNotes | \n",
" ScienceKeywords | \n",
" Investigators | \n",
@@ -1968,11 +2237,12 @@
" 7678 | \n",
" -6616 | \n",
" -5728 | \n",
+ " (-13.2167, 25.28, -44.05, 108.08) | \n",
" Oxygen isotope data from six stalagmites in Ch... | \n",
" [abrupt climate change, Intertropical Converge... | \n",
- " R. Lawrence Edwards, Augusto Mangini, Stephen ... | \n",
+ " Hai Cheng, Dominik Fleitmann, R. Lawrence Edwa... | \n",
" [{'Author': 'Cheng, H., D. Fleitmann, R.L. Edw... | \n",
- " [[{'DataTableID': '18803', 'DataTableName': 'H... | \n",
+ " [[{'DataTableID': '18801', 'DataTableName': 'D... | \n",
" [{'fundingAgency': 'Comer Science and Educatio... | \n",
" \n",
" \n",
@@ -1986,6 +2256,9 @@
" EarliestYearBP MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n",
"0 8566 7678 -6616 -5728 \n",
"\n",
+ " Coverage [S, N, W, E] \\\n",
+ "0 (-13.2167, 25.28, -44.05, 108.08) \n",
+ "\n",
" StudyNotes \\\n",
"0 Oxygen isotope data from six stalagmites in Ch... \n",
"\n",
@@ -1993,13 +2266,13 @@
"0 [abrupt climate change, Intertropical Converge... \n",
"\n",
" Investigators \\\n",
- "0 R. Lawrence Edwards, Augusto Mangini, Stephen ... \n",
+ "0 Hai Cheng, Dominik Fleitmann, R. Lawrence Edwa... \n",
"\n",
" Publications \\\n",
"0 [{'Author': 'Cheng, H., D. Fleitmann, R.L. Edw... \n",
"\n",
" Sites \\\n",
- "0 [[{'DataTableID': '18803', 'DataTableName': 'H... \n",
+ "0 [[{'DataTableID': '18801', 'DataTableName': 'D... \n",
"\n",
" Funding \n",
"0 [{'fundingAgency': 'Comer Science and Educatio... "
@@ -2064,229 +2337,229 @@
" \n",
" \n",
" | 0 | \n",
- " 18803 | \n",
- " H14 | \n",
+ " 18801 | \n",
+ " D4 Dongge | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [age, depth, delta 18O] | \n",
" Speleothem | \n",
" 2 | \n",
- " 31383 | \n",
- " Hoti Cave | \n",
- " Continent>Asia>Western Asia>Middle East>Oman | \n",
- " 23.08 | \n",
- " 57.35 | \n",
- " 800 | \n",
- " 800 | \n",
+ " 6554 | \n",
+ " Dongge Cave | \n",
+ " Continent>Asia>Eastern Asia>China | \n",
+ " 25.28 | \n",
+ " 108.08 | \n",
+ " 680 | \n",
+ " 680 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 1 | \n",
- " 18803 | \n",
- " H14 | \n",
+ " 18801 | \n",
+ " D4 Dongge | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [] | \n",
" Speleothem | \n",
" 2 | \n",
- " 31383 | \n",
- " Hoti Cave | \n",
- " Continent>Asia>Western Asia>Middle East>Oman | \n",
- " 23.08 | \n",
- " 57.35 | \n",
- " 800 | \n",
- " 800 | \n",
+ " 6554 | \n",
+ " Dongge Cave | \n",
+ " Continent>Asia>Eastern Asia>China | \n",
+ " 25.28 | \n",
+ " 108.08 | \n",
+ " 680 | \n",
+ " 680 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 2 | \n",
- " 18804 | \n",
- " PAD07 | \n",
+ " 18802 | \n",
+ " DA Dongge | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [age, depth, delta 18O] | \n",
" Speleothem | \n",
" 2 | \n",
- " 31568 | \n",
- " Padre Cave | \n",
- " Continent>South America>Brazil | \n",
- " -13.2167 | \n",
- " -44.05 | \n",
- " 650 | \n",
- " 800 | \n",
+ " 6554 | \n",
+ " Dongge Cave | \n",
+ " Continent>Asia>Eastern Asia>China | \n",
+ " 25.28 | \n",
+ " 108.08 | \n",
+ " 680 | \n",
+ " 680 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 3 | \n",
- " 18804 | \n",
- " PAD07 | \n",
+ " 18802 | \n",
+ " DA Dongge | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [] | \n",
" Speleothem | \n",
" 2 | \n",
- " 31568 | \n",
- " Padre Cave | \n",
- " Continent>South America>Brazil | \n",
- " -13.2167 | \n",
- " -44.05 | \n",
- " 650 | \n",
- " 800 | \n",
+ " 6554 | \n",
+ " Dongge Cave | \n",
+ " Continent>Asia>Eastern Asia>China | \n",
+ " 25.28 | \n",
+ " 108.08 | \n",
+ " 680 | \n",
+ " 680 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 4 | \n",
- " 18805 | \n",
- " PX5 | \n",
+ " 18806 | \n",
+ " Q5 Qunf | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [age, depth, delta 18O] | \n",
" Speleothem | \n",
" 2 | \n",
- " 31569 | \n",
- " Paixão Cave | \n",
- " Continent>South America>Brazil | \n",
- " -12.65 | \n",
- " -41.05 | \n",
+ " 14640 | \n",
+ " Qunf Cave | \n",
+ " Continent>Asia>Western Asia>Middle East>Oman | \n",
+ " 17.17 | \n",
+ " 54.3 | \n",
+ " 650 | \n",
" 650 | \n",
- " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 5 | \n",
- " 18805 | \n",
- " PX5 | \n",
+ " 18806 | \n",
+ " Q5 Qunf | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [] | \n",
" Speleothem | \n",
" 2 | \n",
- " 31569 | \n",
- " Paixão Cave | \n",
- " Continent>South America>Brazil | \n",
- " -12.65 | \n",
- " -41.05 | \n",
+ " 14640 | \n",
+ " Qunf Cave | \n",
+ " Continent>Asia>Western Asia>Middle East>Oman | \n",
+ " 17.17 | \n",
+ " 54.3 | \n",
+ " 650 | \n",
" 650 | \n",
- " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 6 | \n",
- " 18801 | \n",
- " D4 Dongge | \n",
+ " 18803 | \n",
+ " H14 | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [age, depth, delta 18O] | \n",
" Speleothem | \n",
" 2 | \n",
- " 6554 | \n",
- " Dongge Cave | \n",
- " Continent>Asia>Eastern Asia>China | \n",
- " 25.28 | \n",
- " 108.08 | \n",
- " 680 | \n",
- " 680 | \n",
+ " 31383 | \n",
+ " Hoti Cave | \n",
+ " Continent>Asia>Western Asia>Middle East>Oman | \n",
+ " 23.08 | \n",
+ " 57.35 | \n",
+ " 800 | \n",
+ " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 7 | \n",
- " 18801 | \n",
- " D4 Dongge | \n",
+ " 18803 | \n",
+ " H14 | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [] | \n",
" Speleothem | \n",
" 2 | \n",
- " 6554 | \n",
- " Dongge Cave | \n",
- " Continent>Asia>Eastern Asia>China | \n",
- " 25.28 | \n",
- " 108.08 | \n",
- " 680 | \n",
- " 680 | \n",
+ " 31383 | \n",
+ " Hoti Cave | \n",
+ " Continent>Asia>Western Asia>Middle East>Oman | \n",
+ " 23.08 | \n",
+ " 57.35 | \n",
+ " 800 | \n",
+ " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 8 | \n",
- " 18802 | \n",
- " DA Dongge | \n",
+ " 18804 | \n",
+ " PAD07 | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [age, depth, delta 18O] | \n",
" Speleothem | \n",
" 2 | \n",
- " 6554 | \n",
- " Dongge Cave | \n",
- " Continent>Asia>Eastern Asia>China | \n",
- " 25.28 | \n",
- " 108.08 | \n",
- " 680 | \n",
- " 680 | \n",
+ " 31568 | \n",
+ " Padre Cave | \n",
+ " Continent>South America>Brazil | \n",
+ " -13.2167 | \n",
+ " -44.05 | \n",
+ " 650 | \n",
+ " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 9 | \n",
- " 18802 | \n",
- " DA Dongge | \n",
+ " 18804 | \n",
+ " PAD07 | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [] | \n",
" Speleothem | \n",
" 2 | \n",
- " 6554 | \n",
- " Dongge Cave | \n",
- " Continent>Asia>Eastern Asia>China | \n",
- " 25.28 | \n",
- " 108.08 | \n",
- " 680 | \n",
- " 680 | \n",
+ " 31568 | \n",
+ " Padre Cave | \n",
+ " Continent>South America>Brazil | \n",
+ " -13.2167 | \n",
+ " -44.05 | \n",
+ " 650 | \n",
+ " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 10 | \n",
- " 18806 | \n",
- " Q5 Qunf | \n",
+ " 18805 | \n",
+ " PX5 | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [age, depth, delta 18O] | \n",
" Speleothem | \n",
" 2 | \n",
- " 14640 | \n",
- " Qunf Cave | \n",
- " Continent>Asia>Western Asia>Middle East>Oman | \n",
- " 17.17 | \n",
- " 54.3 | \n",
- " 650 | \n",
+ " 31569 | \n",
+ " Paixão Cave | \n",
+ " Continent>South America>Brazil | \n",
+ " -12.65 | \n",
+ " -41.05 | \n",
" 650 | \n",
+ " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
" \n",
" | 11 | \n",
- " 18806 | \n",
- " Q5 Qunf | \n",
+ " 18805 | \n",
+ " PX5 | \n",
" cal yr BP | \n",
" https://www.ncei.noaa.gov/pub/data/paleo/spele... | \n",
" [] | \n",
" Speleothem | \n",
" 2 | \n",
- " 14640 | \n",
- " Qunf Cave | \n",
- " Continent>Asia>Western Asia>Middle East>Oman | \n",
- " 17.17 | \n",
- " 54.3 | \n",
- " 650 | \n",
+ " 31569 | \n",
+ " Paixão Cave | \n",
+ " Continent>South America>Brazil | \n",
+ " -12.65 | \n",
+ " -41.05 | \n",
" 650 | \n",
+ " 800 | \n",
" 9957 | \n",
" 8.2k Event Speleothem Oxygen Isotope Data | \n",
"
\n",
@@ -2296,18 +2569,18 @@
],
"text/plain": [
" DataTableID DataTableName TimeUnit \\\n",
- "0 18803 H14 cal yr BP \n",
- "1 18803 H14 cal yr BP \n",
- "2 18804 PAD07 cal yr BP \n",
- "3 18804 PAD07 cal yr BP \n",
- "4 18805 PX5 cal yr BP \n",
- "5 18805 PX5 cal yr BP \n",
- "6 18801 D4 Dongge cal yr BP \n",
- "7 18801 D4 Dongge cal yr BP \n",
- "8 18802 DA Dongge cal yr BP \n",
- "9 18802 DA Dongge cal yr BP \n",
- "10 18806 Q5 Qunf cal yr BP \n",
- "11 18806 Q5 Qunf cal yr BP \n",
+ "0 18801 D4 Dongge cal yr BP \n",
+ "1 18801 D4 Dongge cal yr BP \n",
+ "2 18802 DA Dongge cal yr BP \n",
+ "3 18802 DA Dongge cal yr BP \n",
+ "4 18806 Q5 Qunf cal yr BP \n",
+ "5 18806 Q5 Qunf cal yr BP \n",
+ "6 18803 H14 cal yr BP \n",
+ "7 18803 H14 cal yr BP \n",
+ "8 18804 PAD07 cal yr BP \n",
+ "9 18804 PAD07 cal yr BP \n",
+ "10 18805 PX5 cal yr BP \n",
+ "11 18805 PX5 cal yr BP \n",
"\n",
" FileURL \\\n",
"0 https://www.ncei.noaa.gov/pub/data/paleo/spele... \n",
@@ -2324,46 +2597,46 @@
"11 https://www.ncei.noaa.gov/pub/data/paleo/spele... \n",
"\n",
" Variables FileDescription TotalFilesAvailable SiteID \\\n",
- "0 [age, depth, delta 18O] Speleothem 2 31383 \n",
- "1 [] Speleothem 2 31383 \n",
- "2 [age, depth, delta 18O] Speleothem 2 31568 \n",
- "3 [] Speleothem 2 31568 \n",
- "4 [age, depth, delta 18O] Speleothem 2 31569 \n",
- "5 [] Speleothem 2 31569 \n",
- "6 [age, depth, delta 18O] Speleothem 2 6554 \n",
- "7 [] Speleothem 2 6554 \n",
- "8 [age, depth, delta 18O] Speleothem 2 6554 \n",
- "9 [] Speleothem 2 6554 \n",
- "10 [age, depth, delta 18O] Speleothem 2 14640 \n",
- "11 [] Speleothem 2 14640 \n",
+ "0 [age, depth, delta 18O] Speleothem 2 6554 \n",
+ "1 [] Speleothem 2 6554 \n",
+ "2 [age, depth, delta 18O] Speleothem 2 6554 \n",
+ "3 [] Speleothem 2 6554 \n",
+ "4 [age, depth, delta 18O] Speleothem 2 14640 \n",
+ "5 [] Speleothem 2 14640 \n",
+ "6 [age, depth, delta 18O] Speleothem 2 31383 \n",
+ "7 [] Speleothem 2 31383 \n",
+ "8 [age, depth, delta 18O] Speleothem 2 31568 \n",
+ "9 [] Speleothem 2 31568 \n",
+ "10 [age, depth, delta 18O] Speleothem 2 31569 \n",
+ "11 [] Speleothem 2 31569 \n",
"\n",
" SiteName LocationName Latitude \\\n",
- "0 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n",
- "1 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n",
- "2 Padre Cave Continent>South America>Brazil -13.2167 \n",
- "3 Padre Cave Continent>South America>Brazil -13.2167 \n",
- "4 Paixão Cave Continent>South America>Brazil -12.65 \n",
- "5 Paixão Cave Continent>South America>Brazil -12.65 \n",
- "6 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
- "7 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
- "8 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
- "9 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
- "10 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n",
- "11 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n",
+ "0 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
+ "1 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
+ "2 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
+ "3 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n",
+ "4 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n",
+ "5 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n",
+ "6 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n",
+ "7 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n",
+ "8 Padre Cave Continent>South America>Brazil -13.2167 \n",
+ "9 Padre Cave Continent>South America>Brazil -13.2167 \n",
+ "10 Paixão Cave Continent>South America>Brazil -12.65 \n",
+ "11 Paixão Cave Continent>South America>Brazil -12.65 \n",
"\n",
" Longitude MinElevation MaxElevation StudyID \\\n",
- "0 57.35 800 800 9957 \n",
- "1 57.35 800 800 9957 \n",
- "2 -44.05 650 800 9957 \n",
- "3 -44.05 650 800 9957 \n",
- "4 -41.05 650 800 9957 \n",
- "5 -41.05 650 800 9957 \n",
- "6 108.08 680 680 9957 \n",
- "7 108.08 680 680 9957 \n",
- "8 108.08 680 680 9957 \n",
- "9 108.08 680 680 9957 \n",
- "10 54.3 650 650 9957 \n",
- "11 54.3 650 650 9957 \n",
+ "0 108.08 680 680 9957 \n",
+ "1 108.08 680 680 9957 \n",
+ "2 108.08 680 680 9957 \n",
+ "3 108.08 680 680 9957 \n",
+ "4 54.3 650 650 9957 \n",
+ "5 54.3 650 650 9957 \n",
+ "6 57.35 800 800 9957 \n",
+ "7 57.35 800 800 9957 \n",
+ "8 -44.05 650 800 9957 \n",
+ "9 -44.05 650 800 9957 \n",
+ "10 -41.05 650 800 9957 \n",
+ "11 -41.05 650 800 9957 \n",
"\n",
" StudyName \n",
"0 8.2k Event Speleothem Oxygen Isotope Data \n",
@@ -3879,8 +4152,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1067: UserWarning: Attached 'https://www.ncei.noaa.gov/pub/data/paleo/climate_forcing/trace_gases/mcelwain1995co2.txt' is not linked to any parent study; can not add metadata.\n",
- " warnings.warn(\n"
+ "[2026-03-24 12:42:32,114][WARNING] - Attached 'https://www.ncei.noaa.gov/pub/data/paleo/climate_forcing/trace_gases/mcelwain1995co2.txt' is not linked to any parent study; can not add metadata.\n"
]
}
],
@@ -4079,8 +4351,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1067: UserWarning: Attached 'https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd' is not linked to any parent study; can not add metadata.\n",
- " warnings.warn(\n"
+ "[2026-03-24 12:42:33,338][WARNING] - Attached 'https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd' is not linked to any parent study; can not add metadata.\n"
]
},
{
@@ -4090,9 +4361,9 @@
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mUnsupportedFileTypeError\u001b[39m Traceback (most recent call last)",
- "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[39]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m error_ds = pt.Dataset()\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1071\u001b[39m, in \u001b[36mDataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1066\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1067\u001b[39m warnings.warn(\n\u001b[32m 1068\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1069\u001b[39m \u001b[38;5;167;01mUserWarning\u001b[39;00m\n\u001b[32m 1070\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1071\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1072\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1073\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:890\u001b[39m, in \u001b[36mDataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 888\u001b[39m file_type = file_url.split(\u001b[33m'\u001b[39m\u001b[33m.\u001b[39m\u001b[33m'\u001b[39m)[-\u001b[32m1\u001b[39m].lower()\n\u001b[32m 889\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._PROPRIETARY_TYPES:\n\u001b[32m--> \u001b[39m\u001b[32m890\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 891\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 892\u001b[39m )\n\u001b[32m 893\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m 894\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 895\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 896\u001b[39m )\n",
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[20]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m error_ds = pt.NOAADataset()\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:1014\u001b[39m, in \u001b[36mNOAADataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1012\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1013\u001b[39m log.warning(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1014\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1015\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1016\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:841\u001b[39m, in \u001b[36mNOAADataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 839\u001b[39m file_type = file_url.split(\u001b[33m'\u001b[39m\u001b[33m.\u001b[39m\u001b[33m'\u001b[39m)[-\u001b[32m1\u001b[39m].lower()\n\u001b[32m 840\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._PROPRIETARY_TYPES:\n\u001b[32m--> \u001b[39m\u001b[32m841\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 842\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 843\u001b[39m )\n\u001b[32m 844\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m 845\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 846\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 847\u001b[39m )\n",
"\u001b[31mUnsupportedFileTypeError\u001b[39m: pyleotups works with .txt files only. File type 'lpd' is proprietary."
]
}
@@ -4113,8 +4384,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1067: UserWarning: Attached 'https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls' is not linked to any parent study; can not add metadata.\n",
- " warnings.warn(\n"
+ "[2026-03-24 12:42:40,758][WARNING] - Attached 'https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls' is not linked to any parent study; can not add metadata.\n"
]
},
{
@@ -4124,9 +4394,9 @@
"traceback": [
"\u001b[31m---------------------------------------------------------------------------\u001b[39m",
"\u001b[31mUnsupportedFileTypeError\u001b[39m Traceback (most recent call last)",
- "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[40]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1071\u001b[39m, in \u001b[36mDataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1066\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1067\u001b[39m warnings.warn(\n\u001b[32m 1068\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1069\u001b[39m \u001b[38;5;167;01mUserWarning\u001b[39;00m\n\u001b[32m 1070\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1071\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1072\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1073\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n",
- "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:894\u001b[39m, in \u001b[36mDataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 890\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 891\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 892\u001b[39m )\n\u001b[32m 893\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m894\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 895\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 896\u001b[39m )\n\u001b[32m 898\u001b[39m \u001b[38;5;66;03m# Step 1: Detect parser type by reading initial lines\u001b[39;00m\n\u001b[32m 899\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdetect_parser_type\u001b[39m(lines):\n\u001b[32m 900\u001b[39m \u001b[38;5;66;03m# 1. Clean lines: strip whitespace and remove empty lines\u001b[39;00m\n",
+ "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[21]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:1014\u001b[39m, in \u001b[36mNOAADataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1012\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1013\u001b[39m log.warning(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1014\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1015\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1016\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n",
+ "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:845\u001b[39m, in \u001b[36mNOAADataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 841\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 842\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 843\u001b[39m )\n\u001b[32m 844\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m845\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 846\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 847\u001b[39m )\n\u001b[32m 849\u001b[39m \u001b[38;5;66;03m# Step 1: Detect parser type by reading initial lines\u001b[39;00m\n\u001b[32m 850\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdetect_parser_type\u001b[39m(lines):\n\u001b[32m 851\u001b[39m \u001b[38;5;66;03m# 1. Clean lines: strip whitespace and remove empty lines\u001b[39;00m\n",
"\u001b[31mUnsupportedFileTypeError\u001b[39m: Invalid file type 'xls'. Only .txt files are supported."
]
}
@@ -4159,7 +4429,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:50:30,372][INFO] - search_studies: Limit set to 10.\n"
+ "[2026-03-24 12:42:51,327][INFO] - search_studies: Limit set to 10.\n",
+ "[2026-03-24 12:42:51,330][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
@@ -4174,7 +4445,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 10559.68it/s]"
+ "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 1539.14it/s]\n",
+ "[2026-03-24 12:42:52,372][WARNING] - Retrieved 10 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
+ "[2026-03-24 12:42:52,376][INFO] - Retrieved 10 studies.\n"
]
},
{
@@ -4184,13 +4457,6 @@
"Current studies in dataset: 10\n"
]
},
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
{
"data": {
"text/html": [
@@ -4220,6 +4486,7 @@
" MostRecentYearBP | \n",
" EarliestYearCE | \n",
" MostRecentYearCE | \n",
+ " Coverage [S, N, W, E] | \n",
" StudyNotes | \n",
" ScienceKeywords | \n",
" Investigators | \n",
@@ -4239,9 +4506,10 @@
" -45.0 | \n",
" 1000.0 | \n",
" 1995.0 | \n",
+ " (0.0, 90.0, -180.0, 180.0) | \n",
" Calibration ensemble reconstructions of existi... | \n",
" [carbon cycle, sensitivity, Air Temperature Re... | \n",
- " David Frank, Valerie Trouet, Jan Esper, Christ... | \n",
+ " David Frank, Jan Esper, Christoph Raible, Ulf ... | \n",
" [{'Author': 'Frank, D.C., J. Esper, C.C. Raibl... | \n",
" [[{'DataTableID': '19235', 'DataTableName': 'F... | \n",
" [{'fundingAgency': 'Swiss National Science Fou... | \n",
@@ -4256,6 +4524,7 @@
" -46.0 | \n",
" 1000.0 | \n",
" 1996.0 | \n",
+ " (56.66667, 69.48333, -18.1957, 18.36667) | \n",
" A set of reconstructions of sea surface temper... | \n",
" None | \n",
" Laura Cunningham, William Austin, Karen-Luise ... | \n",
@@ -4273,9 +4542,10 @@
" -55.0 | \n",
" 1000.0 | \n",
" 2005.0 | \n",
+ " (0.0, 90.0, -180.0, 180.0) | \n",
" None | \n",
" [Atmospheric and Oceanic Circulation Patterns ... | \n",
- " Kai Kornhuber, Ellie Broadman, Valerie Trouet | \n",
+ " Ellie Broadman, Valerie Trouet, Kai Kornhuber | \n",
" [{'Author': 'Broadman, Ellie, Kai Kornhuber, I... | \n",
" [[{'DataTableID': '56946', 'DataTableName': 'W... | \n",
" [{'fundingAgency': 'US National Science Founda... | \n",
@@ -4290,6 +4560,7 @@
" -60.0 | \n",
" 800.0 | \n",
" 2010.0 | \n",
+ " (0.0, 70.0, -80.0, 0.0) | \n",
" Summer (May-September) Atlantic Multidecadal V... | \n",
" [Atmospheric and Oceanic Circulation Patterns ... | \n",
" Jianglin Wang, Bao Yang, Fredrik Ljungqvist, J... | \n",
@@ -4307,9 +4578,10 @@
" -5.0 | \n",
" 50.0 | \n",
" 1955.0 | \n",
+ " (-90.0, 90.0, -180.0, 180.0) | \n",
" Reconstruction of a precipitation-based Southe... | \n",
" [Atmospheric and Oceanic Circulation Patterns ... | \n",
- " Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi... | \n",
+ " Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,... | \n",
" [{'Author': 'Yan, H., L. Sun, Y. Wang, W. Huan... | \n",
" [[{'DataTableID': '20526', 'DataTableName': 'S... | \n",
" [{'fundingAgency': 'National Natural Science F... | \n",
@@ -4324,6 +4596,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " (70.0, 90.0, -180.0, 180.0) | \n",
" Provided Keywords: protactinium-231, 231Pa, th... | \n",
" None | \n",
" Lauren Kipp, Jerry McManus, Markus Kienast | \n",
@@ -4341,6 +4614,7 @@
" -61.0 | \n",
" 1400.0 | \n",
" 2011.0 | \n",
+ " (45.0, 85.0, -180.0, 180.0) | \n",
" Ensemble Climate Reconstructions, input data f... | \n",
" [Air Temperature Reconstruction] | \n",
" Martin Tingley, Peter Huybers | \n",
@@ -4358,9 +4632,10 @@
" -27.0 | \n",
" -7439.0 | \n",
" 1977.0 | \n",
+ " (-90.0, 90.0, -180.0, 180.0) | \n",
" Records of common production rate of cosmogeni... | \n",
" [Solar Forcing Reconstruction] | \n",
- " Irene Brunner, Marcus Christl, Hubertus Fische... | \n",
+ " Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ... | \n",
" [{'Author': 'Steinhilber, F., J.A. Abreu, J. B... | \n",
" [[{'DataTableID': '21230', 'DataTableName': 'T... | \n",
" [{'fundingAgency': 'Swiss National Science Fou... | \n",
@@ -4375,12 +4650,13 @@
" -52.0 | \n",
" -1026.0 | \n",
" 2002.0 | \n",
+ " (64.63805, 64.64305, -19.85912, -19.83995) | \n",
" None | \n",
" [Medieval Warm Period, Little Ice Age (LIA), A... | \n",
" Darren Larsen, Gifford Miller, Áslaug Geirsdót... | \n",
" [{'Author': 'Larsen, D.J., Miller, G.H., Geirs... | \n",
" [[{'DataTableID': '24775', 'DataTableName': 'H... | \n",
- " [] | \n",
+ " [{'fundingAgency': 'US National Science Founda... | \n",
" \n",
" \n",
" | 9 | \n",
@@ -4392,6 +4668,7 @@
" NaN | \n",
" NaN | \n",
" NaN | \n",
+ " (68.433742, 68.433742, -159.173633, -159.173633) | \n",
" None | \n",
" [Arctic, temperature, precipitation] | \n",
" Joseph Stoner, Mark Abbott, Jason Dorfman | \n",
@@ -4428,17 +4705,29 @@
"8 PALEOLIMNOLOGY 2976.0 -52.0 -1026.0 \n",
"9 PALEOLIMNOLOGY NaN NaN NaN \n",
"\n",
- " MostRecentYearCE StudyNotes \\\n",
- "0 1995.0 Calibration ensemble reconstructions of existi... \n",
- "1 1996.0 A set of reconstructions of sea surface temper... \n",
- "2 2005.0 None \n",
- "3 2010.0 Summer (May-September) Atlantic Multidecadal V... \n",
- "4 1955.0 Reconstruction of a precipitation-based Southe... \n",
- "5 NaN Provided Keywords: protactinium-231, 231Pa, th... \n",
- "6 2011.0 Ensemble Climate Reconstructions, input data f... \n",
- "7 1977.0 Records of common production rate of cosmogeni... \n",
- "8 2002.0 None \n",
- "9 NaN None \n",
+ " MostRecentYearCE Coverage [S, N, W, E] \\\n",
+ "0 1995.0 (0.0, 90.0, -180.0, 180.0) \n",
+ "1 1996.0 (56.66667, 69.48333, -18.1957, 18.36667) \n",
+ "2 2005.0 (0.0, 90.0, -180.0, 180.0) \n",
+ "3 2010.0 (0.0, 70.0, -80.0, 0.0) \n",
+ "4 1955.0 (-90.0, 90.0, -180.0, 180.0) \n",
+ "5 NaN (70.0, 90.0, -180.0, 180.0) \n",
+ "6 2011.0 (45.0, 85.0, -180.0, 180.0) \n",
+ "7 1977.0 (-90.0, 90.0, -180.0, 180.0) \n",
+ "8 2002.0 (64.63805, 64.64305, -19.85912, -19.83995) \n",
+ "9 NaN (68.433742, 68.433742, -159.173633, -159.173633) \n",
+ "\n",
+ " StudyNotes \\\n",
+ "0 Calibration ensemble reconstructions of existi... \n",
+ "1 A set of reconstructions of sea surface temper... \n",
+ "2 None \n",
+ "3 Summer (May-September) Atlantic Multidecadal V... \n",
+ "4 Reconstruction of a precipitation-based Southe... \n",
+ "5 Provided Keywords: protactinium-231, 231Pa, th... \n",
+ "6 Ensemble Climate Reconstructions, input data f... \n",
+ "7 Records of common production rate of cosmogeni... \n",
+ "8 None \n",
+ "9 None \n",
"\n",
" ScienceKeywords \\\n",
"0 [carbon cycle, sensitivity, Air Temperature Re... \n",
@@ -4453,14 +4742,14 @@
"9 [Arctic, temperature, precipitation] \n",
"\n",
" Investigators \\\n",
- "0 David Frank, Valerie Trouet, Jan Esper, Christ... \n",
+ "0 David Frank, Jan Esper, Christoph Raible, Ulf ... \n",
"1 Laura Cunningham, William Austin, Karen-Luise ... \n",
- "2 Kai Kornhuber, Ellie Broadman, Valerie Trouet \n",
+ "2 Ellie Broadman, Valerie Trouet, Kai Kornhuber \n",
"3 Jianglin Wang, Bao Yang, Fredrik Ljungqvist, J... \n",
- "4 Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi... \n",
+ "4 Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,... \n",
"5 Lauren Kipp, Jerry McManus, Markus Kienast \n",
"6 Martin Tingley, Peter Huybers \n",
- "7 Irene Brunner, Marcus Christl, Hubertus Fische... \n",
+ "7 Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ... \n",
"8 Darren Larsen, Gifford Miller, Áslaug Geirsdót... \n",
"9 Joseph Stoner, Mark Abbott, Jason Dorfman \n",
"\n",
@@ -4497,7 +4786,7 @@
"5 [] \n",
"6 [{'fundingAgency': 'US National Science Founda... \n",
"7 [{'fundingAgency': 'Swiss National Science Fou... \n",
- "8 [] \n",
+ "8 [{'fundingAgency': 'US National Science Founda... \n",
"9 [{'fundingAgency': 'US National Science Founda... "
]
},
@@ -4526,7 +4815,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:50:47,008][INFO] - search_studies: Limit set to 10.\n"
+ "[2026-03-24 12:42:52,631][INFO] - search_studies: Limit set to 10.\n",
+ "[2026-03-24 12:42:52,634][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
@@ -4542,7 +4832,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 5585.70it/s]"
+ "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 491.82it/s]\n",
+ "[2026-03-24 12:42:53,591][WARNING] - Retrieved 10 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
+ "[2026-03-24 12:42:53,594][INFO] - Retrieved 10 studies.\n"
]
},
{
@@ -4552,13 +4844,6 @@
"Current studies in dataset: 10\n"
]
},
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "\n"
- ]
- },
{
"data": {
"text/html": [
@@ -4588,6 +4873,7 @@
" MostRecentYearBP | \n",
" EarliestYearCE | \n",
" MostRecentYearCE | \n",
+ " Coverage [S, N, W, E] | \n",
" StudyNotes | \n",
" ScienceKeywords | \n",
" Investigators | \n",
@@ -4607,6 +4893,7 @@
" -40 | \n",
" 1961 | \n",
" 1990 | \n",
+ " (-90.0, 90.0, -180.0, 180.0) | \n",
" Matlab code for two-factor (location and year)... | \n",
" None | \n",
" Martin Tingley | \n",
@@ -4624,12 +4911,13 @@
" -53 | \n",
" 1772 | \n",
" 2003 | \n",
- " None | \n",
+ " (65.9811, 66.0119, -46.5511, -42.7889) | \n",
+ " Melt layer stratigraphy for 4 Greenland Arctic... | \n",
" None | \n",
" Sarah Das | \n",
" [] | \n",
" [[{'DataTableID': '12448', 'DataTableName': 'A... | \n",
- " [] | \n",
+ " [{'fundingAgency': 'US National Science Founda... | \n",
"
\n",
" \n",
" | 2 | \n",
@@ -4641,6 +4929,7 @@
" -52 | \n",
" 1772 | \n",
" 2002 | \n",
+ " (66.0119, 66.0119, -45.158, -45.158) | \n",
" None | \n",
" None | \n",
" Joseph McConnell, Ross Edwards | \n",
@@ -4658,8 +4947,9 @@
" -56 | \n",
" -8350 | \n",
" 2006 | \n",
+ " (64.613723, 65.055846, -21.625757, -19.843769) | \n",
" Keywords - Iceland, Lake sediment, Holocene pa... | \n",
- " [Arctic, abrupt climate change, Little Ice Age... | \n",
+ " [Medieval Warm Period, Arctic, abrupt climate ... | \n",
" Áslaug Geirsdóttir, Gifford Miller, Darren Lar... | \n",
" [{'Author': 'Geirsdóttir Á., G.H. Miller, D.J.... | \n",
" [[{'DataTableID': '26370', 'DataTableName': 'H... | \n",
@@ -4675,8 +4965,9 @@
" -65 | \n",
" 1966 | \n",
" 2015 | \n",
+ " (68.375, 68.375, -149.295, -149.295) | \n",
" NOAA Template Raw Measurements file added 2019... | \n",
- " [thin red willow, diamondleaf willow, Salix pu... | \n",
+ " [Tealeaf Willow, thin red willow, diamondleaf ... | \n",
" Daniel Ackerman, R. Daniel Griffin, Sarah Hobb... | \n",
" [{'Author': 'Daniel E. Ackerman, Daniel Griffi... | \n",
" [[{'DataTableID': '35733', 'DataTableName': 'A... | \n",
@@ -4692,6 +4983,7 @@
" -66 | \n",
" 1968 | \n",
" 2016 | \n",
+ " (69.99, 69.99, -153.04, -153.04) | \n",
" NOAA Template Raw Measurements file added 2019... | \n",
" [SAPC, Salix pulchra Cham., Tealeaf Willow, di... | \n",
" Daniel Ackerman, R. Daniel Griffin, Sarah Hobb... | \n",
@@ -4709,6 +5001,7 @@
" -66 | \n",
" 1974 | \n",
" 2016 | \n",
+ " (69.99, 69.99, -153.04, -153.04) | \n",
" NOAA Template Raw Measurements file added 2019... | \n",
" [Tealeaf Willow, thin red willow, Salix pulchr... | \n",
" Daniel Ackerman, R. Daniel Griffin, Sarah Hobb... | \n",
@@ -4726,6 +5019,7 @@
" -66 | \n",
" 1962 | \n",
" 2016 | \n",
+ " (68.641, 68.641, -149.614, -149.614) | \n",
" NOAA Template Raw Measurements file added 2019... | \n",
" [diamondleaf willow, thin red willow, Tealeaf ... | \n",
" Daniel Ackerman, R. Daniel Griffin, Sarah Hobb... | \n",
@@ -4743,6 +5037,7 @@
" -65 | \n",
" 1972 | \n",
" 2015 | \n",
+ " (68.662, 68.662, -149.43, -149.43) | \n",
" Each sample represents mean of 4 radii measure... | \n",
" [thin red willow, SAPC, Tealeaf Willow, diamon... | \n",
" Daniel Ackerman, R. Daniel Griffin, Sarah Hobb... | \n",
@@ -4760,8 +5055,9 @@
" -65 | \n",
" 1965 | \n",
" 2015 | \n",
+ " (68.662, 68.662, -149.428, -149.428) | \n",
" Each sample represents mean of 4 radii measure... | \n",
- " [SAPC, Salix pulchra Cham., Tealeaf Willow, th... | \n",
+ " [diamondleaf willow, SAPC, Salix pulchra Cham.... | \n",
" Daniel Ackerman, R. Daniel Griffin, Sarah Hobb... | \n",
" [{'Author': 'Daniel Ackerman, Daniel Griffin, ... | \n",
" [[{'DataTableID': '33152', 'DataTableName': 'A... | \n",
@@ -4796,29 +5092,41 @@
"8 TREE RING -22 -65 1972 \n",
"9 TREE RING -15 -65 1965 \n",
"\n",
- " MostRecentYearCE StudyNotes \\\n",
- "0 1990 Matlab code for two-factor (location and year)... \n",
- "1 2003 None \n",
- "2 2002 None \n",
- "3 2006 Keywords - Iceland, Lake sediment, Holocene pa... \n",
- "4 2015 NOAA Template Raw Measurements file added 2019... \n",
- "5 2016 NOAA Template Raw Measurements file added 2019... \n",
- "6 2016 NOAA Template Raw Measurements file added 2019... \n",
- "7 2016 NOAA Template Raw Measurements file added 2019... \n",
- "8 2015 Each sample represents mean of 4 radii measure... \n",
- "9 2015 Each sample represents mean of 4 radii measure... \n",
+ " MostRecentYearCE Coverage [S, N, W, E] \\\n",
+ "0 1990 (-90.0, 90.0, -180.0, 180.0) \n",
+ "1 2003 (65.9811, 66.0119, -46.5511, -42.7889) \n",
+ "2 2002 (66.0119, 66.0119, -45.158, -45.158) \n",
+ "3 2006 (64.613723, 65.055846, -21.625757, -19.843769) \n",
+ "4 2015 (68.375, 68.375, -149.295, -149.295) \n",
+ "5 2016 (69.99, 69.99, -153.04, -153.04) \n",
+ "6 2016 (69.99, 69.99, -153.04, -153.04) \n",
+ "7 2016 (68.641, 68.641, -149.614, -149.614) \n",
+ "8 2015 (68.662, 68.662, -149.43, -149.43) \n",
+ "9 2015 (68.662, 68.662, -149.428, -149.428) \n",
+ "\n",
+ " StudyNotes \\\n",
+ "0 Matlab code for two-factor (location and year)... \n",
+ "1 Melt layer stratigraphy for 4 Greenland Arctic... \n",
+ "2 None \n",
+ "3 Keywords - Iceland, Lake sediment, Holocene pa... \n",
+ "4 NOAA Template Raw Measurements file added 2019... \n",
+ "5 NOAA Template Raw Measurements file added 2019... \n",
+ "6 NOAA Template Raw Measurements file added 2019... \n",
+ "7 NOAA Template Raw Measurements file added 2019... \n",
+ "8 Each sample represents mean of 4 radii measure... \n",
+ "9 Each sample represents mean of 4 radii measure... \n",
"\n",
" ScienceKeywords \\\n",
"0 None \n",
"1 None \n",
"2 None \n",
- "3 [Arctic, abrupt climate change, Little Ice Age... \n",
- "4 [thin red willow, diamondleaf willow, Salix pu... \n",
+ "3 [Medieval Warm Period, Arctic, abrupt climate ... \n",
+ "4 [Tealeaf Willow, thin red willow, diamondleaf ... \n",
"5 [SAPC, Salix pulchra Cham., Tealeaf Willow, di... \n",
"6 [Tealeaf Willow, thin red willow, Salix pulchr... \n",
"7 [diamondleaf willow, thin red willow, Tealeaf ... \n",
"8 [thin red willow, SAPC, Tealeaf Willow, diamon... \n",
- "9 [SAPC, Salix pulchra Cham., Tealeaf Willow, th... \n",
+ "9 [diamondleaf willow, SAPC, Salix pulchra Cham.... \n",
"\n",
" Investigators \\\n",
"0 Martin Tingley \n",
@@ -4858,7 +5166,7 @@
"\n",
" Funding \n",
"0 [] \n",
- "1 [] \n",
+ "1 [{'fundingAgency': 'US National Science Founda... \n",
"2 [] \n",
"3 [{'fundingAgency': 'US National Science Founda... \n",
"4 [] \n",
@@ -4914,7 +5222,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:53:15,987][INFO] - search_studies: Limit set to 5.\n"
+ "[2026-03-24 12:42:53,830][INFO] - search_studies: Limit set to 5.\n",
+ "[2026-03-24 12:42:53,833][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
@@ -4929,9 +5238,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 15252.01it/s]\n",
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:501: UserWarning: Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
- " warnings.warn(\n"
+ "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 4960.15it/s]\n",
+ "[2026-03-24 12:42:54,640][WARNING] - Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
+ "[2026-03-24 12:42:54,643][INFO] - Retrieved 5 studies.\n"
]
},
{
@@ -5005,7 +5314,8 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2025-12-11 10:53:16,630][INFO] - search_studies: Limit set to 5.\n"
+ "[2026-03-24 12:42:54,666][INFO] - search_studies: Limit set to 5.\n",
+ "[2026-03-24 12:42:54,667][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n"
]
},
{
@@ -5020,9 +5330,9 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 2753.61it/s]\n",
- "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:501: UserWarning: Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
- " warnings.warn(\n"
+ "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 1001.94it/s]\n",
+ "[2026-03-24 12:42:55,547][WARNING] - Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n",
+ "[2026-03-24 12:42:55,549][INFO] - Retrieved 5 studies.\n"
]
},
{
@@ -5622,7 +5932,15 @@
"name": "python3"
},
"language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
"name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
diff --git a/examples/tutorial-pangaea.ipynb b/examples/tutorial-pangaea.ipynb
index b6dcb5a5..2f4c1695 100644
--- a/examples/tutorial-pangaea.ipynb
+++ b/examples/tutorial-pangaea.ipynb
@@ -247,16 +247,41 @@
"execution_count": null,
"id": "1a99f03e",
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[2026-03-24 12:40:26,623][INFO] - Retrived 10 studies\n",
+ "[2026-03-24 12:40:26,629][WARNING] - The search contains dataset(s) [830589, 897517] marked as collection. Refer to the 'CollectionMembers' column toidentify respective child datasets.\n"
+ ]
+ }
+ ],
"source": [
"ds = dataset.search_studies(\n",
- " q=\"delta 180\",\n",
+ " q=\"Khider, D.\",\n",
" # bbox=(-10, -10, 10, 10), #(min_lon, min_lat, max_lon, max_lat) \n",
" # keywords=[\"Sr/Ca\"],\n",
- " limit=5\n",
+ " limit=10\n",
")\n"
]
},
+ {
+ "cell_type": "markdown",
+ "id": "c4444573",
+ "metadata": {},
+ "source": [
+ "### 2. EXPLORING RESULTS "
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "b6fe20f6",
+ "metadata": {},
+ "source": [
+ "#### a. Getting a summary. "
+ ]
+ },
{
"cell_type": "markdown",
"id": "9c1fbcfb",
@@ -264,7 +289,7 @@
"source": [
"You can access the search results in two ways:\n",
"- `dataset.get_summary()` → returns a DataFrame of search results (default behavior)\n",
- "- `display=True` → immediately returns summary table \n",
+ "- Access the DataFrame directly from the return value of `search_studies`, i.e. `ds`\n",
"\n",
"All results are stored internally in `dataset.studies`"
]
@@ -279,10 +304,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
- "[2026-03-04 12:52:48,092][WARNING] - Data set is of type collection, please select one of its child datasets - \n",
- "[2026-03-04 12:52:48,880][WARNING] - Dataset is either restricted or of type \"collection\" - https://doi.org/10.1594/PANGAEA.753001\n",
- "[2026-03-04 12:52:54,175][WARNING] - Data set is of type collection, please select one of its child datasets - \n",
- "[2026-03-04 12:52:54,926][WARNING] - Dataset is either restricted or of type \"collection\" - https://doi.org/10.1594/PANGAEA.787094\n"
+ "[2026-03-24 12:40:26,647][WARNING] - The search contains dataset(s) [830589, 897517] marked as collection. Refer to the 'CollectionMembers' column toidentify respective child datasets.\n"
]
},
{
@@ -318,150 +340,246 @@
" Publications | \n",
" Sites | \n",
" Funding | \n",
- " native | \n",
+ " CollectionMembers | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
- " 10.1594/PANGAEA.753001 | \n",
- " Meteorological observations and eddy covarianc... | \n",
- " None | \n",
- " None | \n",
- " 2006-05-30T07:15:00 | \n",
- " 2006-09-19T06:15:00 | \n",
- " We present the first ecosystem-scale methane f... | \n",
- " [Arctic Tundra, atmospheric radiation, Eddy Co... | \n",
- " Sachs, Torsten, Wille, Christian, Boike, Julia... | \n",
- " Sachs, Torsten; Wille, Christian; Boike, Julia... | \n",
+ " 830589 | \n",
+ " Stable isotope record of sediment core MD98-2177 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " We present a reconstruction of El Niño Souther... | \n",
" [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
" [] | \n",
- " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7... | \n",
+ " [830586, 830587, 830588] | \n",
"
\n",
" \n",
" | 1 | \n",
- " 10.1594/PANGAEA.853952 | \n",
- " Seaweed - epiphyte - mesograzer communities we... | \n",
+ " 897517 | \n",
+ " Globigerinoides ruber sediment trap data in th... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " We present results here from a high-resolution... | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [897509, 897512, 897513, 897514, 897515, 897516] | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 830588 | \n",
+ " (Table 3) Lead 214 and Lead 210 concentration ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" None | \n",
+ " [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
+ " [] | \n",
" None | \n",
- " 2013-06-19T00:00:00 | \n",
- " 2014-04-12T00:00:00 | \n",
- " Rising seawater temperature and CO2 concentrat... | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 897512 | \n",
+ " Globigerinoides ruber flux analysis from a lon... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" None | \n",
- " Werner, Franziska Julie, Graiff, Angelika, Mat... | \n",
- " Werner, Franziska Julie; Graiff, Angelika; Mat... | \n",
" [] | \n",
- " [{'url': 'https://www.bioacid.de/', 'fundingGr... | \n",
- " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.8... | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
"
\n",
" \n",
- " | 2 | \n",
- " 10.1594/PANGAEA.901492 | \n",
- " Beach profile data for the Elwha River Delta, ... | \n",
+ " 4 | \n",
+ " 897516 | \n",
+ " CTD data from a long-running sediment trap tim... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" None | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 897514 | \n",
+ " Magnesium/Calcium ratio of Globigerinoides rub... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" None | \n",
- " 2011-05-17T02:49:53 | \n",
- " 2011-05-17T08:09:46 | \n",
- " Data were collected using GNSS survey methods,... | \n",
- " [Beach Nourishment, Elwha, Shoreline Change] | \n",
- " Miller, Ian | \n",
- " Miller, Ian (2019): Beach profile data for the... | \n",
" [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
" [] | \n",
- " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9... | \n",
+ " None | \n",
"
\n",
" \n",
- " | 3 | \n",
- " 10.1594/PANGAEA.901614 | \n",
- " Beach profile data for the Elwha River Delta, ... | \n",
+ " 6 | \n",
+ " 830587 | \n",
+ " (Table 2) Age determination of sediment core M... | \n",
+ " 98.0 | \n",
+ " 1950.0 | \n",
+ " 0.0 | \n",
+ " 1852.0 | \n",
" None | \n",
+ " [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 830586 | \n",
+ " (Table S1) Stable carbon and oxygen isotope ra... | \n",
+ " 1231.0 | \n",
+ " 103.0 | \n",
+ " 704.0 | \n",
+ " 1851.0 | \n",
" None | \n",
- " 2016-10-14T08:05:19 | \n",
- " 2016-10-14T09:32:26 | \n",
- " Data were collected using GNSS survey methods,... | \n",
- " [Beach Nourishment, Elwha, Shoreline Change] | \n",
- " Miller, Ian | \n",
- " Miller, Ian (2019): Beach profile data for the... | \n",
" [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
" [] | \n",
- " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9... | \n",
+ " None | \n",
"
\n",
" \n",
- " | 4 | \n",
- " 10.1594/PANGAEA.787094 | \n",
- " Lithologic description and vertical permeabili... | \n",
+ " 8 | \n",
+ " 897509 | \n",
+ " Carbonate measurements from a long-running sed... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" None | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
" None | \n",
- " 1998-06-24T18:30:00 | \n",
- " 1998-07-04T00:15:00 | \n",
- " Vertical permeability testing was conducted on... | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 897513 | \n",
+ " Isotpes analysis of Globigerinoides ruber from... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
" None | \n",
- " Stover, S Cheree, Screaton, Elizabeth J, Likos... | \n",
- " Stover, S Cheree; Screaton, Elizabeth J; Likos... | \n",
" [] | \n",
- " [{'url': 'https://www-odp.tamu.edu:443/', 'fun... | \n",
- " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7... | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
"
\n",
" \n",
"\n",
""
],
"text/plain": [
- " StudyID StudyName \\\n",
- "0 10.1594/PANGAEA.753001 Meteorological observations and eddy covarianc... \n",
- "1 10.1594/PANGAEA.853952 Seaweed - epiphyte - mesograzer communities we... \n",
- "2 10.1594/PANGAEA.901492 Beach profile data for the Elwha River Delta, ... \n",
- "3 10.1594/PANGAEA.901614 Beach profile data for the Elwha River Delta, ... \n",
- "4 10.1594/PANGAEA.787094 Lithologic description and vertical permeabili... \n",
- "\n",
- " EarliestYearBP MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n",
- "0 None None 2006-05-30T07:15:00 2006-09-19T06:15:00 \n",
- "1 None None 2013-06-19T00:00:00 2014-04-12T00:00:00 \n",
- "2 None None 2011-05-17T02:49:53 2011-05-17T08:09:46 \n",
- "3 None None 2016-10-14T08:05:19 2016-10-14T09:32:26 \n",
- "4 None None 1998-06-24T18:30:00 1998-07-04T00:15:00 \n",
+ " StudyID StudyName EarliestYearBP \\\n",
+ "0 830589 Stable isotope record of sediment core MD98-2177 NaN \n",
+ "1 897517 Globigerinoides ruber sediment trap data in th... NaN \n",
+ "2 830588 (Table 3) Lead 214 and Lead 210 concentration ... NaN \n",
+ "3 897512 Globigerinoides ruber flux analysis from a lon... NaN \n",
+ "4 897516 CTD data from a long-running sediment trap tim... NaN \n",
+ "5 897514 Magnesium/Calcium ratio of Globigerinoides rub... NaN \n",
+ "6 830587 (Table 2) Age determination of sediment core M... 98.0 \n",
+ "7 830586 (Table S1) Stable carbon and oxygen isotope ra... 1231.0 \n",
+ "8 897509 Carbonate measurements from a long-running sed... NaN \n",
+ "9 897513 Isotpes analysis of Globigerinoides ruber from... NaN \n",
"\n",
- " StudyNotes \\\n",
- "0 We present the first ecosystem-scale methane f... \n",
- "1 Rising seawater temperature and CO2 concentrat... \n",
- "2 Data were collected using GNSS survey methods,... \n",
- "3 Data were collected using GNSS survey methods,... \n",
- "4 Vertical permeability testing was conducted on... \n",
+ " MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "5 NaN NaN NaN \n",
+ "6 1950.0 0.0 1852.0 \n",
+ "7 103.0 704.0 1851.0 \n",
+ "8 NaN NaN NaN \n",
+ "9 NaN NaN NaN \n",
"\n",
- " ScienceKeywords \\\n",
- "0 [Arctic Tundra, atmospheric radiation, Eddy Co... \n",
- "1 None \n",
- "2 [Beach Nourishment, Elwha, Shoreline Change] \n",
- "3 [Beach Nourishment, Elwha, Shoreline Change] \n",
- "4 None \n",
+ " StudyNotes ScienceKeywords \\\n",
+ "0 We present a reconstruction of El Niño Souther... [] \n",
+ "1 We present results here from a high-resolution... [] \n",
+ "2 None [] \n",
+ "3 None [] \n",
+ "4 None [] \n",
+ "5 None [] \n",
+ "6 None [] \n",
+ "7 None [] \n",
+ "8 None [] \n",
+ "9 None [] \n",
"\n",
" Investigators \\\n",
- "0 Sachs, Torsten, Wille, Christian, Boike, Julia... \n",
- "1 Werner, Franziska Julie, Graiff, Angelika, Mat... \n",
- "2 Miller, Ian \n",
- "3 Miller, Ian \n",
- "4 Stover, S Cheree, Screaton, Elizabeth J, Likos... \n",
+ "0 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "1 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "2 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "3 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "4 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "5 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "6 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "7 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "8 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "9 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
"\n",
- " Publications Sites \\\n",
- "0 Sachs, Torsten; Wille, Christian; Boike, Julia... [] \n",
- "1 Werner, Franziska Julie; Graiff, Angelika; Mat... [] \n",
- "2 Miller, Ian (2019): Beach profile data for the... [] \n",
- "3 Miller, Ian (2019): Beach profile data for the... [] \n",
- "4 Stover, S Cheree; Screaton, Elizabeth J; Likos... [] \n",
+ " Publications Sites \\\n",
+ "0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "1 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [] \n",
+ "2 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "3 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "4 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "6 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "7 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "8 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "9 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
"\n",
- " Funding \\\n",
- "0 [] \n",
- "1 [{'url': 'https://www.bioacid.de/', 'fundingGr... \n",
- "2 [] \n",
- "3 [] \n",
- "4 [{'url': 'https://www-odp.tamu.edu:443/', 'fun... \n",
- "\n",
- " native \n",
- "0 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7... \n",
- "1 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.8... \n",
- "2 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9... \n",
- "3 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9... \n",
- "4 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7... "
+ " Funding CollectionMembers \n",
+ "0 [] [830586, 830587, 830588] \n",
+ "1 [] [897509, 897512, 897513, 897514, 897515, 897516] \n",
+ "2 [] None \n",
+ "3 [] None \n",
+ "4 [] None \n",
+ "5 [] None \n",
+ "6 [] None \n",
+ "7 [] None \n",
+ "8 [] None \n",
+ "9 [] None "
]
},
"execution_count": null,
@@ -478,7 +596,7 @@
"id": "599109b3",
"metadata": {},
"source": [
- "### 3. Geographic Metadata\n",
+ "#### b. Exploring Geographic Metadata\n",
"\n",
"Returns site-level geographic information where available.\n",
"\n",
@@ -518,20 +636,634 @@
" LocationName | \n",
" Latitude | \n",
" Longitude | \n",
- " MinElevation | \n",
- " MaxElevation | \n",
- " DataType | \n",
+ " Elevation | \n",
" \n",
" \n",
" \n",
+ " \n",
+ " | 0 | \n",
+ " 830589 | \n",
+ " 2013989 | \n",
+ " MD98-2177 | \n",
+ " None | \n",
+ " 1.4 | \n",
+ " 119.08 | \n",
+ " -968.0 | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 830588 | \n",
+ " 2013989 | \n",
+ " MD98-2177 | \n",
+ " None | \n",
+ " 1.4 | \n",
+ " 119.08 | \n",
+ " -968.0 | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 897512 | \n",
+ " 2901221 | \n",
+ " GMT_Gulf_of_Mexico | \n",
+ " Gulf of Mexico | \n",
+ " 27.5 | \n",
+ " -90.30 | \n",
+ " -700.0 | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 897516 | \n",
+ " 2901221 | \n",
+ " GMT_Gulf_of_Mexico | \n",
+ " Gulf of Mexico | \n",
+ " 27.5 | \n",
+ " -90.30 | \n",
+ " -700.0 | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 897514 | \n",
+ " 2901221 | \n",
+ " GMT_Gulf_of_Mexico | \n",
+ " Gulf of Mexico | \n",
+ " 27.5 | \n",
+ " -90.30 | \n",
+ " -700.0 | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 830587 | \n",
+ " 2013989 | \n",
+ " MD98-2177 | \n",
+ " None | \n",
+ " 1.4 | \n",
+ " 119.08 | \n",
+ " -968.0 | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 830586 | \n",
+ " 2013989 | \n",
+ " MD98-2177 | \n",
+ " None | \n",
+ " 1.4 | \n",
+ " 119.08 | \n",
+ " -968.0 | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 897509 | \n",
+ " 2901221 | \n",
+ " GMT_Gulf_of_Mexico | \n",
+ " Gulf of Mexico | \n",
+ " 27.5 | \n",
+ " -90.30 | \n",
+ " -700.0 | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 897513 | \n",
+ " 2901221 | \n",
+ " GMT_Gulf_of_Mexico | \n",
+ " Gulf of Mexico | \n",
+ " 27.5 | \n",
+ " -90.30 | \n",
+ " -700.0 | \n",
+ "
\n",
" \n",
"\n",
""
],
"text/plain": [
- "Empty DataFrame\n",
- "Columns: [StudyID, SiteID, SiteName, LocationName, Latitude, Longitude, MinElevation, MaxElevation, DataType]\n",
- "Index: []"
+ " StudyID SiteID SiteName LocationName Latitude Longitude \\\n",
+ "0 830589 2013989 MD98-2177 None 1.4 119.08 \n",
+ "1 830588 2013989 MD98-2177 None 1.4 119.08 \n",
+ "2 897512 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n",
+ "3 897516 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n",
+ "4 897514 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n",
+ "5 830587 2013989 MD98-2177 None 1.4 119.08 \n",
+ "6 830586 2013989 MD98-2177 None 1.4 119.08 \n",
+ "7 897509 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n",
+ "8 897513 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n",
+ "\n",
+ " Elevation \n",
+ "0 -968.0 \n",
+ "1 -968.0 \n",
+ "2 -700.0 \n",
+ "3 -700.0 \n",
+ "4 -700.0 \n",
+ "5 -968.0 \n",
+ "6 -968.0 \n",
+ "7 -700.0 \n",
+ "8 -700.0 "
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "geo = dataset.get_geo()\n",
+ "display(geo)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2e70d2c2",
+ "metadata": {},
+ "source": [
+ "#### c. Exploring the Listed Publications"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e1a552c7",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "(BibliographyData(\n",
+ " entries=OrderedCaseInsensitiveDict([\n",
+ " ('10_1594_PANGAEA_830589_1_1', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', 'Stable isotope record of sediment core MD98-2177'),\n",
+ " ('year', '2011'),\n",
+ " ('doi', '10.1594/PANGAEA.830589'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.830589')],\n",
+ " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n",
+ " ('10_1594_PANGAEA_897517_1_2', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', 'Globigerinoides ruber sediment trap data in the Gulf of Mexico'),\n",
+ " ('year', '2019'),\n",
+ " ('doi', '10.1594/PANGAEA.897517'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.897517')],\n",
+ " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n",
+ " ('10_1594_PANGAEA_830588_1_3', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', '(Table 3) Lead 214 and Lead 210 concentration of sediment core MD98-2177'),\n",
+ " ('year', '2011'),\n",
+ " ('doi', '10.1594/PANGAEA.830588'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.830588')],\n",
+ " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n",
+ " ('10_1594_PANGAEA_897512_1_4', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', 'Globigerinoides ruber flux analysis from a long-running sediment trap time series from the northern Gulf of Mexico'),\n",
+ " ('year', '2019'),\n",
+ " ('doi', '10.1594/PANGAEA.897512'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.897512')],\n",
+ " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n",
+ " ('10_1594_PANGAEA_897516_1_5', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', 'CTD data from a long-running sediment trap time series from the northern Gulf of Mexico'),\n",
+ " ('year', '2019'),\n",
+ " ('doi', '10.1594/PANGAEA.897516'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.897516')],\n",
+ " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n",
+ " ('10_1594_PANGAEA_897514_1_6', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', 'Magnesium/Calcium ratio of Globigerinoides ruber from a long-running sediment trap time series from the northern Gulf of Mexico'),\n",
+ " ('year', '2019'),\n",
+ " ('doi', '10.1594/PANGAEA.897514'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.897514')],\n",
+ " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n",
+ " ('10_1594_PANGAEA_830587_1_7', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', '(Table 2) Age determination of sediment core MD98-2177'),\n",
+ " ('year', '2011'),\n",
+ " ('doi', '10.1594/PANGAEA.830587'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.830587')],\n",
+ " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n",
+ " ('10_1594_PANGAEA_830586_1_8', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', '(Table S1) Stable carbon and oxygen isotope ratios of Pulleniatina obliquiloculata of sediment core MD98-2177'),\n",
+ " ('year', '2011'),\n",
+ " ('doi', '10.1594/PANGAEA.830586'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.830586')],\n",
+ " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n",
+ " ('10_1594_PANGAEA_897509_1_9', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', 'Carbonate measurements from a long-running sediment trap time series from the northern Gulf of Mexico'),\n",
+ " ('year', '2019'),\n",
+ " ('doi', '10.1594/PANGAEA.897509'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.897509')],\n",
+ " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n",
+ " ('10_1594_PANGAEA_897513_1_10', Entry('misc',\n",
+ " fields=[\n",
+ " ('title', 'Isotpes analysis of Globigerinoides ruber from a long-running sediment trap time series from the northern Gulf of Mexico'),\n",
+ " ('year', '2019'),\n",
+ " ('doi', '10.1594/PANGAEA.897513'),\n",
+ " ('url', 'https://doi.org/10.1594/PANGAEA.897513')],\n",
+ " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]}))]),\n",
+ " \n",
+ " preamble=[]),\n",
+ " StudyID StudyName \\\n",
+ " 0 830589 Stable isotope record of sediment core MD98-2177 \n",
+ " 1 830589 Stable isotope record of sediment core MD98-2177 \n",
+ " 2 897517 Globigerinoides ruber sediment trap data in th... \n",
+ " 3 897517 Globigerinoides ruber sediment trap data in th... \n",
+ " 4 830588 (Table 3) Lead 214 and Lead 210 concentration ... \n",
+ " 5 897512 Globigerinoides ruber flux analysis from a lon... \n",
+ " 6 897516 CTD data from a long-running sediment trap tim... \n",
+ " 7 897514 Magnesium/Calcium ratio of Globigerinoides rub... \n",
+ " 8 830587 (Table 2) Age determination of sediment core M... \n",
+ " 9 830586 (Table S1) Stable carbon and oxygen isotope ra... \n",
+ " 10 897509 Carbonate measurements from a long-running sed... \n",
+ " 11 897513 Isotpes analysis of Globigerinoides ruber from... \n",
+ " \n",
+ " Author \\\n",
+ " 0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 1 None \n",
+ " 2 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 3 None \n",
+ " 4 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 6 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 7 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 8 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 9 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 10 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 11 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " \n",
+ " Title Journal Year Volume \\\n",
+ " 0 Stable isotope record of sediment core MD98-2177 PANGAEA 2011 None \n",
+ " 1 Khider, D et al. (2011): Assessing El Niño Sou... None 2011 None \n",
+ " 2 Globigerinoides ruber sediment trap data in th... PANGAEA 2019 None \n",
+ " 3 Richey, JN et al. (2019): Considerations for G... None 2019 None \n",
+ " 4 (Table 3) Lead 214 and Lead 210 concentration ... PANGAEA 2011 None \n",
+ " 5 Globigerinoides ruber flux analysis from a lon... PANGAEA 2019 None \n",
+ " 6 CTD data from a long-running sediment trap tim... PANGAEA 2019 None \n",
+ " 7 Magnesium/Calcium ratio of Globigerinoides rub... PANGAEA 2019 None \n",
+ " 8 (Table 2) Age determination of sediment core M... PANGAEA 2011 None \n",
+ " 9 (Table S1) Stable carbon and oxygen isotope ra... PANGAEA 2011 None \n",
+ " 10 Carbonate measurements from a long-running sed... PANGAEA 2019 None \n",
+ " 11 Isotpes analysis of Globigerinoides ruber from... PANGAEA 2019 None \n",
+ " \n",
+ " Number Pages Type DOI \\\n",
+ " 0 None None dataset 10.1594/PANGAEA.830589 \n",
+ " 1 None None article 10.1029/2011PA002139 \n",
+ " 2 None None dataset 10.1594/PANGAEA.897517 \n",
+ " 3 None None article 10.1029/2018PA003417 \n",
+ " 4 None None dataset 10.1594/PANGAEA.830588 \n",
+ " 5 None None dataset 10.1594/PANGAEA.897512 \n",
+ " 6 None None dataset 10.1594/PANGAEA.897516 \n",
+ " 7 None None dataset 10.1594/PANGAEA.897514 \n",
+ " 8 None None dataset 10.1594/PANGAEA.830587 \n",
+ " 9 None None dataset 10.1594/PANGAEA.830586 \n",
+ " 10 None None dataset 10.1594/PANGAEA.897509 \n",
+ " 11 None None dataset 10.1594/PANGAEA.897513 \n",
+ " \n",
+ " URL \\\n",
+ " 0 https://doi.org/10.1594/PANGAEA.830589 \n",
+ " 1 None \n",
+ " 2 https://doi.org/10.1594/PANGAEA.897517 \n",
+ " 3 None \n",
+ " 4 https://doi.org/10.1594/PANGAEA.830588 \n",
+ " 5 https://doi.org/10.1594/PANGAEA.897512 \n",
+ " 6 https://doi.org/10.1594/PANGAEA.897516 \n",
+ " 7 https://doi.org/10.1594/PANGAEA.897514 \n",
+ " 8 https://doi.org/10.1594/PANGAEA.830587 \n",
+ " 9 https://doi.org/10.1594/PANGAEA.830586 \n",
+ " 10 https://doi.org/10.1594/PANGAEA.897509 \n",
+ " 11 https://doi.org/10.1594/PANGAEA.897513 \n",
+ " \n",
+ " CitationKey \n",
+ " 0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 1 Khider, D et al. (2011): Assessing El Niño Sou... \n",
+ " 2 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 3 Richey, JN et al. (2019): Considerations for G... \n",
+ " 4 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 6 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 7 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 8 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 9 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n",
+ " 10 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n",
+ " 11 Richey, Julie N; Thirumalai, Kaustubh; Khider,... )"
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "pubs = dataset.get_publications()\n",
+ "display(pubs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "78e7e978",
+ "metadata": {},
+ "source": [
+ "Save the publications directly to BibTeX. The following saves the publications to a file named `pangaea-datasets.bib` in your working directory. Alter the path to save to your desired location."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b053f5b6",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "save = dataset.get_publications(save = True, path=\"./pangaea-datasets.bib\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "2bf2ead1",
+ "metadata": {},
+ "source": [
+ "### 3. Extract Data from studies"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "665a4353",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "[2026-03-24 12:53:26,636][WARNING] - The search contains dataset(s) [830589, 897517] marked as collection. Refer to the 'CollectionMembers' column toidentify respective child datasets.\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " StudyID | \n",
+ " StudyName | \n",
+ " EarliestYearBP | \n",
+ " MostRecentYearBP | \n",
+ " EarliestYearCE | \n",
+ " MostRecentYearCE | \n",
+ " StudyNotes | \n",
+ " ScienceKeywords | \n",
+ " Investigators | \n",
+ " Publications | \n",
+ " Sites | \n",
+ " Funding | \n",
+ " CollectionMembers | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 830589 | \n",
+ " Stable isotope record of sediment core MD98-2177 | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " We present a reconstruction of El Niño Souther... | \n",
+ " [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
+ " [] | \n",
+ " [830586, 830587, 830588] | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 897517 | \n",
+ " Globigerinoides ruber sediment trap data in th... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " We present results here from a high-resolution... | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [] | \n",
+ " [] | \n",
+ " [897509, 897512, 897513, 897514, 897515, 897516] | \n",
+ "
\n",
+ " \n",
+ " | 2 | \n",
+ " 830588 | \n",
+ " (Table 3) Lead 214 and Lead 210 concentration ... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 3 | \n",
+ " 897512 | \n",
+ " Globigerinoides ruber flux analysis from a lon... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 4 | \n",
+ " 897516 | \n",
+ " CTD data from a long-running sediment trap tim... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 5 | \n",
+ " 897514 | \n",
+ " Magnesium/Calcium ratio of Globigerinoides rub... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 6 | \n",
+ " 830587 | \n",
+ " (Table 2) Age determination of sediment core M... | \n",
+ " 98.0 | \n",
+ " 1950.0 | \n",
+ " 0.0 | \n",
+ " 1852.0 | \n",
+ " None | \n",
+ " [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 7 | \n",
+ " 830586 | \n",
+ " (Table S1) Stable carbon and oxygen isotope ra... | \n",
+ " 1231.0 | \n",
+ " 103.0 | \n",
+ " 704.0 | \n",
+ " 1851.0 | \n",
+ " None | \n",
+ " [] | \n",
+ " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... | \n",
+ " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... | \n",
+ " [MD98-2177] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 8 | \n",
+ " 897509 | \n",
+ " Carbonate measurements from a long-running sed... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ " | 9 | \n",
+ " 897513 | \n",
+ " Isotpes analysis of Globigerinoides ruber from... | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " NaN | \n",
+ " None | \n",
+ " [] | \n",
+ " Richey, Julie N, Thirumalai, Kaustubh, Khider,... | \n",
+ " Richey, Julie N; Thirumalai, Kaustubh; Khider,... | \n",
+ " [GMT_Gulf_of_Mexico] | \n",
+ " [] | \n",
+ " None | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " StudyID StudyName EarliestYearBP \\\n",
+ "0 830589 Stable isotope record of sediment core MD98-2177 NaN \n",
+ "1 897517 Globigerinoides ruber sediment trap data in th... NaN \n",
+ "2 830588 (Table 3) Lead 214 and Lead 210 concentration ... NaN \n",
+ "3 897512 Globigerinoides ruber flux analysis from a lon... NaN \n",
+ "4 897516 CTD data from a long-running sediment trap tim... NaN \n",
+ "5 897514 Magnesium/Calcium ratio of Globigerinoides rub... NaN \n",
+ "6 830587 (Table 2) Age determination of sediment core M... 98.0 \n",
+ "7 830586 (Table S1) Stable carbon and oxygen isotope ra... 1231.0 \n",
+ "8 897509 Carbonate measurements from a long-running sed... NaN \n",
+ "9 897513 Isotpes analysis of Globigerinoides ruber from... NaN \n",
+ "\n",
+ " MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n",
+ "0 NaN NaN NaN \n",
+ "1 NaN NaN NaN \n",
+ "2 NaN NaN NaN \n",
+ "3 NaN NaN NaN \n",
+ "4 NaN NaN NaN \n",
+ "5 NaN NaN NaN \n",
+ "6 1950.0 0.0 1852.0 \n",
+ "7 103.0 704.0 1851.0 \n",
+ "8 NaN NaN NaN \n",
+ "9 NaN NaN NaN \n",
+ "\n",
+ " StudyNotes ScienceKeywords \\\n",
+ "0 We present a reconstruction of El Niño Souther... [] \n",
+ "1 We present results here from a high-resolution... [] \n",
+ "2 None [] \n",
+ "3 None [] \n",
+ "4 None [] \n",
+ "5 None [] \n",
+ "6 None [] \n",
+ "7 None [] \n",
+ "8 None [] \n",
+ "9 None [] \n",
+ "\n",
+ " Investigators \\\n",
+ "0 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "1 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "2 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "3 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "4 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "5 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "6 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "7 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n",
+ "8 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "9 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n",
+ "\n",
+ " Publications Sites \\\n",
+ "0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "1 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [] \n",
+ "2 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "3 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "4 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "6 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "7 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n",
+ "8 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "9 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n",
+ "\n",
+ " Funding CollectionMembers \n",
+ "0 [] [830586, 830587, 830588] \n",
+ "1 [] [897509, 897512, 897513, 897514, 897515, 897516] \n",
+ "2 [] None \n",
+ "3 [] None \n",
+ "4 [] None \n",
+ "5 [] None \n",
+ "6 [] None \n",
+ "7 [] None \n",
+ "8 [] None \n",
+ "9 [] None "
]
},
"execution_count": null,
@@ -540,8 +1272,75 @@
}
],
"source": [
- "geo = dataset.get_geo()\n",
- "geo.head()"
+ "dataset.get_summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4c00a283",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "dfs = dataset.get_data([830586, 830587])"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "baabaa90",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " Depth sed Depth top Depth bot Age Age_2 Age_3 \\\n",
+ "0 0.005 0.0 0.01 1843 1851 0.103 \n",
+ "1 0.005 0.0 0.01 1843 1851 0.103 \n",
+ "2 0.005 0.0 0.01 1843 1851 0.103 \n",
+ "3 0.005 0.0 0.01 1843 1851 0.103 \n",
+ "4 0.005 0.0 0.01 1843 1851 0.103 \n",
+ "\n",
+ " P. obliquiloculata δ13C P. obliquiloculata δ18O Mass Event \\\n",
+ "0 0.936 -2.254 34 MD98-2177 \n",
+ "1 0.895 -2.356 37 MD98-2177 \n",
+ "2 0.514 -2.630 20 MD98-2177 \n",
+ "3 0.900 -2.480 25 MD98-2177 \n",
+ "4 0.957 -2.094 29 MD98-2177 \n",
+ "\n",
+ " Latitude Longitude Elevation Date/Time \n",
+ "0 1.4 119.08 -968.0 NaT \n",
+ "1 1.4 119.08 -968.0 NaT \n",
+ "2 1.4 119.08 -968.0 NaT \n",
+ "3 1.4 119.08 -968.0 NaT \n",
+ "4 1.4 119.08 -968.0 NaT \n",
+ " Depth sed Lab Lab label Age dated \\\n",
+ "0 0.00 Lawrence Livermore National Laboratory 95299 0.580 \n",
+ "1 0.12 University of California, Irvine OS-38302 0.395 \n",
+ "2 0.50 Lawrence Livermore National Laboratory 100234 1.110 \n",
+ "3 0.94 Lawrence Livermore National Laboratory 100235 1.745 \n",
+ "4 1.09 University of California, Irvine OS-38335 1.870 \n",
+ "\n",
+ " Age dated std dev Age Age e Comm Event Latitude Longitude \\\n",
+ "0 0.045 1852 51 NaN MD98-2177 1.4 119.08 \n",
+ "1 0.090 0 0 modern MD98-2177 1.4 119.08 \n",
+ "2 0.060 1350 70 NaN MD98-2177 1.4 119.08 \n",
+ "3 0.045 730 82 NaN MD98-2177 1.4 119.08 \n",
+ "4 0.110 584 136 NaN MD98-2177 1.4 119.08 \n",
+ "\n",
+ " Elevation Date/Time \n",
+ "0 -968.0 NaT \n",
+ "1 -968.0 NaT \n",
+ "2 -968.0 NaT \n",
+ "3 -968.0 NaT \n",
+ "4 -968.0 NaT \n"
+ ]
+ }
+ ],
+ "source": [
+ "for df in dfs:\n",
+ " print(df.head())"
]
}
],
diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py
index 223877e0..ff3f20ca 100644
--- a/pyleotups/core/NOAADataset.py
+++ b/pyleotups/core/NOAADataset.py
@@ -51,7 +51,7 @@ def __init__(self):
self.data_table_index = {} # dataTableID -> dict with study, site, paleo_data
self.file_url_to_datatable = {} # file_url -> dataTableID
# self.last_timing = {}
- self.logger = logging.getLogger("pyleotups.Dataset")
+ self.logger = logging.getLogger("pyleotups.NOAADataset")
def _reindex(self):
@@ -92,9 +92,9 @@ def __add__(self, other):
except Exception:
check_same_study_content = False
if not check_same_study_content:
- warnings.warn(
+ log.warning(
f"NOAADataset union: duplicate StudyID {sid} with differing content. "
- "Keeping left-hand version. i.e. if C = A + B is perfomed, contents of A will be kept.", UserWarning
+ "Keeping left-hand version. i.e. if C = A + B is perfomed, contents of A will be kept."
)
# else identical content -> do nothing
else:
@@ -116,9 +116,9 @@ def __iadd__(self, other):
except Exception:
check_same_study_content = False
if not check_same_study_content:
- warnings.warn(
+ log.warning(
f"Dataset in-place union: duplicate StudyID {sid} with differing content. "
- "Keeping existing version. i.e. IF A = A + B is perfomed, contents of A will be kept", UserWarning
+ "Keeping existing version. i.e. IF A = A + B is perfomed, contents of A will be kept"
)
else:
self.studies[sid] = study
@@ -453,14 +453,15 @@ def search_studies(self, **kwargs):
if status == 204:
inv = payload.get("investigators")
if inv:
- warnings.warn(
+ log.warning(
"No studies found for investigator(s): "
f"{inv}. NOAA expects 'LastName, Initials'. Try variations like:\n"
" - 'LastName, Initials'\n - 'LastName'\n - 'Initials'"
)
# Nothing to parse; return display summary (empty) or None
- return self.get_summary()
- # if kwargs.get("display") else None
+ log.info(f"Retrieved {len(self.studies)} studies.")
+ return self.get_summary()
+ # if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.")
# Non-204: ensure success and parse JSON
try:
@@ -470,9 +471,10 @@ def search_studies(self, **kwargs):
# Parse into internal structures (you already have this)
self._parse_response(response_json, kwargs.get("limit"))
+ log.info(f"Retrieved {len(self.studies)} studies.")
return self.get_summary()
- # if kwargs.get("display") else log.info(f"Parsed {len(self.studies)} studies.")
+ # if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.")
def _parse_response(self, data, limit):
@@ -502,7 +504,7 @@ def _parse_response(self, data, limit):
self.file_url_to_datatable[file_url] = paleo.datatable_id
if isinstance(limit, int) and len(data.get('study', [])) >= limit:
- warnings.warn(
+ log.warning(
f"Retrieved {limit} studies, which is the specified limit. "
"Consider increasing the limit parameter to fetch more studies."
)
@@ -600,7 +602,7 @@ def get_publications(self, save=False, path=None, verbose=False):
if not path:
timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M")
path = f"bibtex_{timestamp}.bib"
- warnings.warn(f"No path specified. Saving BibTeX to: {path}")
+ log.warning(f"No path specified. Saving BibTeX to: {path}")
try:
writer = Writer()
@@ -825,64 +827,6 @@ def get_variables(self, dataTableIDs):
return pd.DataFrame(columns=["StudyID", "SiteID", "FileURL", "VariableName"]) # fallback for no data
return df.set_index("DataTableID")
-
- @DeprecationWarning
- def get_data_deprecated(self, dataTableIDs=None, file_urls=None):
- """
- Fetch external data for given dataTableIDs or file URLs and attach study/site metadata.
-
- Parameters
- ----------
- dataTableIDs : list or str, optional
- One or more NOAA data table IDs.
- file_urls : list or str, optional
- One or more file URLs.
-
- Returns
- -------
- list of pandas.DataFrame
- A list of DataFrames, each corresponding to fetched data.
- """
-
- if dataTableIDs:
- dataTableIDs = assert_list(dataTableIDs)
- dfs = []
- for dt_id in dataTableIDs:
- mapping = self.data_table_index.get(dt_id)
- if not mapping:
- print(f"Data Table ID {dt_id} not found or no associated file URL.")
- continue
- file_url = mapping['paleo_data'].file_url
- if not file_url:
- print(f"No file URL for Data Table ID {dt_id}.")
- continue
- fetched_data = DataFetcher.fetch_data(file_url)
- if isinstance(fetched_data, list):
- for df in fetched_data:
- df.attrs['NOAAStudyId'] = mapping['study_id']
- df.attrs['SiteID'] = mapping['site_id']
- study_obj = self.studies.get(mapping['study_id'], {})
- df.attrs['StudyName'] = study_obj.metadata.get("studyName") if hasattr(study_obj, 'metadata') else None
- publications = study_obj.publications if hasattr(study_obj, 'publications') else None
- print(len(publications))
- for pub in publications:
- if hasattr(pub, "doi"):
- doi = pub.doi if pub.doi else None
- df.attrs['PublicationDOI'].append(doi)
- dfs.append(df)
- else:
- fetched_data.attrs['NOAAStudyId'] = mapping['study_id']
- fetched_data.attrs['SiteID'] = mapping['site_id']
- study_obj = self.studies.get(mapping['study_id'], {})
- fetched_data.attrs['StudyName'] = study_obj.metadata.get("studyName") if hasattr(study_obj, 'metadata') else None
- dfs.append(fetched_data)
- return dfs
- if file_urls:
- file_urls = assert_list(file_urls)
- dfs = [DataFetcher.fetch_data(url) for url in file_urls]
- return dfs
- print("No dataTableID or file URL provided.")
- return pd.DataFrame()
def _process_file(self, file_url, mapping=None):
@@ -1050,10 +994,6 @@ def get_data(self, dataTableIDs=None, file_urls=None):
dataTableIDs = assert_list(dataTableIDs)
for dt_id in dataTableIDs:
- # print(self.data_table_index, type(self.data_table_index.values()))
- # for id, value in self.data_table_index.items():
- # print(type(id))
- # print(value, type(value))
mapping = self.data_table_index.get(dt_id)
if not mapping:
raise ValueError(f"No parent study mapping found for Data Table ID '{dt_id}'. "
@@ -1070,18 +1010,13 @@ def get_data(self, dataTableIDs=None, file_urls=None):
for url in file_urls:
mapping = self.file_url_to_datatable.get(url)
if not mapping:
- warnings.warn(
- f"Attached '{url}' is not linked to any parent study; can not add metadata.",
- UserWarning
- )
+ log.warning(f"Attached '{url}' is not linked to any parent study; can not add metadata.")
dfs.extend(self._process_file(url))
else:
mapping_details = self.data_table_index.get(mapping)
if not mapping_details:
- warnings.warn(
- f"Mapping details for file URL '{url}' (Data Table ID '{mapping}') not found; can not add metadata.",
- UserWarning
- )
+ log.warning(
+ f"Mapping details for file URL '{url}' (Data Table ID '{mapping}') not found; can not add metadata.")
dfs.extend(self._process_file(url))
else:
dfs.extend(self._process_file(url, mapping_details))
diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py
index 350f749c..f7b14a8a 100644
--- a/pyleotups/core/PangaeaDataset.py
+++ b/pyleotups/core/PangaeaDataset.py
@@ -17,6 +17,8 @@
from ..utils.PangaeaStudy import PangaeaStudy
+logging.getLogger("pangaeapy").setLevel(logging.ERROR)
+
logger = logging.getLogger(__name__)
# try to import pangaeapy; raise helpful error if missing
@@ -158,7 +160,7 @@ def search_studies(self,
display: if True, return get_summary() after populating registry
Returns:
- None by default, or pandas.DataFrame (same shape as Dataset.get_summary()) if display=True.
+ pandas.DataFrame (same shape as Dataset.get_summary()).
"""
# Direct ID loading mode
if study_ids is not None:
@@ -168,10 +170,11 @@ def search_studies(self,
self._resolve_and_register_ids(study_ids)
- if display:
- return self.get_summary()
+ logger.info(f"Retrived {len(self.studies)} studies")
- return
+ return self.get_summary()
+ # if display else logger.info(f"Retrived {len(self.studies)} studies")
+
# Query-based search
# build query string
@@ -183,7 +186,7 @@ def search_studies(self,
try:
pq = PanQuery(query=query_str, bbox=bbox, limit=limit, offset=offset)
except Exception as exc:
- logger.exception("PanQuery failed")
+ logger.exception(f"PanQuery failed due to {exc}")
raise
# register results in self.studies but do not accumulate into a dataframe here
@@ -199,10 +202,10 @@ def search_studies(self,
auth_token=self.auth_token,
)
-
- # Only return if user explicitly asked for display
- if display:
- return self.get_summary()
+ logger.info(f"Retrived {len(self.studies)} studies")
+
+ return self.get_summary()
+ # if display else logger.info(f"Retrived {len(self.studies)} studies")
# -------------------------
diff --git a/pyleotups/tests/test_NOAADataset.py b/pyleotups/tests/test_NOAADataset.py
index 36d2b9c0..fc350bf8 100644
--- a/pyleotups/tests/test_NOAADataset.py
+++ b/pyleotups/tests/test_NOAADataset.py
@@ -358,8 +358,9 @@ def test_get_data_t03_from_file_url_with_mapping(self, mock_parser, mock_get):
# --- Test t04: file_url not in mapping, should still parse ---
@patch("pyleotups.core.NOAADataset.requests.get")
@patch("pyleotups.core.NOAADataset.StandardParser")
- def test_get_data_t04_unmapped_file_url_warns_and_parses(self, mock_parser, mock_get):
+ def test_get_data_t04_unmapped_file_url_warns_and_parses(self, mock_parser, mock_get, caplog):
unmapped_url = "https://example.com/fake.txt"
+
mock_get.return_value.status_code = 200
mock_get.return_value.raise_for_status = lambda: None
mock_get.return_value.text = "# mock\n# mock\n# mock\n# mock\n# mock"
@@ -367,9 +368,18 @@ def test_get_data_t04_unmapped_file_url_warns_and_parses(self, mock_parser, mock
dummy_df = pd.DataFrame({"depth": [10, 20]})
mock_parser.return_value.parse.return_value = dummy_df
- with pytest.warns(UserWarning, match="not linked to any parent study"):
+ # Capture logs at WARNING level
+ with caplog.at_level("WARNING"):
result = self.ds.get_data(file_urls=[unmapped_url])
- assert isinstance(result[0], pd.DataFrame)
+
+ # Assert log message was emitted
+ assert any(
+ "not linked to any parent study" in record.message
+ for record in caplog.records
+ )
+
+ # Existing assertion
+ assert isinstance(result[0], pd.DataFrame)
# --- Test t05: file with unsupported extension ---
def test_get_data_t05_unsupported_file_type_raises(self):
@@ -486,7 +496,7 @@ def test_add_t02_same_id_identical_keeps_left_no_warning(self):
assert len(C.data_table_index) == len(A.data_table_index)
assert len(C.file_url_to_datatable) == len(A.file_url_to_datatable)
- def test_add_t03_same_id_different_warns_and_keeps_left(self):
+ def test_add_t03_same_id_different_warns_and_keeps_left(self, caplog):
"""C = A + B where same NOAAStudyId but different content → warning; C looks like A."""
A = _build_NOAAdataset_for_noaa_id(18315)
@@ -497,7 +507,8 @@ def _mutate(study_dict):
B = _build_NOAAdataset_for_noaa_id(18315, mutate=_mutate)
# Expect a UserWarning mentioning duplicate/different study; keep regex loose and case-insensitive
- with pytest.warns(UserWarning, match=r"(?i)duplicate.*study.*18315"):
+ # with pytest.warns(UserWarning, match=r"(?i)duplicate.*study.*18315"):
+ with caplog.at_level("WARNING"):
C = A + B
assert _ids(C) == {18315}
@@ -508,6 +519,11 @@ def _mutate(study_dict):
assert len(C.data_table_index) == len(A.data_table_index)
assert len(C.file_url_to_datatable) == len(A.file_url_to_datatable)
+ assert any(
+ "duplicate" in record.message.lower() and "study" in record.message.lower() and "18315" in record.message
+ for record in caplog.records
+ )
+
# ---------------------------------------------------------------------------
# Tests: A = A + B (rebinding variable name to result of binary add)
@@ -546,7 +562,7 @@ def test_add_rebind_t02_same_id_identical_no_warning(self):
assert len(A.data_table_index) == len(canonical_A.data_table_index)
assert len(A.file_url_to_datatable) == len(canonical_A.file_url_to_datatable)
- def test_add_rebind_t03_same_id_different_warns_and_keeps_left(self):
+ def test_add_rebind_t03_same_id_different_warns_and_keeps_left(self, caplog):
"""A = A + B where same NOAAStudyId but different content → warning; A still looks like original A."""
A = _build_NOAAdataset_for_noaa_id(18315)
@@ -555,7 +571,8 @@ def _mutate(study_dict):
B = _build_NOAAdataset_for_noaa_id(18315, mutate=_mutate)
- with pytest.warns(UserWarning, match=r"(?i)duplicate.*study.*18315"):
+ # with pytest.warns(UserWarning, match=r"(?i)duplicate.*study.*18315"):
+ with caplog.at_level("WARNING"):
A = A + B
assert _ids(A) == {18315}
@@ -566,3 +583,8 @@ def _mutate(study_dict):
assert _ids(A) == _ids(canonical_A)
assert len(A.data_table_index) == len(canonical_A.data_table_index)
assert len(A.file_url_to_datatable) == len(canonical_A.file_url_to_datatable)
+
+ assert any(
+ "duplicate" in record.message.lower() and "study" in record.message.lower() and "18315" in record.message
+ for record in caplog.records
+ )
diff --git a/pyleotups/utils/NOAAStudy.py b/pyleotups/utils/NOAAStudy.py
index 48b0ebd8..3285af20 100644
--- a/pyleotups/utils/NOAAStudy.py
+++ b/pyleotups/utils/NOAAStudy.py
@@ -68,6 +68,7 @@ def __init__(self, study_data):
"Malformed site entry encountered. Original error: "
f"{str(e)}"
)
+ self.coverage = self._compute_coverage()
def _load_metadata(self, study_data):
"""
@@ -130,6 +131,27 @@ def _load_funding(self, study_data):
for f in funding_info if isinstance(f, dict)
]
return []
+
+    def _compute_coverage(self):
+        """
+        Return the bounding box ``(south, north, west, east)`` over all sites.
+
+        NaN edge values are ignored. South and west are the minima, north and
+        east the maxima, of the remaining values. If any of the four edges has
+        no valid value, a tuple of four NaNs is returned instead.
+        """
+        edges = ("south_lat", "north_lat", "west_lon", "east_lon")
+        # One list per edge, holding that edge's non-NaN values across the sites.
+        valid = {
+            edge: [getattr(site, edge) for site in self.sites
+                   if not np.isnan(getattr(site, edge))]
+            for edge in edges
+        }
+        if not all(valid[edge] for edge in edges):
+            return (np.nan, np.nan, np.nan, np.nan)
+
+        south, north, west, east = (valid[edge] for edge in edges)
+        return (min(south), max(north), min(west), max(east))
def to_dict(self):
@@ -151,6 +173,7 @@ def to_dict(self):
"MostRecentYearBP": self.metadata.get("mostRecentYearBP"),
"EarliestYearCE": self.metadata.get("earliestYearCE"),
"MostRecentYearCE": self.metadata.get("mostRecentYearCE"),
+ "Coverage [S, N, W, E]": self.coverage,
"StudyNotes": self.metadata.get("studyNotes"),
"ScienceKeywords": self.metadata.get("scienceKeywords"),
"Investigators": self.investigators,
diff --git a/pyleotups/utils/Parser/ExcelParser.py b/pyleotups/utils/Parser/ExcelParser.py
index aad6d5f4..561c8479 100644
--- a/pyleotups/utils/Parser/ExcelParser.py
+++ b/pyleotups/utils/Parser/ExcelParser.py
@@ -6,6 +6,7 @@
from dataclasses import dataclass, field
from typing import Any, List, Optional, Tuple, Iterable, Dict
from enum import Enum
+from .NonStandardParserUtils import auto_cast_df
NUMERIC_THRESHOLD_HEADER = 0.25
@@ -416,7 +417,8 @@ def _process_block(self, block: Block, idx: int):
block.df = None
elif block.block_type == BlockType.COMPLETE_TABULAR and merged_headers:
- block.df = self._generate_df(block, grid, merged_headers, hdr_info)
+ df = self._generate_df(block, grid, merged_headers, hdr_info)
+ block.df = auto_cast_df(df)
else:
block.df = None
@@ -766,13 +768,7 @@ def _ensure_unique(names: List[str]) -> List[str]:
return out
if __name__ == "__main__":
- # Example usage
- # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/NonStandardParser/Correspondence/notebook/frank1999.xls")
- # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Clemens/Clemens1996/clemens1996.xls")
- # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Ishiwatari/ishiwatari1999.xls")
- parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Overpeck1996/overpeck1996.xls")
- # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Bond/bond1992.xls")
- # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Charles/charles1996.xls")
+ parser = ExcelParser("https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls")
blocks = parser.parse()
diff --git a/pyleotups/utils/Parser/NonStandardParserUtils.py b/pyleotups/utils/Parser/NonStandardParserUtils.py
index 31813c9f..3b594f04 100644
--- a/pyleotups/utils/Parser/NonStandardParserUtils.py
+++ b/pyleotups/utils/Parser/NonStandardParserUtils.py
@@ -415,7 +415,9 @@ def generate_df(lines_info, delimiter, headers, header_extent=0):
raise ValueError(f"Column count ({len(rows[0])}) "
f"does not match header count ({len(col_names)})")
- return pd.DataFrame(rows, columns=col_names)
+ df = pd.DataFrame(rows, columns=col_names)
+ df = auto_cast_df(df)
+ return df
def assign_tokens_by_overlap(lines_info, delimiter, headers, header_extent=0):
@@ -629,4 +631,20 @@ def refine_headers_by_correspondence(header_lines, data_lines, delimiter, broadc
last_token_identity = current_token_identity
- return refined_headers
\ No newline at end of file
+ return refined_headers
+
+def auto_cast_df(df: pd.DataFrame) -> pd.DataFrame:
+    """
+    Cast object columns of ``df`` to numeric in place and return ``df``.
+
+    A column is cast only when pandas can parse every value in it as a number;
+    columns holding any other text are left unchanged.
+    """
+    for col in df.columns:
+        if df[col].dtype == "object":
+            try:
+                # errors="raise" (not "coerce"): coercion would turn text into NaN.
+                df[col] = pd.to_numeric(df[col], errors="raise")
+            except (ValueError, TypeError):
+                continue  # non-numeric content: keep the original column
+    return df
\ No newline at end of file
diff --git a/pyleotups/utils/Parser/StandardParser.py b/pyleotups/utils/Parser/StandardParser.py
index dbf2ce52..0bc9ed88 100644
--- a/pyleotups/utils/Parser/StandardParser.py
+++ b/pyleotups/utils/Parser/StandardParser.py
@@ -4,6 +4,8 @@
import pandas as pd
import re
+from .NonStandardParserUtils import auto_cast_df
+
@DeprecationWarning
class DataFetcher:
"""
@@ -398,6 +400,7 @@ def _construct_dataframe(self):
df = pd.DataFrame(padded, columns=self.variables)
df.attrs['variables'] = self.variables
+ df = auto_cast_df(df)
return df
def _skip_empty_lines(self, index):
@@ -472,469 +475,8 @@ def _extract_first_non_digit_token(self, line):
return token
return None
-# def fetch_file(url):
-# """
-# Download a file from the given URL and split its content into lines.
-
-# Parameters
-# ----------
-# url : str
-# The URL of the file to fetch.
-
-# Returns
-# -------
-# list of str
-# The file content split into individual lines.
-
-# Raises
-# ------
-# requests.HTTPError
-# If the HTTP request returned an unsuccessful status code.
-# """
-# response = requests.get(url)
-# response.raise_for_status()
-# return response.text.splitlines()
-
-
-# def identify_metadata(lines):
-# """
-# Identify the metadata block in the file by finding lines that start with '#'.
-
-# Parameters
-# ----------
-# lines : list of str
-# All lines from the file.
-
-# Returns
-# -------
-# tuple of (int, int) or (None, None)
-# A tuple containing the first and last indices of metadata lines.
-# Returns (None, None) if no metadata lines are found.
-# """
-# metadata_indices = [i for i, line in enumerate(lines) if line.lstrip().startswith('#')]
-# if metadata_indices:
-# return metadata_indices[0], metadata_indices[-1]
-# return None, None
-
-
-# def extract_first_non_digit_token(line):
-# """
-# Remove any leading comment markers from a line and return the first token that is not purely numeric.
-
-# Parameters
-# ----------
-# line : str
-# A line of text (typically from metadata).
-
-# Returns
-# -------
-# str or None
-# The first non-digit token, or None if no valid token is found.
-# """
-# pattern = r'^\s*(.*?)(?:\t|\s{2,})(?:[^,\n]*,){0,9}[^,\n]*$'
-# match = re.match(pattern, line)
-# if match:
-# return match.group(1).strip()
-# tokens = re.split(r'[\s,]+', line.strip())
-# for token in tokens:
-# if token and not token.isdigit():
-# return token
-# return None
-
-
-# def parse_metadata_variables(lines, meta_start, meta_end):
-# """
-# Extract variable names from a metadata block when an explicit "Variables" block exists.
-
-# This function attempts to extract variables by looking for a metadata line that starts with
-# "# variables" (case-insensitive). If found, it first searches for lines starting with '##'
-# following the marker. If no such lines exist, it falls back to splitting other non-comment lines.
-
-# Parameters
-# ----------
-# lines : list of str
-# All lines from the file.
-# meta_start : int
-# Index of the first metadata line.
-# meta_end : int
-# Index of the last metadata line.
-
-# Returns
-# -------
-# tuple of (list of str, int)
-# A tuple where the first element is a list of extracted variable names and the second element is
-# the header skip count (usually 1 if variables are successfully extracted).
-# """
-# variables = []
-# header_skip_count = 0
-# variable_block_index = None
-
-# for i in range(meta_start, meta_end + 1):
-# if re.match(r'^#\s*variables', lines[i], re.IGNORECASE):
-# variable_block_index = i
-# break
-
-# if variable_block_index is not None:
-# # CASE 1A: Look for lines starting with '##'
-# for i in range(variable_block_index + 1, meta_end + 1):
-# if lines[i].lstrip().startswith('##'):
-# token = extract_first_non_digit_token(lines[i].lstrip('#'))
-# if token:
-# variables.append(token)
-# # CASE 1B: Fallback if no '##' lines found.
-# if not variables:
-# for i in range(variable_block_index + 1, meta_end + 1):
-# if lines[i].strip() and not lines[i].startswith("#"):
-# if len(re.split(r',', lines[i].strip())) >= 9:
-# token = extract_first_non_digit_token(lines[i])
-# if token:
-# variables.append(token)
-# if variables:
-# header_skip_count = 1
-# return variables, header_skip_count
-
-
-# def parse_data_header_variables(lines, meta_end):
-# """
-# Extract variable names from the data header when no explicit metadata "Variables" block exists.
-
-# It searches from the line immediately after the metadata block until a non-comment line is found
-# that, when split by either tab or comma, yields at least 9 tokens.
-
-# Parameters
-# ----------
-# lines : list of str
-# All lines from the file.
-# meta_end : int
-# The index of the last metadata line.
-
-# Returns
-# -------
-# tuple of (list of str, int)
-# A tuple containing the extracted variable names and a header skip count (typically 1).
-# """
-# variables = []
-# header_skip_count = 1
-# for i in range(meta_end + 1, len(lines)):
-# line = lines[i].strip()
-# if line and not line.lstrip().startswith('#'):
-# tokens_tab = re.split(r'\t', line)
-# tokens_comma = re.split(r',', line)
-# if len(tokens_tab) >= 9 or len(tokens_comma) >= 9:
-# variables = tokens_tab if len(tokens_tab) >= len(tokens_comma) else tokens_comma
-# break
-# return variables, header_skip_count
-
-
-# def fallback_variable_extraction(lines, meta_end):
-# """
-# Fallback extraction: use the first non-empty line in the data block, split by tabs.
-
-# Parameters
-# ----------
-# lines : list of str
-# All lines from the file.
-# meta_end : int
-# The index of the last metadata line.
-
-# Returns
-# -------
-# tuple of (list of str, int)
-# A tuple containing variable names (or autogenerated names for empty tokens) and a header skip count.
-# """
-# variables = []
-# header_skip_count = 1
-# for i in range(meta_end + 1, len(lines)):
-# if lines[i].strip():
-# tokens = re.split(r'\t', lines[i].strip())
-# if len(tokens) > 1:
-# variables = [f"Unnamed_{idx}" if not token else token for idx, token in enumerate(tokens)]
-# break
-# return variables, header_skip_count
-
-
-# def variable_parser(lines, meta_start, meta_end):
-# """
-# Extract variable names (column headers) from a NOAA text file using multiple methods.
-
-# The function first attempts to extract variables from a metadata block containing an explicit
-# "Variables" marker. If that fails, it attempts extraction from the first data header line. If that
-# fails too, it uses a fallback method on the first non-empty data line.
-
-# Parameters
-# ----------
-# lines : list of str
-# All lines from the file.
-# meta_start : int
-# The index of the first metadata line.
-# meta_end : int
-# The index of the last metadata line.
-
-# Returns
-# -------
-# tuple of (list of str, str, int)
-# A tuple (variables, source, header_skip_count) where:
-# - variables is the list of extracted variable names,
-# - source is "metadata" if variables were extracted from the metadata block,
-# or "data" if extracted from the data header,
-# - header_skip_count indicates how many header lines should be skipped.
-# """
-# variables, header_skip_count = parse_metadata_variables(lines, meta_start, meta_end)
-# if variables:
-# return variables, "metadata", header_skip_count
-
-# variables, header_skip_count = parse_data_header_variables(lines, meta_end)
-# if variables:
-# return variables, "data", header_skip_count
-
-# variables, header_skip_count = fallback_variable_extraction(lines, meta_end)
-# if variables:
-# return variables, "data", header_skip_count
-
-# return [], None, 0
-
-
-# def skip_empty_lines(lines, index):
-# """
-# Advance the index until a non-empty line is encountered.
-
-# Parameters
-# ----------
-# lines : list of str
-# The file lines.
-# index : int
-# The starting index.
-
-# Returns
-# -------
-# int
-# The index of the first non-empty line.
-# """
-# while index < len(lines) and not lines[index].strip():
-# index += 1
-# return index
-
-
-# def detect_delimiter(data_lines):
-# r"""
-# Detect the delimiter used in a set of data lines.
-
-# It first tries tab-delimitation; if token counts are inconsistent, it falls back to splitting
-# on two or more spaces.
-
-# Parameters
-# ----------
-# data_lines : list of str
-# A list of non-empty data lines.
-
-# Returns
-# -------
-# str
-# The detected delimiter, either the tab character ('\t') or a regex pattern (r'\s{2,}').
-# """
-# non_empty = [line.strip() for line in data_lines if line.strip()]
-# if not non_empty:
-# return '\t'
-# tab_counts = [len(line.split('\t')) for line in non_empty]
-# if len(set(tab_counts)) == 1 and tab_counts[0] > 1:
-# return '\t'
-# space_counts = [len(re.split(r'\s{2,}', line)) for line in non_empty]
-# if len(set(space_counts)) == 1 and space_counts[0] > 1:
-# return r'\s{2,}'
-# return '\t'
-
-
-# def data_parser(lines, meta_end, skip_lines=0):
-# """
-# Parse the data block of the file, skipping empty lines and header lines.
-
-# This function detects the delimiter used in the data block and ensures that all rows are padded
-# to have a uniform number of columns.
-
-# Parameters
-# ----------
-# lines : list of str
-# All lines from the file.
-# meta_end : int
-# The index of the last metadata line.
-# skip_lines : int, optional
-# Number of header lines to skip in the data block, by default 0.
-
-# Returns
-# -------
-# tuple of (list, int) or (None, None)
-# A tuple (data, row_len) where data is a list of rows (each row is a list of tokens) and row_len
-# is the uniform number of columns. Returns (None, None) if parsing fails.
-# """
-# data = []
-# index = meta_end + 1
-# index = skip_empty_lines(lines, index)
-# index += skip_lines
-# remaining_lines = lines[index:]
-# delimiter = detect_delimiter(remaining_lines)
-# for line in remaining_lines:
-# if not line.strip():
-# continue
-# if delimiter == '\t':
-# row = line.split('\t')
-# else:
-# row = re.split(delimiter, line.strip())
-# data.append(row)
-# if not data or (data and len(data[0]) < 2):
-# return None, None
-# max_len = max(len(row) for row in data)
-# for i in range(len(data)):
-# if len(data[i]) < max_len:
-# data[i] = data[i] + [''] * (max_len - len(data[i]))
-# return data, max_len
-
-
-# def dataframe_constructor(data, variables):
-# """
-# Construct a pandas DataFrame from parsed data rows and variable names.
-
-# Handles three cases:
-# - Exact match: The number of variables equals the number of columns.
-# - Extra columns: More columns than variables (trims extra columns).
-# - Missing columns: Fewer columns than variables (pads rows with empty strings).
-
-# Parameters
-# ----------
-# data : list of list of str
-# Parsed data rows.
-# variables : list of str
-# Column headers.
-
-# Returns
-# -------
-# pandas.DataFrame or None
-# The constructed DataFrame with an attribute 'variables' set, or None if data or variables are missing.
-# """
-# if not data or not variables:
-# return None
-
-# row_len = len(data[0])
-# var_len = len(variables)
-
-# if var_len == row_len:
-# df = pd.DataFrame(data, columns=variables)
-# elif var_len < row_len:
-# data_trimmed = [row[:var_len] for row in data]
-# df = pd.DataFrame(data_trimmed, columns=variables)
-# elif var_len > row_len:
-# data_padded = [row + [''] * (var_len - len(row)) for row in data]
-# df = pd.DataFrame(data_padded, columns=variables)
-
-# df.attrs['variables'] = variables
-# return df
-
-# # ---------------------------------------------------------------------------
-# # StandardParser Class
-# # ---------------------------------------------------------------------------
-# class StandardParser:
-# """
-# StandardParser encapsulates the complete workflow for downloading and parsing a NOAA text file.
-
-# The class maintains attributes such as the URL, file lines, metadata boundaries, extracted variable names,
-# header skip count, parsed data, and the final DataFrame.
-
-# Attributes
-# ----------
-# url : str
-# The URL of the file to parse.
-# lines : list of str
-# The content of the file split into lines.
-# meta_start : int
-# The index of the first metadata line.
-# meta_end : int
-# The index of the last metadata line.
-# variables : list of str
-# The extracted variable names.
-# skip_lines : int
-# The number of header lines to skip in the data block.
-# data : list of list of str
-# The parsed data rows.
-# df : pandas.DataFrame
-# The constructed DataFrame.
-
-# Methods
-# -------
-# parse(url=None)
-# Execute the full parsing workflow and return the constructed DataFrame.
-# _fetch_file()
-# Fetch the file and set the 'lines' attribute.
-# _identify_metadata()
-# Identify metadata boundaries and set 'meta_start' and 'meta_end'.
-# _extract_variables()
-# Extract variable names and header skip count, setting 'variables' and 'skip_lines'.
-# _parse_data()
-# Parse the data block from the file and set the 'data' attribute.
-# _construct_dataframe()
-# Construct the final DataFrame from parsed data and variables.
-# """
-# def __init__(self, url=None):
-# self.url = url
-# self.lines = None
-# self.meta_start = None
-# self.meta_end = None
-# self.variables = None
-# self.skip_lines = 0
-# self.data = None
-# self.df = None
-
-# def parse(self, url=None):
-# """
-# Orchestrate the full parsing process.
-
-# Parameters
-# ----------
-# url : str, optional
-# The URL to parse. If provided, it overrides the existing URL attribute.
-
-# Returns
-# -------
-# pandas.DataFrame
-# The constructed DataFrame.
-
-# Raises
-# ------
-# ParsingError
-# If any step of the parsing process fails.
-# """
-# if url is not None:
-# self.url = url
-# if not self.url:
-# raise ParsingError("No URL provided.")
-# try:
-# self._fetch_file()
-# except Exception as e:
-# raise ParsingError(f"Error fetching file: {e}")
-# self.meta_start, self.meta_end = self._identify_metadata()
-# if self.meta_start is None:
-# raise ParsingError("Invalid file format."
-# "Wrapper can only parse stndard NOAA template formatted files")
-# self.variables, _, self.skip_lines = self._extract_variables()
-# if not self.variables:
-# raise ParsingError("Failed to extract variable names from file.")
-# self.data, _ = self._parse_data()
-# if self.data is None:
-# raise ParsingError("No valid data block found.")
-# self.df = self._construct_dataframe()
-# if self.df is None:
-# raise ParsingError("DataFrame construction failed.")
-# return self.df
-
-# def _fetch_file(self):
-# self.lines = fetch_file(self.url)
-
-# def _identify_metadata(self):
-# return identify_metadata(self.lines)
-
-# def _extract_variables(self):
-# return variable_parser(self.lines, self.meta_start, self.meta_end)
-
-# def _parse_data(self):
-# return data_parser(self.lines, self.meta_end, self.skip_lines)
-
-# def _construct_dataframe(self):
-# return dataframe_constructor(self.data, self.variables)
\ No newline at end of file
+if __name__ == "__main__":
+    # Ad-hoc check: parse one NOAA-hosted text file, then print the parsed
+    # result and the dtype of its "depth_cm" column.
+    demo_url = "https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/khider2014/khider2014-benth.txt"
+    dfs = StandardParser(demo_url).parse()
+    print(dfs)
+    print(dfs["depth_cm"].dtype)
\ No newline at end of file
diff --git a/pyleotups/utils/Site.py b/pyleotups/utils/Site.py
index 7e2fef2c..516d572c 100644
--- a/pyleotups/utils/Site.py
+++ b/pyleotups/utils/Site.py
@@ -32,6 +32,13 @@ def __init__(self, site_data, study_id):
self.lon = np.nan
self.min_elevation = np.nan
self.max_elevation = np.nan
+
+ properties = geo.get('properties', {})
+
+ self.south_lat = self._safe_float(properties.get('southernmostLatitude'))
+ self.north_lat = self._safe_float(properties.get('northernmostLatitude'))
+ self.west_lon = self._safe_float(properties.get('westernmostLongitude'))
+ self.east_lon = self._safe_float(properties.get('easternmostLongitude'))
# ✅ Validate paleoData entries
paleo_data_list = site_data.get('paleoData', [])
@@ -41,6 +48,12 @@ def __init__(self, site_data, study_id):
if isinstance(paleo, dict)
]
+ def _safe_float(self, val):
+     """Coerce ``val`` to ``float``, returning NaN when it cannot be converted.
+
+     A ``TypeError`` (e.g. ``None``) or ``ValueError`` (e.g. non-numeric
+     text) raised by ``float(val)`` yields ``np.nan`` instead of propagating.
+     """
+     try:
+         number = float(val)
+     except (TypeError, ValueError):
+         number = np.nan
+     return number
+
def to_dict(self):
"""
diff --git a/pyleotups/utils/api/query_builder.py b/pyleotups/utils/api/query_builder.py
index b06e0396..c9c50dff 100644
--- a/pyleotups/utils/api/query_builder.py
+++ b/pyleotups/utils/api/query_builder.py
@@ -31,6 +31,11 @@ def build_payload(**kwargs) -> Tuple[dict, List[str]]:
notes: List[str] = []
payload: dict = {}
+ # Defaults
+ if kwargs.get("data_type_id") is not None:
+ payload["dataTypeID"] = kwargs.get("data_type_id")
+ payload["dataPublisher"] = DATA_PUBLISHER
+
# Identifier short-circuit
xml_id = kwargs.get("xml_id")
noaa_id = kwargs.get("noaa_id")
@@ -39,13 +44,12 @@ def build_payload(**kwargs) -> Tuple[dict, List[str]]:
payload["xmlId"] = validate_digits(xml_id)
if noaa_id is not None:
payload["NOAAStudyId"] = validate_digits(noaa_id)
- payload["dataPublisher"] = DATA_PUBLISHER
+
# Ignore all other filters by design
- notes.append("Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.")
- return payload, notes
+ # notes.append("Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.")
+ # return payload, notes
- # Defaults
- payload["dataPublisher"] = DATA_PUBLISHER
+
payload["limit"] = kwargs.get("limit", DEFAULT_LIMIT)
if payload["limit"] != DEFAULT_LIMIT:
notes.append(f"Limit set to {payload['limit']}.")
@@ -82,6 +86,8 @@ def build_payload(**kwargs) -> Tuple[dict, List[str]]:
payload["minLon"] = validate_int_range("min_lon", v, -180, 180)
if (v := kwargs.get("max_lon")) is not None:
payload["maxLon"] = validate_int_range("max_lon", v, -180, 180)
+
+ notes.append("Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.")
# Elevation (any ints allowed)
if (v := kwargs.get("min_elevation")) is not None: