From 4cf622bb968c2408b4b1ade250bc956244024e67 Mon Sep 17 00:00:00 2001 From: Dhiren Date: Thu, 19 Mar 2026 10:12:23 -0700 Subject: [PATCH 1/9] 1. Adding data_type in kwargs. 2. Adding warning for lat, lon searches. Making search_studies response consistent --- pyleotups/core/NOAADataset.py | 72 ++-------------------------- pyleotups/utils/api/query_builder.py | 16 +++++-- 2 files changed, 15 insertions(+), 73 deletions(-) diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py index 223877e0..c83c78be 100644 --- a/pyleotups/core/NOAADataset.py +++ b/pyleotups/core/NOAADataset.py @@ -51,7 +51,7 @@ def __init__(self): self.data_table_index = {} # dataTableID -> dict with study, site, paleo_data self.file_url_to_datatable = {} # file_url -> dataTableID # self.last_timing = {} - self.logger = logging.getLogger("pyleotups.Dataset") + self.logger = logging.getLogger("pyleotups.NOAADataset") def _reindex(self): @@ -459,8 +459,7 @@ def search_studies(self, **kwargs): " - 'LastName, Initials'\n - 'LastName'\n - 'Initials'" ) # Nothing to parse; return display summary (empty) or None - return self.get_summary() - # if kwargs.get("display") else None + return self.get_summary() if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.") # Non-204: ensure success and parse JSON try: @@ -471,8 +470,7 @@ def search_studies(self, **kwargs): # Parse into internal structures (you already have this) self._parse_response(response_json, kwargs.get("limit")) - return self.get_summary() - # if kwargs.get("display") else log.info(f"Parsed {len(self.studies)} studies.") + return self.get_summary() if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.") def _parse_response(self, data, limit): @@ -825,64 +823,6 @@ def get_variables(self, dataTableIDs): return pd.DataFrame(columns=["StudyID", "SiteID", "FileURL", "VariableName"]) # fallback for no data return 
df.set_index("DataTableID") - - @DeprecationWarning - def get_data_deprecated(self, dataTableIDs=None, file_urls=None): - """ - Fetch external data for given dataTableIDs or file URLs and attach study/site metadata. - - Parameters - ---------- - dataTableIDs : list or str, optional - One or more NOAA data table IDs. - file_urls : list or str, optional - One or more file URLs. - - Returns - ------- - list of pandas.DataFrame - A list of DataFrames, each corresponding to fetched data. - """ - - if dataTableIDs: - dataTableIDs = assert_list(dataTableIDs) - dfs = [] - for dt_id in dataTableIDs: - mapping = self.data_table_index.get(dt_id) - if not mapping: - print(f"Data Table ID {dt_id} not found or no associated file URL.") - continue - file_url = mapping['paleo_data'].file_url - if not file_url: - print(f"No file URL for Data Table ID {dt_id}.") - continue - fetched_data = DataFetcher.fetch_data(file_url) - if isinstance(fetched_data, list): - for df in fetched_data: - df.attrs['NOAAStudyId'] = mapping['study_id'] - df.attrs['SiteID'] = mapping['site_id'] - study_obj = self.studies.get(mapping['study_id'], {}) - df.attrs['StudyName'] = study_obj.metadata.get("studyName") if hasattr(study_obj, 'metadata') else None - publications = study_obj.publications if hasattr(study_obj, 'publications') else None - print(len(publications)) - for pub in publications: - if hasattr(pub, "doi"): - doi = pub.doi if pub.doi else None - df.attrs['PublicationDOI'].append(doi) - dfs.append(df) - else: - fetched_data.attrs['NOAAStudyId'] = mapping['study_id'] - fetched_data.attrs['SiteID'] = mapping['site_id'] - study_obj = self.studies.get(mapping['study_id'], {}) - fetched_data.attrs['StudyName'] = study_obj.metadata.get("studyName") if hasattr(study_obj, 'metadata') else None - dfs.append(fetched_data) - return dfs - if file_urls: - file_urls = assert_list(file_urls) - dfs = [DataFetcher.fetch_data(url) for url in file_urls] - return dfs - print("No dataTableID or file URL provided.") 
- return pd.DataFrame() def _process_file(self, file_url, mapping=None): @@ -1050,10 +990,6 @@ def get_data(self, dataTableIDs=None, file_urls=None): dataTableIDs = assert_list(dataTableIDs) for dt_id in dataTableIDs: - # print(self.data_table_index, type(self.data_table_index.values())) - # for id, value in self.data_table_index.items(): - # print(type(id)) - # print(value, type(value)) mapping = self.data_table_index.get(dt_id) if not mapping: raise ValueError(f"No parent study mapping found for Data Table ID '{dt_id}'. " @@ -1070,7 +1006,7 @@ def get_data(self, dataTableIDs=None, file_urls=None): for url in file_urls: mapping = self.file_url_to_datatable.get(url) if not mapping: - warnings.warn( + log.warning( f"Attached '{url}' is not linked to any parent study; can not add metadata.", UserWarning ) diff --git a/pyleotups/utils/api/query_builder.py b/pyleotups/utils/api/query_builder.py index b06e0396..c9c50dff 100644 --- a/pyleotups/utils/api/query_builder.py +++ b/pyleotups/utils/api/query_builder.py @@ -31,6 +31,11 @@ def build_payload(**kwargs) -> Tuple[dict, List[str]]: notes: List[str] = [] payload: dict = {} + # Defaults + if kwargs.get("data_type_id") is not None: + payload["dataTypeID"] = kwargs.get("data_type_id") + payload["dataPublisher"] = DATA_PUBLISHER + # Identifier short-circuit xml_id = kwargs.get("xml_id") noaa_id = kwargs.get("noaa_id") @@ -39,13 +44,12 @@ def build_payload(**kwargs) -> Tuple[dict, List[str]]: payload["xmlId"] = validate_digits(xml_id) if noaa_id is not None: payload["NOAAStudyId"] = validate_digits(noaa_id) - payload["dataPublisher"] = DATA_PUBLISHER + # Ignore all other filters by design - notes.append("Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.") - return payload, notes + # notes.append("Using identifier-only fetch (xml_id/NOAAStudyId). 
Other parameters will be ignored.") + # return payload, notes - # Defaults - payload["dataPublisher"] = DATA_PUBLISHER + payload["limit"] = kwargs.get("limit", DEFAULT_LIMIT) if payload["limit"] != DEFAULT_LIMIT: notes.append(f"Limit set to {payload['limit']}.") @@ -82,6 +86,8 @@ def build_payload(**kwargs) -> Tuple[dict, List[str]]: payload["minLon"] = validate_int_range("min_lon", v, -180, 180) if (v := kwargs.get("max_lon")) is not None: payload["maxLon"] = validate_int_range("max_lon", v, -180, 180) + + notes.append("Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.") # Elevation (any ints allowed) if (v := kwargs.get("min_elevation")) is not None: From 159afc24e9e10db2260c82eb91c1f5e7a4d83278 Mon Sep 17 00:00:00 2001 From: Dhiren Date: Thu, 19 Mar 2026 10:59:22 -0700 Subject: [PATCH 2/9] Adding aggregated coverage details for each study, updating warnings to logs --- pyleotups/core/NOAADataset.py | 12 ++++++------ pyleotups/utils/NOAAStudy.py | 24 ++++++++++++++++++++++++ pyleotups/utils/Site.py | 13 +++++++++++++ 3 files changed, 43 insertions(+), 6 deletions(-) diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py index c83c78be..2eee81dc 100644 --- a/pyleotups/core/NOAADataset.py +++ b/pyleotups/core/NOAADataset.py @@ -92,7 +92,7 @@ def __add__(self, other): except Exception: check_same_study_content = False if not check_same_study_content: - warnings.warn( + log.warning( f"NOAADataset union: duplicate StudyID {sid} with differing content. " "Keeping left-hand version. i.e. if C = A + B is perfomed, contents of A will be kept.", UserWarning ) @@ -116,7 +116,7 @@ def __iadd__(self, other): except Exception: check_same_study_content = False if not check_same_study_content: - warnings.warn( + log.warning( f"Dataset in-place union: duplicate StudyID {sid} with differing content. " "Keeping existing version. i.e. 
IF A = A + B is perfomed, contents of A will be kept", UserWarning ) @@ -453,7 +453,7 @@ def search_studies(self, **kwargs): if status == 204: inv = payload.get("investigators") if inv: - warnings.warn( + log.warning( "No studies found for investigator(s): " f"{inv}. NOAA expects 'LastName, Initials'. Try variations like:\n" " - 'LastName, Initials'\n - 'LastName'\n - 'Initials'" @@ -500,7 +500,7 @@ def _parse_response(self, data, limit): self.file_url_to_datatable[file_url] = paleo.datatable_id if isinstance(limit, int) and len(data.get('study', [])) >= limit: - warnings.warn( + log.warning( f"Retrieved {limit} studies, which is the specified limit. " "Consider increasing the limit parameter to fetch more studies." ) @@ -598,7 +598,7 @@ def get_publications(self, save=False, path=None, verbose=False): if not path: timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M") path = f"bibtex_{timestamp}.bib" - warnings.warn(f"No path specified. Saving BibTeX to: {path}") + log.warning(f"No path specified. Saving BibTeX to: {path}") try: writer = Writer() @@ -1014,7 +1014,7 @@ def get_data(self, dataTableIDs=None, file_urls=None): else: mapping_details = self.data_table_index.get(mapping) if not mapping_details: - warnings.warn( + log.warning( f"Mapping details for file URL '{url}' (Data Table ID '{mapping}') not found; can not add metadata.", UserWarning ) diff --git a/pyleotups/utils/NOAAStudy.py b/pyleotups/utils/NOAAStudy.py index 48b0ebd8..f4cb954f 100644 --- a/pyleotups/utils/NOAAStudy.py +++ b/pyleotups/utils/NOAAStudy.py @@ -68,6 +68,7 @@ def __init__(self, study_data): "Malformed site entry encountered. 
Original error: " f"{str(e)}" ) + self.coverage = self._compute_coverage() def _load_metadata(self, study_data): """ @@ -130,6 +131,28 @@ def _load_funding(self, study_data): for f in funding_info if isinstance(f, dict) ] return [] + + def _compute_coverage(self): + south_vals = [] + north_vals = [] + west_vals = [] + east_vals = [] + + for site in self.sites: + if not np.isnan(site.south_lat): + south_vals.append(site.south_lat) + if not np.isnan(site.north_lat): + north_vals.append(site.north_lat) + if not np.isnan(site.west_lon): + west_vals.append(site.west_lon) + if not np.isnan(site.east_lon): + east_vals.append(site.east_lon) + + if not south_vals or not north_vals or not west_vals or not east_vals: + return (np.nan, np.nan, np.nan, np.nan) + print(self.study_id, south_vals, north_vals, west_vals, east_vals) + + return (min(south_vals), max(north_vals), min(west_vals), max(east_vals)) def to_dict(self): @@ -151,6 +174,7 @@ def to_dict(self): "MostRecentYearBP": self.metadata.get("mostRecentYearBP"), "EarliestYearCE": self.metadata.get("earliestYearCE"), "MostRecentYearCE": self.metadata.get("mostRecentYearCE"), + "Coverage [S, N, W, E]": self.coverage, "StudyNotes": self.metadata.get("studyNotes"), "ScienceKeywords": self.metadata.get("scienceKeywords"), "Investigators": self.investigators, diff --git a/pyleotups/utils/Site.py b/pyleotups/utils/Site.py index 7e2fef2c..516d572c 100644 --- a/pyleotups/utils/Site.py +++ b/pyleotups/utils/Site.py @@ -32,6 +32,13 @@ def __init__(self, site_data, study_id): self.lon = np.nan self.min_elevation = np.nan self.max_elevation = np.nan + + properties = geo.get('properties', {}) + + self.south_lat = self._safe_float(properties.get('southernmostLatitude')) + self.north_lat = self._safe_float(properties.get('northernmostLatitude')) + self.west_lon = self._safe_float(properties.get('westernmostLongitude')) + self.east_lon = self._safe_float(properties.get('easternmostLongitude')) # ✅ Validate paleoData entries 
paleo_data_list = site_data.get('paleoData', []) @@ -41,6 +48,12 @@ def __init__(self, site_data, study_id): if isinstance(paleo, dict) ] + def _safe_float(self, val): + try: + return float(val) + except (TypeError, ValueError): + return np.nan + def to_dict(self): """ From 340123f86a53c39a7cc72290fe1d5218182838dd Mon Sep 17 00:00:00 2001 From: Dhiren Date: Thu, 19 Mar 2026 11:04:09 -0700 Subject: [PATCH 3/9] Making return type of PangaeaDataset.search_studies() consistent with NOAADataset --- pyleotups/core/PangaeaDataset.py | 9 +++------ pyleotups/utils/NOAAStudy.py | 1 - 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py index 350f749c..fe11b390 100644 --- a/pyleotups/core/PangaeaDataset.py +++ b/pyleotups/core/PangaeaDataset.py @@ -168,10 +168,8 @@ def search_studies(self, self._resolve_and_register_ids(study_ids) - if display: - return self.get_summary() - - return + return self.get_summary() if display else logger.info(f"Retrived {len(self.studies)} studies") + # Query-based search # build query string @@ -201,8 +199,7 @@ def search_studies(self, # Only return if user explicitly asked for display - if display: - return self.get_summary() + return self.get_summary() if display else logger.info(f"Retrived {len(self.studies)} studies") # ------------------------- diff --git a/pyleotups/utils/NOAAStudy.py b/pyleotups/utils/NOAAStudy.py index f4cb954f..3285af20 100644 --- a/pyleotups/utils/NOAAStudy.py +++ b/pyleotups/utils/NOAAStudy.py @@ -150,7 +150,6 @@ def _compute_coverage(self): if not south_vals or not north_vals or not west_vals or not east_vals: return (np.nan, np.nan, np.nan, np.nan) - print(self.study_id, south_vals, north_vals, west_vals, east_vals) return (min(south_vals), max(north_vals), min(west_vals), max(east_vals)) From 262c53de112a6f84470145cd8caf1de906a42989 Mon Sep 17 00:00:00 2001 From: Dhiren Date: Tue, 24 Mar 2026 12:24:04 -0700 Subject: [PATCH 4/9] 1. 
Returning df when search_studies is triggered [Returning None or simple logging breaks previous tests and jupyter-execute examples while building documentation. Also good in terms of user perspective to return results]. 2. Updating tests to catch log.warning instead of warnings.warn --- pyleotups/core/NOAADataset.py | 21 ++++++++--------- pyleotups/core/PangaeaDataset.py | 16 ++++++++----- pyleotups/tests/test_NOAADataset.py | 36 +++++++++++++++++++++++------ 3 files changed, 49 insertions(+), 24 deletions(-) diff --git a/pyleotups/core/NOAADataset.py b/pyleotups/core/NOAADataset.py index 2eee81dc..ff3f20ca 100644 --- a/pyleotups/core/NOAADataset.py +++ b/pyleotups/core/NOAADataset.py @@ -94,7 +94,7 @@ def __add__(self, other): if not check_same_study_content: log.warning( f"NOAADataset union: duplicate StudyID {sid} with differing content. " - "Keeping left-hand version. i.e. if C = A + B is perfomed, contents of A will be kept.", UserWarning + "Keeping left-hand version. i.e. if C = A + B is perfomed, contents of A will be kept." ) # else identical content -> do nothing else: @@ -118,7 +118,7 @@ def __iadd__(self, other): if not check_same_study_content: log.warning( f"Dataset in-place union: duplicate StudyID {sid} with differing content. " - "Keeping existing version. i.e. IF A = A + B is perfomed, contents of A will be kept", UserWarning + "Keeping existing version. i.e. 
IF A = A + B is perfomed, contents of A will be kept" ) else: self.studies[sid] = study @@ -459,7 +459,9 @@ def search_studies(self, **kwargs): " - 'LastName, Initials'\n - 'LastName'\n - 'Initials'" ) # Nothing to parse; return display summary (empty) or None - return self.get_summary() if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.") + log.info(f"Retrieved {len(self.studies)} studies.") + return self.get_summary() + # if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.") # Non-204: ensure success and parse JSON try: @@ -469,8 +471,10 @@ def search_studies(self, **kwargs): # Parse into internal structures (you already have this) self._parse_response(response_json, kwargs.get("limit")) + log.info(f"Retrieved {len(self.studies)} studies.") - return self.get_summary() if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.") + return self.get_summary() + # if ("display" in kwargs and kwargs.get("display")) else log.info(f"Retrieved {len(self.studies)} studies.") def _parse_response(self, data, limit): @@ -1006,18 +1010,13 @@ def get_data(self, dataTableIDs=None, file_urls=None): for url in file_urls: mapping = self.file_url_to_datatable.get(url) if not mapping: - log.warning( - f"Attached '{url}' is not linked to any parent study; can not add metadata.", - UserWarning - ) + log.warning(f"Attached '{url}' is not linked to any parent study; can not add metadata.") dfs.extend(self._process_file(url)) else: mapping_details = self.data_table_index.get(mapping) if not mapping_details: log.warning( - f"Mapping details for file URL '{url}' (Data Table ID '{mapping}') not found; can not add metadata.", - UserWarning - ) + f"Mapping details for file URL '{url}' (Data Table ID '{mapping}') not found; can not add metadata.") dfs.extend(self._process_file(url)) else: dfs.extend(self._process_file(url, mapping_details)) 
diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py index fe11b390..a4280c73 100644 --- a/pyleotups/core/PangaeaDataset.py +++ b/pyleotups/core/PangaeaDataset.py @@ -158,7 +158,7 @@ def search_studies(self, display: if True, return get_summary() after populating registry Returns: - None by default, or pandas.DataFrame (same shape as Dataset.get_summary()) if display=True. + pandas.DataFrame (same shape as Dataset.get_summary()). """ # Direct ID loading mode if study_ids is not None: @@ -168,7 +168,10 @@ def search_studies(self, self._resolve_and_register_ids(study_ids) - return self.get_summary() if display else logger.info(f"Retrived {len(self.studies)} studies") + logger.info(f"Retrived {len(self.studies)} studies") + + return self.get_summary() + # if display else logger.info(f"Retrived {len(self.studies)} studies") # Query-based search @@ -181,7 +184,7 @@ def search_studies(self, try: pq = PanQuery(query=query_str, bbox=bbox, limit=limit, offset=offset) except Exception as exc: - logger.exception("PanQuery failed") + logger.exception(f"PanQuery failed due to {exc}") raise # register results in self.studies but do not accumulate into a dataframe here @@ -197,9 +200,10 @@ def search_studies(self, auth_token=self.auth_token, ) - - # Only return if user explicitly asked for display - return self.get_summary() if display else logger.info(f"Retrived {len(self.studies)} studies") + logger.info(f"Retrived {len(self.studies)} studies") + + return self.get_summary() + # if display else logger.info(f"Retrived {len(self.studies)} studies") # ------------------------- diff --git a/pyleotups/tests/test_NOAADataset.py b/pyleotups/tests/test_NOAADataset.py index 36d2b9c0..fc350bf8 100644 --- a/pyleotups/tests/test_NOAADataset.py +++ b/pyleotups/tests/test_NOAADataset.py @@ -358,8 +358,9 @@ def test_get_data_t03_from_file_url_with_mapping(self, mock_parser, mock_get): # --- Test t04: file_url not in mapping, should still parse --- 
@patch("pyleotups.core.NOAADataset.requests.get") @patch("pyleotups.core.NOAADataset.StandardParser") - def test_get_data_t04_unmapped_file_url_warns_and_parses(self, mock_parser, mock_get): + def test_get_data_t04_unmapped_file_url_warns_and_parses(self, mock_parser, mock_get, caplog): unmapped_url = "https://example.com/fake.txt" + mock_get.return_value.status_code = 200 mock_get.return_value.raise_for_status = lambda: None mock_get.return_value.text = "# mock\n# mock\n# mock\n# mock\n# mock" @@ -367,9 +368,18 @@ def test_get_data_t04_unmapped_file_url_warns_and_parses(self, mock_parser, mock dummy_df = pd.DataFrame({"depth": [10, 20]}) mock_parser.return_value.parse.return_value = dummy_df - with pytest.warns(UserWarning, match="not linked to any parent study"): + # Capture logs at WARNING level + with caplog.at_level("WARNING"): result = self.ds.get_data(file_urls=[unmapped_url]) - assert isinstance(result[0], pd.DataFrame) + + # Assert log message was emitted + assert any( + "not linked to any parent study" in record.message + for record in caplog.records + ) + + # Existing assertion + assert isinstance(result[0], pd.DataFrame) # --- Test t05: file with unsupported extension --- def test_get_data_t05_unsupported_file_type_raises(self): @@ -486,7 +496,7 @@ def test_add_t02_same_id_identical_keeps_left_no_warning(self): assert len(C.data_table_index) == len(A.data_table_index) assert len(C.file_url_to_datatable) == len(A.file_url_to_datatable) - def test_add_t03_same_id_different_warns_and_keeps_left(self): + def test_add_t03_same_id_different_warns_and_keeps_left(self, caplog): """C = A + B where same NOAAStudyId but different content → warning; C looks like A.""" A = _build_NOAAdataset_for_noaa_id(18315) @@ -497,7 +507,8 @@ def _mutate(study_dict): B = _build_NOAAdataset_for_noaa_id(18315, mutate=_mutate) # Expect a UserWarning mentioning duplicate/different study; keep regex loose and case-insensitive - with pytest.warns(UserWarning, 
match=r"(?i)duplicate.*study.*18315"): + # with pytest.warns(UserWarning, match=r"(?i)duplicate.*study.*18315"): + with caplog.at_level("WARNING"): C = A + B assert _ids(C) == {18315} @@ -508,6 +519,11 @@ def _mutate(study_dict): assert len(C.data_table_index) == len(A.data_table_index) assert len(C.file_url_to_datatable) == len(A.file_url_to_datatable) + assert any( + "duplicate" in record.message.lower() and "study" in record.message.lower() and "18315" in record.message + for record in caplog.records + ) + # --------------------------------------------------------------------------- # Tests: A = A + B (rebinding variable name to result of binary add) @@ -546,7 +562,7 @@ def test_add_rebind_t02_same_id_identical_no_warning(self): assert len(A.data_table_index) == len(canonical_A.data_table_index) assert len(A.file_url_to_datatable) == len(canonical_A.file_url_to_datatable) - def test_add_rebind_t03_same_id_different_warns_and_keeps_left(self): + def test_add_rebind_t03_same_id_different_warns_and_keeps_left(self, caplog): """A = A + B where same NOAAStudyId but different content → warning; A still looks like original A.""" A = _build_NOAAdataset_for_noaa_id(18315) @@ -555,7 +571,8 @@ def _mutate(study_dict): B = _build_NOAAdataset_for_noaa_id(18315, mutate=_mutate) - with pytest.warns(UserWarning, match=r"(?i)duplicate.*study.*18315"): + # with pytest.warns(UserWarning, match=r"(?i)duplicate.*study.*18315"): + with caplog.at_level("WARNING"): A = A + B assert _ids(A) == {18315} @@ -566,3 +583,8 @@ def _mutate(study_dict): assert _ids(A) == _ids(canonical_A) assert len(A.data_table_index) == len(canonical_A.data_table_index) assert len(A.file_url_to_datatable) == len(canonical_A.file_url_to_datatable) + + assert any( + "duplicate" in record.message.lower() and "study" in record.message.lower() and "18315" in record.message + for record in caplog.records + ) From c895cae5b6c1f5797863b8e73fc49321cf06b0d4 Mon Sep 17 00:00:00 2001 From: Dhiren Date: Tue, 24 Mar 2026 
12:38:58 -0700 Subject: [PATCH 5/9] Suppressing pangaeapy warnings --- pyleotups/core/PangaeaDataset.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/pyleotups/core/PangaeaDataset.py b/pyleotups/core/PangaeaDataset.py index a4280c73..f7b14a8a 100644 --- a/pyleotups/core/PangaeaDataset.py +++ b/pyleotups/core/PangaeaDataset.py @@ -17,6 +17,8 @@ from ..utils.PangaeaStudy import PangaeaStudy +logging.getLogger("pangaeapy").setLevel(logging.ERROR) + logger = logging.getLogger(__name__) # try to import pangaeapy; raise helpful error if missing From 332f890ed5ce61d85ed9fd7301d200e85aa81a6e Mon Sep 17 00:00:00 2001 From: Dhiren Date: Tue, 24 Mar 2026 12:58:35 -0700 Subject: [PATCH 6/9] updating tutorials --- examples/tutorial-noaa.ipynb | 1016 +++++++++++++++++++----------- examples/tutorial-pangaea.ipynb | 1033 +++++++++++++++++++++++++++---- 2 files changed, 1583 insertions(+), 466 deletions(-) diff --git a/examples/tutorial-noaa.ipynb b/examples/tutorial-noaa.ipynb index e48d2dc7..013bc1f1 100644 --- a/examples/tutorial-noaa.ipynb +++ b/examples/tutorial-noaa.ipynb @@ -74,21 +74,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:28:33,466][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.\n" + "[2026-03-24 12:41:47,812][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:47,814][INFO] - search_studies: Input Query includes geographical bounds. 
Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=18316&dataPublisher=NOAA\n" + "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=18316&limit=100\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1582.76it/s]\n" + "Parsing NOAA studies: 100%|██████████| 1/1 [00:00MostRecentYearBP\n", " EarliestYearCE\n", " MostRecentYearCE\n", + " Coverage [S, N, W, E]\n", " StudyNotes\n", " ScienceKeywords\n", " Investigators\n", @@ -139,6 +142,7 @@ " 11\n", " 733\n", " 1939\n", + " (-79.47, -79.47, -112.13, -112.13)\n", " CO2 concentration and Stable Isotopic Composit...\n", " [carbon cycle]\n", " Thomas Bauska, Fortunat Joos, Alan Mix, Raphae...\n", @@ -157,11 +161,14 @@ " DataType EarliestYearBP MostRecentYearBP EarliestYearCE \\\n", "0 ICE CORES 1217 11 733 \n", "\n", - " MostRecentYearCE StudyNotes \\\n", - "0 1939 CO2 concentration and Stable Isotopic Composit... \n", + " MostRecentYearCE Coverage [S, N, W, E] \\\n", + "0 1939 (-79.47, -79.47, -112.13, -112.13) \n", "\n", - " ScienceKeywords Investigators \\\n", - "0 [carbon cycle] Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... \n", + " StudyNotes ScienceKeywords \\\n", + "0 CO2 concentration and Stable Isotopic Composit... [carbon cycle] \n", + "\n", + " Investigators \\\n", + "0 Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... \n", "\n", " Publications \\\n", "0 [{'Author': 'Ahn, J., E. J. Brook, L. Mitchell... 
\n", @@ -224,7 +231,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:28:33,981][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n" + "[2026-03-24 12:41:48,799][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:48,801][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { @@ -239,8 +247,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 24/24 [00:00<00:00, 6960.54it/s]\n", - "[2025-12-11 10:28:34,807][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n" + "Parsing NOAA studies: 100%|██████████| 24/24 [00:00<00:00, 2378.51it/s]\n", + "[2026-03-24 12:41:49,974][INFO] - Retrieved 24 studies.\n", + "[2026-03-24 12:41:49,991][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:49,995][INFO] - search_studies: Input Query includes geographical bounds. 
Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { @@ -257,7 +267,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 2/2 [00:00<00:00, 1997.76it/s]" + "Parsing NOAA studies: 100%|██████████| 2/2 [00:00<00:00, 1000.19it/s]\n", + "[2026-03-24 12:41:50,765][INFO] - Retrieved 2 studies.\n" ] }, { @@ -266,13 +277,6 @@ "text": [ "Found 2 studies.\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] } ], "source": [ @@ -318,64 +322,22 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:28:35,380][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n" + "[2026-03-24 12:41:50,808][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:50,809][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n", + "[2026-03-24 12:41:51,435][WARNING] - No studies found for investigator(s): E.R., Wahl. NOAA expects 'LastName, Initials'. Try variations like:\n", + " - 'LastName, Initials'\n", + " - 'LastName'\n", + " - 'Initials'\n", + "[2026-03-24 12:41:51,438][INFO] - Retrieved 0 studies.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&limit=100&investigators=E.R.%2C+Wahl\n" + "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&limit=100&investigators=E.R.%2C+Wahl\n", + "Found 0 studies.\n" ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:452: UserWarning: No studies found for investigator(s): E.R., Wahl. NOAA expects 'LastName, Initials'. 
Try variations like:\n", - " - 'LastName, Initials'\n", - " - 'LastName'\n", - " - 'Initials'\n", - " warnings.warn(\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
\n", - "
" - ], - "text/plain": [ - "Empty DataFrame\n", - "Columns: []\n", - "Index: []" - ] - }, - "execution_count": null, - "metadata": {}, - "output_type": "execute_result" } ], "source": [ @@ -412,7 +374,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:28:35,835][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n" + "[2026-03-24 12:41:51,498][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:51,501][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { @@ -426,9 +389,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 100/100 [00:00<00:00, 4172.69it/s]\n", - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:501: UserWarning: Retrieved 100 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n", - " warnings.warn(\n" + "Parsing NOAA studies: 100%|██████████| 100/100 [00:00<00:00, 2330.91it/s]\n", + "[2026-03-24 12:41:53,526][WARNING] - Retrieved 100 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n", + "[2026-03-24 12:41:53,527][INFO] - Retrieved 100 studies.\n" ] }, { @@ -460,6 +423,7 @@ " MostRecentYearBP\n", " EarliestYearCE\n", " MostRecentYearCE\n", + " Coverage [S, N, W, E]\n", " StudyNotes\n", " ScienceKeywords\n", " Investigators\n", @@ -479,9 +443,10 @@ " -45.0\n", " 1000.0\n", " 1995.0\n", + " (0.0, 90.0, -180.0, 180.0)\n", " Calibration ensemble reconstructions of existi...\n", " [carbon cycle, sensitivity, Air Temperature Re...\n", - " David Frank, Valerie Trouet, Jan Esper, Christ...\n", + " David Frank, Jan Esper, Christoph Raible, Ulf ...\n", " [{'Author': 'Frank, D.C., J. Esper, C.C. 
Raibl...\n", " [[{'DataTableID': '19235', 'DataTableName': 'F...\n", " [{'fundingAgency': 'Swiss National Science Fou...\n", @@ -496,9 +461,10 @@ " -55.0\n", " 1000.0\n", " 2005.0\n", + " (0.0, 90.0, -180.0, 180.0)\n", " None\n", " [Atmospheric and Oceanic Circulation Patterns ...\n", - " Kai Kornhuber, Ellie Broadman, Valerie Trouet\n", + " Ellie Broadman, Valerie Trouet, Kai Kornhuber\n", " [{'Author': 'Broadman, Ellie, Kai Kornhuber, I...\n", " [[{'DataTableID': '56946', 'DataTableName': 'W...\n", " [{'fundingAgency': 'US National Science Founda...\n", @@ -513,9 +479,10 @@ " -5.0\n", " 50.0\n", " 1955.0\n", + " (-90.0, 90.0, -180.0, 180.0)\n", " Reconstruction of a precipitation-based Southe...\n", " [Atmospheric and Oceanic Circulation Patterns ...\n", - " Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi...\n", + " Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,...\n", " [{'Author': 'Yan, H., L. Sun, Y. Wang, W. Huan...\n", " [[{'DataTableID': '20526', 'DataTableName': 'S...\n", " [{'fundingAgency': 'National Natural Science F...\n", @@ -530,6 +497,7 @@ " -50.0\n", " -50.0\n", " 2000.0\n", + " (-30.0, 30.0, -180.0, 180.0)\n", " Composite reconstruction of low latitude rainf...\n", " [Precipitation Reconstruction]\n", " Franziska Lechleitner, Sebastian Breitenbach, ...\n", @@ -547,9 +515,10 @@ " -27.0\n", " -7439.0\n", " 1977.0\n", + " (-90.0, 90.0, -180.0, 180.0)\n", " Records of common production rate of cosmogeni...\n", " [Solar Forcing Reconstruction]\n", - " Irene Brunner, Marcus Christl, Hubertus Fische...\n", + " Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ...\n", " [{'Author': 'Steinhilber, F., J.A. Abreu, J. 
B...\n", " [[{'DataTableID': '21230', 'DataTableName': 'T...\n", " [{'fundingAgency': 'Swiss National Science Fou...\n", @@ -570,6 +539,7 @@ " ...\n", " ...\n", " ...\n", + " ...\n", " \n", " \n", " 95\n", @@ -581,11 +551,12 @@ " 0.0\n", " -18050.0\n", " 1950.0\n", + " (-45.5, 78.49, -163.25, 176.73)\n", " Lake status determined at 1000-year intervals ...\n", " [hydrology, trends]\n", " Frances Alayne Street-Perrott, None Marchand, ...\n", " [{'Author': 'Street-Perrott, F.A., D.S. Marcha...\n", - " [[{'DataTableID': '9036', 'DataTableName': 'Qu...\n", + " [[{'DataTableID': '9005', 'DataTableName': 'Mo...\n", " []\n", " \n", " \n", @@ -598,9 +569,10 @@ " 0.0\n", " -50169.0\n", " 1950.0\n", + " (-80.0, 90.0, -180.0, 180.0)\n", " None\n", " None\n", - " William Gray, Sophia Hines, Andrea Burke, Kass...\n", + " Patrick Rafter, William Gray, Sophia Hines, An...\n", " [{'Author': 'Rafter, Patrick A., William R. Gr...\n", " [[{'DataTableID': '49382', 'DataTableName': 'G...\n", " [{'fundingAgency': 'US National Science Founda...\n", @@ -615,6 +587,7 @@ " -41.0\n", " 1957.0\n", " 1991.0\n", + " (-90.0, 90.0, -180.0, 180.0)\n", " \n", " None\n", " Kazimierz Rozanski, Luis Araguás-Araguás, Robe...\n", @@ -632,6 +605,7 @@ " 0.0\n", " -23050.0\n", " 1950.0\n", + " (-80.0, 90.0, -180.0, 180.0)\n", " Transient simulation of ocean carbonate chemis...\n", " [carbon cycle]\n", " Jun Shao, Lowell Stott, William Gray, Rosanna ...\n", @@ -649,6 +623,7 @@ " 18000.0\n", " -98050.0\n", " -16050.0\n", + " (-90.0, 90.0, -180.0, 180.0)\n", " Tables and Table Notes \\nTable S1. Change i...\n", " [biogeochemical cycles]\n", " Karen Kohfeld, Corinne Le Quéré, Sandy Harriso...\n", @@ -658,7 +633,7 @@ " \n", " \n", "\n", - "

100 rows × 14 columns

\n", + "

100 rows × 15 columns

\n", "" ], "text/plain": [ @@ -688,18 +663,31 @@ "98 PALEOCLIMATIC MODELING 25000.0 0.0 -23050.0 \n", "99 PALEOCEANOGRAPHY 100000.0 18000.0 -98050.0 \n", "\n", - " MostRecentYearCE StudyNotes \\\n", - "0 1995.0 Calibration ensemble reconstructions of existi... \n", - "1 2005.0 None \n", - "2 1955.0 Reconstruction of a precipitation-based Southe... \n", - "3 2000.0 Composite reconstruction of low latitude rainf... \n", - "4 1977.0 Records of common production rate of cosmogeni... \n", - ".. ... ... \n", - "95 1950.0 Lake status determined at 1000-year intervals ... \n", - "96 1950.0 None \n", - "97 1991.0 \n", - "98 1950.0 Transient simulation of ocean carbonate chemis... \n", - "99 -16050.0 Tables and Table Notes \\nTable S1. Change i... \n", + " MostRecentYearCE Coverage [S, N, W, E] \\\n", + "0 1995.0 (0.0, 90.0, -180.0, 180.0) \n", + "1 2005.0 (0.0, 90.0, -180.0, 180.0) \n", + "2 1955.0 (-90.0, 90.0, -180.0, 180.0) \n", + "3 2000.0 (-30.0, 30.0, -180.0, 180.0) \n", + "4 1977.0 (-90.0, 90.0, -180.0, 180.0) \n", + ".. ... ... \n", + "95 1950.0 (-45.5, 78.49, -163.25, 176.73) \n", + "96 1950.0 (-80.0, 90.0, -180.0, 180.0) \n", + "97 1991.0 (-90.0, 90.0, -180.0, 180.0) \n", + "98 1950.0 (-80.0, 90.0, -180.0, 180.0) \n", + "99 -16050.0 (-90.0, 90.0, -180.0, 180.0) \n", + "\n", + " StudyNotes \\\n", + "0 Calibration ensemble reconstructions of existi... \n", + "1 None \n", + "2 Reconstruction of a precipitation-based Southe... \n", + "3 Composite reconstruction of low latitude rainf... \n", + "4 Records of common production rate of cosmogeni... \n", + ".. ... \n", + "95 Lake status determined at 1000-year intervals ... \n", + "96 None \n", + "97 \n", + "98 Transient simulation of ocean carbonate chemis... \n", + "99 Tables and Table Notes \\nTable S1. Change i... \n", "\n", " ScienceKeywords \\\n", "0 [carbon cycle, sensitivity, Air Temperature Re... 
\n", @@ -715,14 +703,14 @@ "99 [biogeochemical cycles] \n", "\n", " Investigators \\\n", - "0 David Frank, Valerie Trouet, Jan Esper, Christ... \n", - "1 Kai Kornhuber, Ellie Broadman, Valerie Trouet \n", - "2 Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi... \n", + "0 David Frank, Jan Esper, Christoph Raible, Ulf ... \n", + "1 Ellie Broadman, Valerie Trouet, Kai Kornhuber \n", + "2 Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,... \n", "3 Franziska Lechleitner, Sebastian Breitenbach, ... \n", - "4 Irene Brunner, Marcus Christl, Hubertus Fische... \n", + "4 Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ... \n", ".. ... \n", "95 Frances Alayne Street-Perrott, None Marchand, ... \n", - "96 William Gray, Sophia Hines, Andrea Burke, Kass... \n", + "96 Patrick Rafter, William Gray, Sophia Hines, An... \n", "97 Kazimierz Rozanski, Luis Araguás-Araguás, Robe... \n", "98 Jun Shao, Lowell Stott, William Gray, Rosanna ... \n", "99 Karen Kohfeld, Corinne Le Quéré, Sandy Harriso... \n", @@ -747,7 +735,7 @@ "3 [[{'DataTableID': '33444', 'DataTableName': 'L... \n", "4 [[{'DataTableID': '21230', 'DataTableName': 'T... \n", ".. ... \n", - "95 [[{'DataTableID': '9036', 'DataTableName': 'Qu... \n", + "95 [[{'DataTableID': '9005', 'DataTableName': 'Mo... \n", "96 [[{'DataTableID': '49382', 'DataTableName': 'G... \n", "97 [[{'DataTableID': '32472', 'DataTableName': 'G... \n", "98 [[{'DataTableID': '44097', 'DataTableName': 'S... \n", @@ -766,7 +754,7 @@ "98 [{'fundingAgency': 'US National Science Founda... \n", "99 [] \n", "\n", - "[100 rows x 14 columns]" + "[100 rows x 15 columns]" ] }, "execution_count": null, @@ -829,21 +817,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:28:37,851][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). 
Other parameters will be ignored.\n" + "[2026-03-24 12:41:53,910][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:53,912][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=18316&dataPublisher=NOAA\n" + "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=18316&limit=100\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1730.32it/s]\n" + "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1000.79it/s]\n", + "[2026-03-24 12:41:54,527][INFO] - Retrieved 1 studies.\n" ] }, { @@ -875,6 +865,7 @@ " MostRecentYearBP\n", " EarliestYearCE\n", " MostRecentYearCE\n", + " Coverage [S, N, W, E]\n", " StudyNotes\n", " ScienceKeywords\n", " Investigators\n", @@ -894,6 +885,7 @@ " 11\n", " 733\n", " 1939\n", + " (-79.47, -79.47, -112.13, -112.13)\n", " CO2 concentration and Stable Isotopic Composit...\n", " [carbon cycle]\n", " Thomas Bauska, Fortunat Joos, Alan Mix, Raphae...\n", @@ -912,11 +904,14 @@ " DataType EarliestYearBP MostRecentYearBP EarliestYearCE \\\n", "0 ICE CORES 1217 11 733 \n", "\n", - " MostRecentYearCE StudyNotes \\\n", - "0 1939 CO2 concentration and Stable Isotopic Composit... \n", + " MostRecentYearCE Coverage [S, N, W, E] \\\n", + "0 1939 (-79.47, -79.47, -112.13, -112.13) \n", + "\n", + " StudyNotes ScienceKeywords \\\n", + "0 CO2 concentration and Stable Isotopic Composit... [carbon cycle] \n", "\n", - " ScienceKeywords Investigators \\\n", - "0 [carbon cycle] Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... \n", + " Investigators \\\n", + "0 Thomas Bauska, Fortunat Joos, Alan Mix, Raphae... 
\n", "\n", " Publications \\\n", "0 [{'Author': 'Ahn, J., E. J. Brook, L. Mitchell... \n", @@ -1253,8 +1248,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:598: UserWarning: No path specified. Saving BibTeX to: bibtex_20251211_1028.bib\n", - " warnings.warn(f\"No path specified. Saving BibTeX to: {path}\")\n" + "[2026-03-24 12:41:54,639][WARNING] - No path specified. Saving BibTeX to: bibtex_20260324_1241.bib\n" ] }, { @@ -1315,6 +1309,276 @@ "dataset.get_publications(save=True)\n" ] }, + { + "cell_type": "code", + "execution_count": null, + "id": "8d1912e2", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StudyIDDataTypeSiteIDSiteNameLocationNameLatitudeLongitudeMinElevationMaxElevation
012402OTHER COLLECTIONS22723GlobalGeographic Region>Global-9090NoneNone
18610ICE CORES20633ACT2Continent>North America>Greenland66.0119-45.15824102410
28610ICE CORES22899ACT1Continent>North America>Greenland66.0039-46.551124102410
38610ICE CORES22900ACT3Continent>North America>Greenland65.995-43.606924102410
48610ICE CORES22901ACT4Continent>North America>Greenland65.9811-42.788924102410
56177ICE CORES20633ACT2Continent>North America>Greenland66.0119-45.15824102410
616279PALEOLIMNOLOGY55837Lake HaukadalsvatnContinent>Europe>Northern Europe>Iceland65.055846-21.6257573838
716279PALEOLIMNOLOGY55838Lake HvítárvatnContinent>Europe>Northern Europe>Iceland64.613723-19.8437693838
823930TREE RING57630Brooks Range UplandContinent>North America>United States Of Ameri...68.375-149.295910910
923931TREE RING57631Inigok RiparianContinent>North America>United States Of Ameri...69.99-153.043535
1023932TREE RING57632Inigok UplandContinent>North America>United States Of Ameri...69.99-153.043838
1123933TREE RING57633Itkillik UplandContinent>North America>United States Of Ameri...68.641-149.614739739
1222053TREE RING57178Kuparuk RiparianContinent>North America>United States Of Ameri...68.662-149.43717717
1322054TREE RING57179Kuparuk UplandContinent>North America>United States Of Ameri...68.662-149.428720720
\n", + "
" + ], + "text/plain": [ + " StudyID DataType SiteID SiteName \\\n", + "0 12402 OTHER COLLECTIONS 22723 Global \n", + "1 8610 ICE CORES 20633 ACT2 \n", + "2 8610 ICE CORES 22899 ACT1 \n", + "3 8610 ICE CORES 22900 ACT3 \n", + "4 8610 ICE CORES 22901 ACT4 \n", + "5 6177 ICE CORES 20633 ACT2 \n", + "6 16279 PALEOLIMNOLOGY 55837 Lake Haukadalsvatn \n", + "7 16279 PALEOLIMNOLOGY 55838 Lake Hvítárvatn \n", + "8 23930 TREE RING 57630 Brooks Range Upland \n", + "9 23931 TREE RING 57631 Inigok Riparian \n", + "10 23932 TREE RING 57632 Inigok Upland \n", + "11 23933 TREE RING 57633 Itkillik Upland \n", + "12 22053 TREE RING 57178 Kuparuk Riparian \n", + "13 22054 TREE RING 57179 Kuparuk Upland \n", + "\n", + " LocationName Latitude Longitude \\\n", + "0 Geographic Region>Global -90 90 \n", + "1 Continent>North America>Greenland 66.0119 -45.158 \n", + "2 Continent>North America>Greenland 66.0039 -46.5511 \n", + "3 Continent>North America>Greenland 65.995 -43.6069 \n", + "4 Continent>North America>Greenland 65.9811 -42.7889 \n", + "5 Continent>North America>Greenland 66.0119 -45.158 \n", + "6 Continent>Europe>Northern Europe>Iceland 65.055846 -21.625757 \n", + "7 Continent>Europe>Northern Europe>Iceland 64.613723 -19.843769 \n", + "8 Continent>North America>United States Of Ameri... 68.375 -149.295 \n", + "9 Continent>North America>United States Of Ameri... 69.99 -153.04 \n", + "10 Continent>North America>United States Of Ameri... 69.99 -153.04 \n", + "11 Continent>North America>United States Of Ameri... 68.641 -149.614 \n", + "12 Continent>North America>United States Of Ameri... 68.662 -149.43 \n", + "13 Continent>North America>United States Of Ameri... 
68.662 -149.428 \n", + "\n", + " MinElevation MaxElevation \n", + "0 None None \n", + "1 2410 2410 \n", + "2 2410 2410 \n", + "3 2410 2410 \n", + "4 2410 2410 \n", + "5 2410 2410 \n", + "6 38 38 \n", + "7 38 38 \n", + "8 910 910 \n", + "9 35 35 \n", + "10 38 38 \n", + "11 739 739 \n", + "12 717 717 \n", + "13 720 720 " + ] + }, + "execution_count": null, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dataset.get_geo()" + ] + }, { "cell_type": "markdown", "id": "30bfdb9d", @@ -1348,21 +1612,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:28:38,629][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). Other parameters will be ignored.\n" + "[2026-03-24 12:41:54,705][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:54,706][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=18316&dataPublisher=NOAA\n" + "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=18316&limit=100\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 2832.08it/s]\n" + "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 491.54it/s]\n", + "[2026-03-24 12:41:55,514][INFO] - Retrieved 1 studies.\n" ] }, { @@ -1903,21 +2169,23 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:28:43,889][INFO] - search_studies: Using identifier-only fetch (xml_id/NOAAStudyId). 
Other parameters will be ignored.\n" + "[2026-03-24 12:41:59,610][INFO] - search_studies: Limit defaulted to 100 (PyleoTUPS).\n", + "[2026-03-24 12:41:59,613][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?NOAAStudyId=9957&dataPublisher=NOAA\n" + "Request URL: https://www.ncei.noaa.gov/access/paleo-search/study/search.json?dataPublisher=NOAA&NOAAStudyId=9957&limit=100\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 1269.08it/s]\n" + "Parsing NOAA studies: 100%|██████████| 1/1 [00:00<00:00, 486.47it/s]\n", + "[2026-03-24 12:42:30,274][INFO] - Retrieved 1 studies.\n" ] }, { @@ -1949,6 +2217,7 @@ " MostRecentYearBP\n", " EarliestYearCE\n", " MostRecentYearCE\n", + " Coverage [S, N, W, E]\n", " StudyNotes\n", " ScienceKeywords\n", " Investigators\n", @@ -1968,11 +2237,12 @@ " 7678\n", " -6616\n", " -5728\n", + " (-13.2167, 25.28, -44.05, 108.08)\n", " Oxygen isotope data from six stalagmites in Ch...\n", " [abrupt climate change, Intertropical Converge...\n", - " R. Lawrence Edwards, Augusto Mangini, Stephen ...\n", + " Hai Cheng, Dominik Fleitmann, R. Lawrence Edwa...\n", " [{'Author': 'Cheng, H., D. Fleitmann, R.L. Edw...\n", - " [[{'DataTableID': '18803', 'DataTableName': 'H...\n", + " [[{'DataTableID': '18801', 'DataTableName': 'D...\n", " [{'fundingAgency': 'Comer Science and Educatio...\n", " \n", " \n", @@ -1986,6 +2256,9 @@ " EarliestYearBP MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n", "0 8566 7678 -6616 -5728 \n", "\n", + " Coverage [S, N, W, E] \\\n", + "0 (-13.2167, 25.28, -44.05, 108.08) \n", + "\n", " StudyNotes \\\n", "0 Oxygen isotope data from six stalagmites in Ch... 
\n", "\n", @@ -1993,13 +2266,13 @@ "0 [abrupt climate change, Intertropical Converge... \n", "\n", " Investigators \\\n", - "0 R. Lawrence Edwards, Augusto Mangini, Stephen ... \n", + "0 Hai Cheng, Dominik Fleitmann, R. Lawrence Edwa... \n", "\n", " Publications \\\n", "0 [{'Author': 'Cheng, H., D. Fleitmann, R.L. Edw... \n", "\n", " Sites \\\n", - "0 [[{'DataTableID': '18803', 'DataTableName': 'H... \n", + "0 [[{'DataTableID': '18801', 'DataTableName': 'D... \n", "\n", " Funding \n", "0 [{'fundingAgency': 'Comer Science and Educatio... " @@ -2064,229 +2337,229 @@ " \n", " \n", " 0\n", - " 18803\n", - " H14\n", + " 18801\n", + " D4 Dongge\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " [age, depth, delta 18O]\n", " Speleothem\n", " 2\n", - " 31383\n", - " Hoti Cave\n", - " Continent>Asia>Western Asia>Middle East>Oman\n", - " 23.08\n", - " 57.35\n", - " 800\n", - " 800\n", + " 6554\n", + " Dongge Cave\n", + " Continent>Asia>Eastern Asia>China\n", + " 25.28\n", + " 108.08\n", + " 680\n", + " 680\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 1\n", - " 18803\n", - " H14\n", + " 18801\n", + " D4 Dongge\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " []\n", " Speleothem\n", " 2\n", - " 31383\n", - " Hoti Cave\n", - " Continent>Asia>Western Asia>Middle East>Oman\n", - " 23.08\n", - " 57.35\n", - " 800\n", - " 800\n", + " 6554\n", + " Dongge Cave\n", + " Continent>Asia>Eastern Asia>China\n", + " 25.28\n", + " 108.08\n", + " 680\n", + " 680\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 2\n", - " 18804\n", - " PAD07\n", + " 18802\n", + " DA Dongge\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " [age, depth, delta 18O]\n", " Speleothem\n", " 2\n", - " 31568\n", - " Padre Cave\n", - " Continent>South America>Brazil\n", - " -13.2167\n", - " -44.05\n", - " 650\n", - " 800\n", + " 6554\n", + " Dongge Cave\n", + " 
Continent>Asia>Eastern Asia>China\n", + " 25.28\n", + " 108.08\n", + " 680\n", + " 680\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 3\n", - " 18804\n", - " PAD07\n", + " 18802\n", + " DA Dongge\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " []\n", " Speleothem\n", " 2\n", - " 31568\n", - " Padre Cave\n", - " Continent>South America>Brazil\n", - " -13.2167\n", - " -44.05\n", - " 650\n", - " 800\n", + " 6554\n", + " Dongge Cave\n", + " Continent>Asia>Eastern Asia>China\n", + " 25.28\n", + " 108.08\n", + " 680\n", + " 680\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 4\n", - " 18805\n", - " PX5\n", + " 18806\n", + " Q5 Qunf\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " [age, depth, delta 18O]\n", " Speleothem\n", " 2\n", - " 31569\n", - " Paixão Cave\n", - " Continent>South America>Brazil\n", - " -12.65\n", - " -41.05\n", + " 14640\n", + " Qunf Cave\n", + " Continent>Asia>Western Asia>Middle East>Oman\n", + " 17.17\n", + " 54.3\n", + " 650\n", " 650\n", - " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 5\n", - " 18805\n", - " PX5\n", + " 18806\n", + " Q5 Qunf\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " []\n", " Speleothem\n", " 2\n", - " 31569\n", - " Paixão Cave\n", - " Continent>South America>Brazil\n", - " -12.65\n", - " -41.05\n", + " 14640\n", + " Qunf Cave\n", + " Continent>Asia>Western Asia>Middle East>Oman\n", + " 17.17\n", + " 54.3\n", + " 650\n", " 650\n", - " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 6\n", - " 18801\n", - " D4 Dongge\n", + " 18803\n", + " H14\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " [age, depth, delta 18O]\n", " Speleothem\n", " 2\n", - " 6554\n", - " Dongge Cave\n", - " Continent>Asia>Eastern Asia>China\n", - " 25.28\n", - " 108.08\n", - " 680\n", - " 680\n", + " 
31383\n", + " Hoti Cave\n", + " Continent>Asia>Western Asia>Middle East>Oman\n", + " 23.08\n", + " 57.35\n", + " 800\n", + " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 7\n", - " 18801\n", - " D4 Dongge\n", + " 18803\n", + " H14\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " []\n", " Speleothem\n", " 2\n", - " 6554\n", - " Dongge Cave\n", - " Continent>Asia>Eastern Asia>China\n", - " 25.28\n", - " 108.08\n", - " 680\n", - " 680\n", + " 31383\n", + " Hoti Cave\n", + " Continent>Asia>Western Asia>Middle East>Oman\n", + " 23.08\n", + " 57.35\n", + " 800\n", + " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 8\n", - " 18802\n", - " DA Dongge\n", + " 18804\n", + " PAD07\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " [age, depth, delta 18O]\n", " Speleothem\n", " 2\n", - " 6554\n", - " Dongge Cave\n", - " Continent>Asia>Eastern Asia>China\n", - " 25.28\n", - " 108.08\n", - " 680\n", - " 680\n", + " 31568\n", + " Padre Cave\n", + " Continent>South America>Brazil\n", + " -13.2167\n", + " -44.05\n", + " 650\n", + " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 9\n", - " 18802\n", - " DA Dongge\n", + " 18804\n", + " PAD07\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " []\n", " Speleothem\n", " 2\n", - " 6554\n", - " Dongge Cave\n", - " Continent>Asia>Eastern Asia>China\n", - " 25.28\n", - " 108.08\n", - " 680\n", - " 680\n", + " 31568\n", + " Padre Cave\n", + " Continent>South America>Brazil\n", + " -13.2167\n", + " -44.05\n", + " 650\n", + " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 10\n", - " 18806\n", - " Q5 Qunf\n", + " 18805\n", + " PX5\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " [age, depth, delta 18O]\n", " Speleothem\n", " 2\n", - " 14640\n", - " Qunf Cave\n", - " Continent>Asia>Western 
Asia>Middle East>Oman\n", - " 17.17\n", - " 54.3\n", - " 650\n", + " 31569\n", + " Paixão Cave\n", + " Continent>South America>Brazil\n", + " -12.65\n", + " -41.05\n", " 650\n", + " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", " \n", " 11\n", - " 18806\n", - " Q5 Qunf\n", + " 18805\n", + " PX5\n", " cal yr BP\n", " https://www.ncei.noaa.gov/pub/data/paleo/spele...\n", " []\n", " Speleothem\n", " 2\n", - " 14640\n", - " Qunf Cave\n", - " Continent>Asia>Western Asia>Middle East>Oman\n", - " 17.17\n", - " 54.3\n", - " 650\n", + " 31569\n", + " Paixão Cave\n", + " Continent>South America>Brazil\n", + " -12.65\n", + " -41.05\n", " 650\n", + " 800\n", " 9957\n", " 8.2k Event Speleothem Oxygen Isotope Data\n", " \n", @@ -2296,18 +2569,18 @@ ], "text/plain": [ " DataTableID DataTableName TimeUnit \\\n", - "0 18803 H14 cal yr BP \n", - "1 18803 H14 cal yr BP \n", - "2 18804 PAD07 cal yr BP \n", - "3 18804 PAD07 cal yr BP \n", - "4 18805 PX5 cal yr BP \n", - "5 18805 PX5 cal yr BP \n", - "6 18801 D4 Dongge cal yr BP \n", - "7 18801 D4 Dongge cal yr BP \n", - "8 18802 DA Dongge cal yr BP \n", - "9 18802 DA Dongge cal yr BP \n", - "10 18806 Q5 Qunf cal yr BP \n", - "11 18806 Q5 Qunf cal yr BP \n", + "0 18801 D4 Dongge cal yr BP \n", + "1 18801 D4 Dongge cal yr BP \n", + "2 18802 DA Dongge cal yr BP \n", + "3 18802 DA Dongge cal yr BP \n", + "4 18806 Q5 Qunf cal yr BP \n", + "5 18806 Q5 Qunf cal yr BP \n", + "6 18803 H14 cal yr BP \n", + "7 18803 H14 cal yr BP \n", + "8 18804 PAD07 cal yr BP \n", + "9 18804 PAD07 cal yr BP \n", + "10 18805 PX5 cal yr BP \n", + "11 18805 PX5 cal yr BP \n", "\n", " FileURL \\\n", "0 https://www.ncei.noaa.gov/pub/data/paleo/spele... \n", @@ -2324,46 +2597,46 @@ "11 https://www.ncei.noaa.gov/pub/data/paleo/spele... 
\n", "\n", " Variables FileDescription TotalFilesAvailable SiteID \\\n", - "0 [age, depth, delta 18O] Speleothem 2 31383 \n", - "1 [] Speleothem 2 31383 \n", - "2 [age, depth, delta 18O] Speleothem 2 31568 \n", - "3 [] Speleothem 2 31568 \n", - "4 [age, depth, delta 18O] Speleothem 2 31569 \n", - "5 [] Speleothem 2 31569 \n", - "6 [age, depth, delta 18O] Speleothem 2 6554 \n", - "7 [] Speleothem 2 6554 \n", - "8 [age, depth, delta 18O] Speleothem 2 6554 \n", - "9 [] Speleothem 2 6554 \n", - "10 [age, depth, delta 18O] Speleothem 2 14640 \n", - "11 [] Speleothem 2 14640 \n", + "0 [age, depth, delta 18O] Speleothem 2 6554 \n", + "1 [] Speleothem 2 6554 \n", + "2 [age, depth, delta 18O] Speleothem 2 6554 \n", + "3 [] Speleothem 2 6554 \n", + "4 [age, depth, delta 18O] Speleothem 2 14640 \n", + "5 [] Speleothem 2 14640 \n", + "6 [age, depth, delta 18O] Speleothem 2 31383 \n", + "7 [] Speleothem 2 31383 \n", + "8 [age, depth, delta 18O] Speleothem 2 31568 \n", + "9 [] Speleothem 2 31568 \n", + "10 [age, depth, delta 18O] Speleothem 2 31569 \n", + "11 [] Speleothem 2 31569 \n", "\n", " SiteName LocationName Latitude \\\n", - "0 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n", - "1 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n", - "2 Padre Cave Continent>South America>Brazil -13.2167 \n", - "3 Padre Cave Continent>South America>Brazil -13.2167 \n", - "4 Paixão Cave Continent>South America>Brazil -12.65 \n", - "5 Paixão Cave Continent>South America>Brazil -12.65 \n", - "6 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n", - "7 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n", - "8 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n", - "9 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n", - "10 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n", - "11 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n", + "0 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n", + "1 Dongge Cave 
Continent>Asia>Eastern Asia>China 25.28 \n", + "2 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n", + "3 Dongge Cave Continent>Asia>Eastern Asia>China 25.28 \n", + "4 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n", + "5 Qunf Cave Continent>Asia>Western Asia>Middle East>Oman 17.17 \n", + "6 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n", + "7 Hoti Cave Continent>Asia>Western Asia>Middle East>Oman 23.08 \n", + "8 Padre Cave Continent>South America>Brazil -13.2167 \n", + "9 Padre Cave Continent>South America>Brazil -13.2167 \n", + "10 Paixão Cave Continent>South America>Brazil -12.65 \n", + "11 Paixão Cave Continent>South America>Brazil -12.65 \n", "\n", " Longitude MinElevation MaxElevation StudyID \\\n", - "0 57.35 800 800 9957 \n", - "1 57.35 800 800 9957 \n", - "2 -44.05 650 800 9957 \n", - "3 -44.05 650 800 9957 \n", - "4 -41.05 650 800 9957 \n", - "5 -41.05 650 800 9957 \n", - "6 108.08 680 680 9957 \n", - "7 108.08 680 680 9957 \n", - "8 108.08 680 680 9957 \n", - "9 108.08 680 680 9957 \n", - "10 54.3 650 650 9957 \n", - "11 54.3 650 650 9957 \n", + "0 108.08 680 680 9957 \n", + "1 108.08 680 680 9957 \n", + "2 108.08 680 680 9957 \n", + "3 108.08 680 680 9957 \n", + "4 54.3 650 650 9957 \n", + "5 54.3 650 650 9957 \n", + "6 57.35 800 800 9957 \n", + "7 57.35 800 800 9957 \n", + "8 -44.05 650 800 9957 \n", + "9 -44.05 650 800 9957 \n", + "10 -41.05 650 800 9957 \n", + "11 -41.05 650 800 9957 \n", "\n", " StudyName \n", "0 8.2k Event Speleothem Oxygen Isotope Data \n", @@ -3879,8 +4152,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1067: UserWarning: Attached 'https://www.ncei.noaa.gov/pub/data/paleo/climate_forcing/trace_gases/mcelwain1995co2.txt' is not linked to any parent study; can not add metadata.\n", - " warnings.warn(\n" + "[2026-03-24 12:42:32,114][WARNING] - Attached 
'https://www.ncei.noaa.gov/pub/data/paleo/climate_forcing/trace_gases/mcelwain1995co2.txt' is not linked to any parent study; can not add metadata.\n" ] } ], @@ -4079,8 +4351,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1067: UserWarning: Attached 'https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd' is not linked to any parent study; can not add metadata.\n", - " warnings.warn(\n" + "[2026-03-24 12:42:33,338][WARNING] - Attached 'https://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd' is not linked to any parent study; can not add metadata.\n" ] }, { @@ -4090,9 +4361,9 @@ "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mUnsupportedFileTypeError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[39]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m error_ds = pt.Dataset()\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1071\u001b[39m, in \u001b[36mDataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1066\u001b[39m 
\u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1067\u001b[39m warnings.warn(\n\u001b[32m 1068\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1069\u001b[39m \u001b[38;5;167;01mUserWarning\u001b[39;00m\n\u001b[32m 1070\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1071\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1072\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1073\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:890\u001b[39m, in \u001b[36mDataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 888\u001b[39m file_type = file_url.split(\u001b[33m'\u001b[39m\u001b[33m.\u001b[39m\u001b[33m'\u001b[39m)[-\u001b[32m1\u001b[39m].lower()\n\u001b[32m 889\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._PROPRIETARY_TYPES:\n\u001b[32m--> \u001b[39m\u001b[32m890\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 891\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. 
File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 892\u001b[39m )\n\u001b[32m 893\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m 894\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 895\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 896\u001b[39m )\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[20]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m error_ds = pt.NOAADataset()\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/reconstructions/climate12k/temperature/version1.0.0/Temp12k_directory_LiPD_files/AdelaideTarn.Jara.2015.lpd\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:1014\u001b[39m, in \u001b[36mNOAADataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1012\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1013\u001b[39m log.warning(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached 
\u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1014\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1015\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1016\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:841\u001b[39m, in \u001b[36mNOAADataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 839\u001b[39m file_type = file_url.split(\u001b[33m'\u001b[39m\u001b[33m.\u001b[39m\u001b[33m'\u001b[39m)[-\u001b[32m1\u001b[39m].lower()\n\u001b[32m 840\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m._PROPRIETARY_TYPES:\n\u001b[32m--> \u001b[39m\u001b[32m841\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 842\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. 
File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 843\u001b[39m )\n\u001b[32m 844\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m 845\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 846\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 847\u001b[39m )\n", "\u001b[31mUnsupportedFileTypeError\u001b[39m: pyleotups works with .txt files only. File type 'lpd' is proprietary." ] } @@ -4113,8 +4384,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1067: UserWarning: Attached 'https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls' is not linked to any parent study; can not add metadata.\n", - " warnings.warn(\n" + "[2026-03-24 12:42:40,758][WARNING] - Attached 'https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls' is not linked to any parent study; can not add metadata.\n" ] }, { @@ -4124,9 +4394,9 @@ "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mUnsupportedFileTypeError\u001b[39m Traceback (most recent call last)", - "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[40]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m 
\u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:1071\u001b[39m, in \u001b[36mDataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1066\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1067\u001b[39m warnings.warn(\n\u001b[32m 1068\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m,\n\u001b[32m 1069\u001b[39m \u001b[38;5;167;01mUserWarning\u001b[39;00m\n\u001b[32m 1070\u001b[39m )\n\u001b[32m-> \u001b[39m\u001b[32m1071\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1072\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1073\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n", - "\u001b[36mFile \u001b[39m\u001b[32m~/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:894\u001b[39m, in \u001b[36mDataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 890\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 891\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. 
File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 892\u001b[39m )\n\u001b[32m 893\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m894\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 895\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 896\u001b[39m )\n\u001b[32m 898\u001b[39m \u001b[38;5;66;03m# Step 1: Detect parser type by reading initial lines\u001b[39;00m\n\u001b[32m 899\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdetect_parser_type\u001b[39m(lines):\n\u001b[32m 900\u001b[39m \u001b[38;5;66;03m# 1. 
Clean lines: strip whitespace and remove empty lines\u001b[39;00m\n", + "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[21]\u001b[39m\u001b[32m, line 1\u001b[39m\n\u001b[32m----> \u001b[39m\u001b[32m1\u001b[39m \u001b[43merror_ds\u001b[49m\u001b[43m.\u001b[49m\u001b[43mget_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mfile_urls\u001b[49m\u001b[43m \u001b[49m\u001b[43m=\u001b[49m\u001b[43m \u001b[49m\u001b[43m[\u001b[49m\u001b[33;43m\"\u001b[39;49m\u001b[33;43mhttps://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls\u001b[39;49m\u001b[33;43m\"\u001b[39;49m\u001b[43m]\u001b[49m\u001b[43m)\u001b[49m\n", + "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:1014\u001b[39m, in \u001b[36mNOAADataset.get_data\u001b[39m\u001b[34m(self, dataTableIDs, file_urls)\u001b[39m\n\u001b[32m 1012\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m mapping:\n\u001b[32m 1013\u001b[39m log.warning(\u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mAttached \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00murl\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is not linked to any parent study; can not add metadata.\u001b[39m\u001b[33m\"\u001b[39m)\n\u001b[32m-> \u001b[39m\u001b[32m1014\u001b[39m dfs.extend(\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43m_process_file\u001b[49m\u001b[43m(\u001b[49m\u001b[43murl\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[32m 1015\u001b[39m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[32m 1016\u001b[39m mapping_details = \u001b[38;5;28mself\u001b[39m.data_table_index.get(mapping)\n", + "\u001b[36mFile \u001b[39m\u001b[32m~\\OneDrive\\Desktop\\pyelotups\\pyleotups\\core\\NOAADataset.py:845\u001b[39m, in \u001b[36mNOAADataset._process_file\u001b[39m\u001b[34m(self, file_url, mapping)\u001b[39m\n\u001b[32m 841\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 
842\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mpyleotups works with .txt files only. File type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m is proprietary.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 843\u001b[39m )\n\u001b[32m 844\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m file_type != \u001b[33m'\u001b[39m\u001b[33mtxt\u001b[39m\u001b[33m'\u001b[39m:\n\u001b[32m--> \u001b[39m\u001b[32m845\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m UnsupportedFileTypeError(\n\u001b[32m 846\u001b[39m \u001b[33mf\u001b[39m\u001b[33m\"\u001b[39m\u001b[33mInvalid file type \u001b[39m\u001b[33m'\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfile_type\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m\u001b[33m. Only .txt files are supported.\u001b[39m\u001b[33m\"\u001b[39m\n\u001b[32m 847\u001b[39m )\n\u001b[32m 849\u001b[39m \u001b[38;5;66;03m# Step 1: Detect parser type by reading initial lines\u001b[39;00m\n\u001b[32m 850\u001b[39m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[34mdetect_parser_type\u001b[39m(lines):\n\u001b[32m 851\u001b[39m \u001b[38;5;66;03m# 1. Clean lines: strip whitespace and remove empty lines\u001b[39;00m\n", "\u001b[31mUnsupportedFileTypeError\u001b[39m: Invalid file type 'xls'. Only .txt files are supported." ] } @@ -4159,7 +4429,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:50:30,372][INFO] - search_studies: Limit set to 10.\n" + "[2026-03-24 12:42:51,327][INFO] - search_studies: Limit set to 10.\n", + "[2026-03-24 12:42:51,330][INFO] - search_studies: Input Query includes geographical bounds. 
Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { @@ -4174,7 +4445,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 10559.68it/s]" + "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 1539.14it/s]\n", + "[2026-03-24 12:42:52,372][WARNING] - Retrieved 10 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n", + "[2026-03-24 12:42:52,376][INFO] - Retrieved 10 studies.\n" ] }, { @@ -4184,13 +4457,6 @@ "Current studies in dataset: 10\n" ] }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "text/html": [ @@ -4220,6 +4486,7 @@ " MostRecentYearBP\n", " EarliestYearCE\n", " MostRecentYearCE\n", + " Coverage [S, N, W, E]\n", " StudyNotes\n", " ScienceKeywords\n", " Investigators\n", @@ -4239,9 +4506,10 @@ " -45.0\n", " 1000.0\n", " 1995.0\n", + " (0.0, 90.0, -180.0, 180.0)\n", " Calibration ensemble reconstructions of existi...\n", " [carbon cycle, sensitivity, Air Temperature Re...\n", - " David Frank, Valerie Trouet, Jan Esper, Christ...\n", + " David Frank, Jan Esper, Christoph Raible, Ulf ...\n", " [{'Author': 'Frank, D.C., J. Esper, C.C. 
Raibl...\n", " [[{'DataTableID': '19235', 'DataTableName': 'F...\n", " [{'fundingAgency': 'Swiss National Science Fou...\n", @@ -4256,6 +4524,7 @@ " -46.0\n", " 1000.0\n", " 1996.0\n", + " (56.66667, 69.48333, -18.1957, 18.36667)\n", " A set of reconstructions of sea surface temper...\n", " None\n", " Laura Cunningham, William Austin, Karen-Luise ...\n", @@ -4273,9 +4542,10 @@ " -55.0\n", " 1000.0\n", " 2005.0\n", + " (0.0, 90.0, -180.0, 180.0)\n", " None\n", " [Atmospheric and Oceanic Circulation Patterns ...\n", - " Kai Kornhuber, Ellie Broadman, Valerie Trouet\n", + " Ellie Broadman, Valerie Trouet, Kai Kornhuber\n", " [{'Author': 'Broadman, Ellie, Kai Kornhuber, I...\n", " [[{'DataTableID': '56946', 'DataTableName': 'W...\n", " [{'fundingAgency': 'US National Science Founda...\n", @@ -4290,6 +4560,7 @@ " -60.0\n", " 800.0\n", " 2010.0\n", + " (0.0, 70.0, -80.0, 0.0)\n", " Summer (May-September) Atlantic Multidecadal V...\n", " [Atmospheric and Oceanic Circulation Patterns ...\n", " Jianglin Wang, Bao Yang, Fredrik Ljungqvist, J...\n", @@ -4307,9 +4578,10 @@ " -5.0\n", " 50.0\n", " 1955.0\n", + " (-90.0, 90.0, -180.0, 180.0)\n", " Reconstruction of a precipitation-based Southe...\n", " [Atmospheric and Oceanic Circulation Patterns ...\n", - " Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi...\n", + " Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,...\n", " [{'Author': 'Yan, H., L. Sun, Y. Wang, W. 
Huan...\n", " [[{'DataTableID': '20526', 'DataTableName': 'S...\n", " [{'fundingAgency': 'National Natural Science F...\n", @@ -4324,6 +4596,7 @@ " NaN\n", " NaN\n", " NaN\n", + " (70.0, 90.0, -180.0, 180.0)\n", " Provided Keywords: protactinium-231, 231Pa, th...\n", " None\n", " Lauren Kipp, Jerry McManus, Markus Kienast\n", @@ -4341,6 +4614,7 @@ " -61.0\n", " 1400.0\n", " 2011.0\n", + " (45.0, 85.0, -180.0, 180.0)\n", " Ensemble Climate Reconstructions, input data f...\n", " [Air Temperature Reconstruction]\n", " Martin Tingley, Peter Huybers\n", @@ -4358,9 +4632,10 @@ " -27.0\n", " -7439.0\n", " 1977.0\n", + " (-90.0, 90.0, -180.0, 180.0)\n", " Records of common production rate of cosmogeni...\n", " [Solar Forcing Reconstruction]\n", - " Irene Brunner, Marcus Christl, Hubertus Fische...\n", + " Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ...\n", " [{'Author': 'Steinhilber, F., J.A. Abreu, J. B...\n", " [[{'DataTableID': '21230', 'DataTableName': 'T...\n", " [{'fundingAgency': 'Swiss National Science Fou...\n", @@ -4375,12 +4650,13 @@ " -52.0\n", " -1026.0\n", " 2002.0\n", + " (64.63805, 64.64305, -19.85912, -19.83995)\n", " None\n", " [Medieval Warm Period, Little Ice Age (LIA), A...\n", " Darren Larsen, Gifford Miller, Áslaug Geirsdót...\n", " [{'Author': 'Larsen, D.J., Miller, G.H., Geirs...\n", " [[{'DataTableID': '24775', 'DataTableName': 'H...\n", - " []\n", + " [{'fundingAgency': 'US National Science Founda...\n", " \n", " \n", " 9\n", @@ -4392,6 +4668,7 @@ " NaN\n", " NaN\n", " NaN\n", + " (68.433742, 68.433742, -159.173633, -159.173633)\n", " None\n", " [Arctic, temperature, precipitation]\n", " Joseph Stoner, Mark Abbott, Jason Dorfman\n", @@ -4428,17 +4705,29 @@ "8 PALEOLIMNOLOGY 2976.0 -52.0 -1026.0 \n", "9 PALEOLIMNOLOGY NaN NaN NaN \n", "\n", - " MostRecentYearCE StudyNotes \\\n", - "0 1995.0 Calibration ensemble reconstructions of existi... \n", - "1 1996.0 A set of reconstructions of sea surface temper... 
\n", - "2 2005.0 None \n", - "3 2010.0 Summer (May-September) Atlantic Multidecadal V... \n", - "4 1955.0 Reconstruction of a precipitation-based Southe... \n", - "5 NaN Provided Keywords: protactinium-231, 231Pa, th... \n", - "6 2011.0 Ensemble Climate Reconstructions, input data f... \n", - "7 1977.0 Records of common production rate of cosmogeni... \n", - "8 2002.0 None \n", - "9 NaN None \n", + " MostRecentYearCE Coverage [S, N, W, E] \\\n", + "0 1995.0 (0.0, 90.0, -180.0, 180.0) \n", + "1 1996.0 (56.66667, 69.48333, -18.1957, 18.36667) \n", + "2 2005.0 (0.0, 90.0, -180.0, 180.0) \n", + "3 2010.0 (0.0, 70.0, -80.0, 0.0) \n", + "4 1955.0 (-90.0, 90.0, -180.0, 180.0) \n", + "5 NaN (70.0, 90.0, -180.0, 180.0) \n", + "6 2011.0 (45.0, 85.0, -180.0, 180.0) \n", + "7 1977.0 (-90.0, 90.0, -180.0, 180.0) \n", + "8 2002.0 (64.63805, 64.64305, -19.85912, -19.83995) \n", + "9 NaN (68.433742, 68.433742, -159.173633, -159.173633) \n", + "\n", + " StudyNotes \\\n", + "0 Calibration ensemble reconstructions of existi... \n", + "1 A set of reconstructions of sea surface temper... \n", + "2 None \n", + "3 Summer (May-September) Atlantic Multidecadal V... \n", + "4 Reconstruction of a precipitation-based Southe... \n", + "5 Provided Keywords: protactinium-231, 231Pa, th... \n", + "6 Ensemble Climate Reconstructions, input data f... \n", + "7 Records of common production rate of cosmogeni... \n", + "8 None \n", + "9 None \n", "\n", " ScienceKeywords \\\n", "0 [carbon cycle, sensitivity, Air Temperature Re... \n", @@ -4453,14 +4742,14 @@ "9 [Arctic, temperature, precipitation] \n", "\n", " Investigators \\\n", - "0 David Frank, Valerie Trouet, Jan Esper, Christ... \n", + "0 David Frank, Jan Esper, Christoph Raible, Ulf ... \n", "1 Laura Cunningham, William Austin, Karen-Luise ... \n", - "2 Kai Kornhuber, Ellie Broadman, Valerie Trouet \n", + "2 Ellie Broadman, Valerie Trouet, Kai Kornhuber \n", "3 Jianglin Wang, Bao Yang, Fredrik Ljungqvist, J... 
\n", - "4 Liguang Sun, Yuhong Wang, Wen Huang, Shican Qi... \n", + "4 Hong Yan, Liguang Sun, Yuhong Wang, Wen Huang,... \n", "5 Lauren Kipp, Jerry McManus, Markus Kienast \n", "6 Martin Tingley, Peter Huybers \n", - "7 Irene Brunner, Marcus Christl, Hubertus Fische... \n", + "7 Friedhelm Steinhilber, Jose Abreu, Jürg Beer, ... \n", "8 Darren Larsen, Gifford Miller, Áslaug Geirsdót... \n", "9 Joseph Stoner, Mark Abbott, Jason Dorfman \n", "\n", @@ -4497,7 +4786,7 @@ "5 [] \n", "6 [{'fundingAgency': 'US National Science Founda... \n", "7 [{'fundingAgency': 'Swiss National Science Fou... \n", - "8 [] \n", + "8 [{'fundingAgency': 'US National Science Founda... \n", "9 [{'fundingAgency': 'US National Science Founda... " ] }, @@ -4526,7 +4815,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:50:47,008][INFO] - search_studies: Limit set to 10.\n" + "[2026-03-24 12:42:52,631][INFO] - search_studies: Limit set to 10.\n", + "[2026-03-24 12:42:52,634][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { @@ -4542,7 +4832,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 5585.70it/s]" + "Parsing NOAA studies: 100%|██████████| 10/10 [00:00<00:00, 491.82it/s]\n", + "[2026-03-24 12:42:53,591][WARNING] - Retrieved 10 studies, which is the specified limit. 
Consider increasing the limit parameter to fetch more studies.\n", + "[2026-03-24 12:42:53,594][INFO] - Retrieved 10 studies.\n" ] }, { @@ -4552,13 +4844,6 @@ "Current studies in dataset: 10\n" ] }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "\n" - ] - }, { "data": { "text/html": [ @@ -4588,6 +4873,7 @@ " MostRecentYearBP\n", " EarliestYearCE\n", " MostRecentYearCE\n", + " Coverage [S, N, W, E]\n", " StudyNotes\n", " ScienceKeywords\n", " Investigators\n", @@ -4607,6 +4893,7 @@ " -40\n", " 1961\n", " 1990\n", + " (-90.0, 90.0, -180.0, 180.0)\n", " Matlab code for two-factor (location and year)...\n", " None\n", " Martin Tingley\n", @@ -4624,12 +4911,13 @@ " -53\n", " 1772\n", " 2003\n", - " None\n", + " (65.9811, 66.0119, -46.5511, -42.7889)\n", + " Melt layer stratigraphy for 4 Greenland Arctic...\n", " None\n", " Sarah Das\n", " []\n", " [[{'DataTableID': '12448', 'DataTableName': 'A...\n", - " []\n", + " [{'fundingAgency': 'US National Science Founda...\n", " \n", " \n", " 2\n", @@ -4641,6 +4929,7 @@ " -52\n", " 1772\n", " 2002\n", + " (66.0119, 66.0119, -45.158, -45.158)\n", " None\n", " None\n", " Joseph McConnell, Ross Edwards\n", @@ -4658,8 +4947,9 @@ " -56\n", " -8350\n", " 2006\n", + " (64.613723, 65.055846, -21.625757, -19.843769)\n", " Keywords - Iceland, Lake sediment, Holocene pa...\n", - " [Arctic, abrupt climate change, Little Ice Age...\n", + " [Medieval Warm Period, Arctic, abrupt climate ...\n", " Áslaug Geirsdóttir, Gifford Miller, Darren Lar...\n", " [{'Author': 'Geirsdóttir Á., G.H. Miller, D.J....\n", " [[{'DataTableID': '26370', 'DataTableName': 'H...\n", @@ -4675,8 +4965,9 @@ " -65\n", " 1966\n", " 2015\n", + " (68.375, 68.375, -149.295, -149.295)\n", " NOAA Template Raw Measurements file added 2019...\n", - " [thin red willow, diamondleaf willow, Salix pu...\n", + " [Tealeaf Willow, thin red willow, diamondleaf ...\n", " Daniel Ackerman, R. Daniel Griffin, Sarah Hobb...\n", " [{'Author': 'Daniel E. 
Ackerman, Daniel Griffi...\n", " [[{'DataTableID': '35733', 'DataTableName': 'A...\n", @@ -4692,6 +4983,7 @@ " -66\n", " 1968\n", " 2016\n", + " (69.99, 69.99, -153.04, -153.04)\n", " NOAA Template Raw Measurements file added 2019...\n", " [SAPC, Salix pulchra Cham., Tealeaf Willow, di...\n", " Daniel Ackerman, R. Daniel Griffin, Sarah Hobb...\n", @@ -4709,6 +5001,7 @@ " -66\n", " 1974\n", " 2016\n", + " (69.99, 69.99, -153.04, -153.04)\n", " NOAA Template Raw Measurements file added 2019...\n", " [Tealeaf Willow, thin red willow, Salix pulchr...\n", " Daniel Ackerman, R. Daniel Griffin, Sarah Hobb...\n", @@ -4726,6 +5019,7 @@ " -66\n", " 1962\n", " 2016\n", + " (68.641, 68.641, -149.614, -149.614)\n", " NOAA Template Raw Measurements file added 2019...\n", " [diamondleaf willow, thin red willow, Tealeaf ...\n", " Daniel Ackerman, R. Daniel Griffin, Sarah Hobb...\n", @@ -4743,6 +5037,7 @@ " -65\n", " 1972\n", " 2015\n", + " (68.662, 68.662, -149.43, -149.43)\n", " Each sample represents mean of 4 radii measure...\n", " [thin red willow, SAPC, Tealeaf Willow, diamon...\n", " Daniel Ackerman, R. Daniel Griffin, Sarah Hobb...\n", @@ -4760,8 +5055,9 @@ " -65\n", " 1965\n", " 2015\n", + " (68.662, 68.662, -149.428, -149.428)\n", " Each sample represents mean of 4 radii measure...\n", - " [SAPC, Salix pulchra Cham., Tealeaf Willow, th...\n", + " [diamondleaf willow, SAPC, Salix pulchra Cham....\n", " Daniel Ackerman, R. Daniel Griffin, Sarah Hobb...\n", " [{'Author': 'Daniel Ackerman, Daniel Griffin, ...\n", " [[{'DataTableID': '33152', 'DataTableName': 'A...\n", @@ -4796,29 +5092,41 @@ "8 TREE RING -22 -65 1972 \n", "9 TREE RING -15 -65 1965 \n", "\n", - " MostRecentYearCE StudyNotes \\\n", - "0 1990 Matlab code for two-factor (location and year)... \n", - "1 2003 None \n", - "2 2002 None \n", - "3 2006 Keywords - Iceland, Lake sediment, Holocene pa... \n", - "4 2015 NOAA Template Raw Measurements file added 2019... 
\n", - "5 2016 NOAA Template Raw Measurements file added 2019... \n", - "6 2016 NOAA Template Raw Measurements file added 2019... \n", - "7 2016 NOAA Template Raw Measurements file added 2019... \n", - "8 2015 Each sample represents mean of 4 radii measure... \n", - "9 2015 Each sample represents mean of 4 radii measure... \n", + " MostRecentYearCE Coverage [S, N, W, E] \\\n", + "0 1990 (-90.0, 90.0, -180.0, 180.0) \n", + "1 2003 (65.9811, 66.0119, -46.5511, -42.7889) \n", + "2 2002 (66.0119, 66.0119, -45.158, -45.158) \n", + "3 2006 (64.613723, 65.055846, -21.625757, -19.843769) \n", + "4 2015 (68.375, 68.375, -149.295, -149.295) \n", + "5 2016 (69.99, 69.99, -153.04, -153.04) \n", + "6 2016 (69.99, 69.99, -153.04, -153.04) \n", + "7 2016 (68.641, 68.641, -149.614, -149.614) \n", + "8 2015 (68.662, 68.662, -149.43, -149.43) \n", + "9 2015 (68.662, 68.662, -149.428, -149.428) \n", + "\n", + " StudyNotes \\\n", + "0 Matlab code for two-factor (location and year)... \n", + "1 Melt layer stratigraphy for 4 Greenland Arctic... \n", + "2 None \n", + "3 Keywords - Iceland, Lake sediment, Holocene pa... \n", + "4 NOAA Template Raw Measurements file added 2019... \n", + "5 NOAA Template Raw Measurements file added 2019... \n", + "6 NOAA Template Raw Measurements file added 2019... \n", + "7 NOAA Template Raw Measurements file added 2019... \n", + "8 Each sample represents mean of 4 radii measure... \n", + "9 Each sample represents mean of 4 radii measure... \n", "\n", " ScienceKeywords \\\n", "0 None \n", "1 None \n", "2 None \n", - "3 [Arctic, abrupt climate change, Little Ice Age... \n", - "4 [thin red willow, diamondleaf willow, Salix pu... \n", + "3 [Medieval Warm Period, Arctic, abrupt climate ... \n", + "4 [Tealeaf Willow, thin red willow, diamondleaf ... \n", "5 [SAPC, Salix pulchra Cham., Tealeaf Willow, di... \n", "6 [Tealeaf Willow, thin red willow, Salix pulchr... \n", "7 [diamondleaf willow, thin red willow, Tealeaf ... 
\n", "8 [thin red willow, SAPC, Tealeaf Willow, diamon... \n", - "9 [SAPC, Salix pulchra Cham., Tealeaf Willow, th... \n", + "9 [diamondleaf willow, SAPC, Salix pulchra Cham.... \n", "\n", " Investigators \\\n", "0 Martin Tingley \n", @@ -4858,7 +5166,7 @@ "\n", " Funding \n", "0 [] \n", - "1 [] \n", + "1 [{'fundingAgency': 'US National Science Founda... \n", "2 [] \n", "3 [{'fundingAgency': 'US National Science Founda... \n", "4 [] \n", @@ -4914,7 +5222,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:53:15,987][INFO] - search_studies: Limit set to 5.\n" + "[2026-03-24 12:42:53,830][INFO] - search_studies: Limit set to 5.\n", + "[2026-03-24 12:42:53,833][INFO] - search_studies: Input Query includes geographical bounds. Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { @@ -4929,9 +5238,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 15252.01it/s]\n", - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:501: UserWarning: Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n", - " warnings.warn(\n" + "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 4960.15it/s]\n", + "[2026-03-24 12:42:54,640][WARNING] - Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n", + "[2026-03-24 12:42:54,643][INFO] - Retrieved 5 studies.\n" ] }, { @@ -5005,7 +5314,8 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2025-12-11 10:53:16,630][INFO] - search_studies: Limit set to 5.\n" + "[2026-03-24 12:42:54,666][INFO] - search_studies: Limit set to 5.\n", + "[2026-03-24 12:42:54,667][INFO] - search_studies: Input Query includes geographical bounds. 
Inspect the results to ensure they match your intended region as one study can contain sites across various parts of the world.\n" ] }, { @@ -5020,9 +5330,9 @@ "name": "stderr", "output_type": "stream", "text": [ - "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 2753.61it/s]\n", - "/Users/dhirenoswal/Desktop/TU corpus/PyleoTUPS/pyleotups/core/Dataset.py:501: UserWarning: Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n", - " warnings.warn(\n" + "Parsing NOAA studies: 100%|██████████| 5/5 [00:00<00:00, 1001.94it/s]\n", + "[2026-03-24 12:42:55,547][WARNING] - Retrieved 5 studies, which is the specified limit. Consider increasing the limit parameter to fetch more studies.\n", + "[2026-03-24 12:42:55,549][INFO] - Retrieved 5 studies.\n" ] }, { @@ -5622,7 +5932,15 @@ "name": "python3" }, "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", "version": "3.12.3" } }, diff --git a/examples/tutorial-pangaea.ipynb b/examples/tutorial-pangaea.ipynb index b6dcb5a5..2f4c1695 100644 --- a/examples/tutorial-pangaea.ipynb +++ b/examples/tutorial-pangaea.ipynb @@ -247,16 +247,41 @@ "execution_count": null, "id": "1a99f03e", "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2026-03-24 12:40:26,623][INFO] - Retrived 10 studies\n", + "[2026-03-24 12:40:26,629][WARNING] - The search contains dataset(s) [830589, 897517] marked as collection. 
Refer to the 'CollectionMembers' column toidentify respective child datasets.\n" + ] + } + ], "source": [ "ds = dataset.search_studies(\n", - " q=\"delta 180\",\n", + " q=\"Khider, D.\",\n", " # bbox=(-10, -10, 10, 10), #(min_lon, min_lat, max_lon, max_lat) \n", " # keywords=[\"Sr/Ca\"],\n", - " limit=5\n", + " limit=10\n", ")\n" ] }, + { + "cell_type": "markdown", + "id": "c4444573", + "metadata": {}, + "source": [ + "### 2. EXPLORING RESULTS " + ] + }, + { + "cell_type": "markdown", + "id": "b6fe20f6", + "metadata": {}, + "source": [ + "#### a. Getting a summary. " + ] + }, { "cell_type": "markdown", "id": "9c1fbcfb", @@ -264,7 +289,7 @@ "source": [ "You can access the search results in two ways:\n", "- `dataset.get_summary()` → returns a DataFrame of search results (default behavior)\n", - "- `display=True` → immediately returns summary table \n", + "- ACCESS the DataFrame in return value. i.e. `ds`\n", "\n", "All results are stored internally in `dataset.studies`" ] @@ -279,10 +304,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "[2026-03-04 12:52:48,092][WARNING] - Data set is of type collection, please select one of its child datasets - \n", - "[2026-03-04 12:52:48,880][WARNING] - Dataset is either restricted or of type \"collection\" - https://doi.org/10.1594/PANGAEA.753001\n", - "[2026-03-04 12:52:54,175][WARNING] - Data set is of type collection, please select one of its child datasets - \n", - "[2026-03-04 12:52:54,926][WARNING] - Dataset is either restricted or of type \"collection\" - https://doi.org/10.1594/PANGAEA.787094\n" + "[2026-03-24 12:40:26,647][WARNING] - The search contains dataset(s) [830589, 897517] marked as collection. 
Refer to the 'CollectionMembers' column toidentify respective child datasets.\n" ] }, { @@ -318,150 +340,246 @@ " Publications\n", " Sites\n", " Funding\n", - " native\n", + " CollectionMembers\n", " \n", " \n", " \n", " \n", " 0\n", - " 10.1594/PANGAEA.753001\n", - " Meteorological observations and eddy covarianc...\n", - " None\n", - " None\n", - " 2006-05-30T07:15:00\n", - " 2006-09-19T06:15:00\n", - " We present the first ecosystem-scale methane f...\n", - " [Arctic Tundra, atmospheric radiation, Eddy Co...\n", - " Sachs, Torsten, Wille, Christian, Boike, Julia...\n", - " Sachs, Torsten; Wille, Christian; Boike, Julia...\n", + " 830589\n", + " Stable isotope record of sediment core MD98-2177\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " We present a reconstruction of El Niño Souther...\n", " []\n", + " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...\n", + " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu...\n", + " [MD98-2177]\n", " []\n", - " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7...\n", + " [830586, 830587, 830588]\n", " \n", " \n", " 1\n", - " 10.1594/PANGAEA.853952\n", - " Seaweed - epiphyte - mesograzer communities we...\n", + " 897517\n", + " Globigerinoides ruber sediment trap data in th...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", + " We present results here from a high-resolution...\n", + " []\n", + " Richey, Julie N, Thirumalai, Kaustubh, Khider,...\n", + " Richey, Julie N; Thirumalai, Kaustubh; Khider,...\n", + " []\n", + " []\n", + " [897509, 897512, 897513, 897514, 897515, 897516]\n", + " \n", + " \n", + " 2\n", + " 830588\n", + " (Table 3) Lead 214 and Lead 210 concentration ...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " None\n", + " []\n", + " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...\n", + " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu...\n", + " [MD98-2177]\n", + " []\n", " None\n", - " 2013-06-19T00:00:00\n", - " 2014-04-12T00:00:00\n", - " Rising seawater temperature and CO2 concentrat...\n", 
+ " \n", + " \n", + " 3\n", + " 897512\n", + " Globigerinoides ruber flux analysis from a lon...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " None\n", - " Werner, Franziska Julie, Graiff, Angelika, Mat...\n", - " Werner, Franziska Julie; Graiff, Angelika; Mat...\n", " []\n", - " [{'url': 'https://www.bioacid.de/', 'fundingGr...\n", - " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.8...\n", + " Richey, Julie N, Thirumalai, Kaustubh, Khider,...\n", + " Richey, Julie N; Thirumalai, Kaustubh; Khider,...\n", + " [GMT_Gulf_of_Mexico]\n", + " []\n", + " None\n", " \n", " \n", - " 2\n", - " 10.1594/PANGAEA.901492\n", - " Beach profile data for the Elwha River Delta, ...\n", + " 4\n", + " 897516\n", + " CTD data from a long-running sediment trap tim...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " None\n", + " []\n", + " Richey, Julie N, Thirumalai, Kaustubh, Khider,...\n", + " Richey, Julie N; Thirumalai, Kaustubh; Khider,...\n", + " [GMT_Gulf_of_Mexico]\n", + " []\n", + " None\n", + " \n", + " \n", + " 5\n", + " 897514\n", + " Magnesium/Calcium ratio of Globigerinoides rub...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " None\n", - " 2011-05-17T02:49:53\n", - " 2011-05-17T08:09:46\n", - " Data were collected using GNSS survey methods,...\n", - " [Beach Nourishment, Elwha, Shoreline Change]\n", - " Miller, Ian\n", - " Miller, Ian (2019): Beach profile data for the...\n", " []\n", + " Richey, Julie N, Thirumalai, Kaustubh, Khider,...\n", + " Richey, Julie N; Thirumalai, Kaustubh; Khider,...\n", + " [GMT_Gulf_of_Mexico]\n", " []\n", - " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9...\n", + " None\n", " \n", " \n", - " 3\n", - " 10.1594/PANGAEA.901614\n", - " Beach profile data for the Elwha River Delta, ...\n", + " 6\n", + " 830587\n", + " (Table 2) Age determination of sediment core M...\n", + " 98.0\n", + " 1950.0\n", + " 0.0\n", + " 1852.0\n", " None\n", + " []\n", + " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...\n", + " Khider, D; Stott, 
Lowell D; Emile-Geay, J; Thu...\n", + " [MD98-2177]\n", + " []\n", + " None\n", + " \n", + " \n", + " 7\n", + " 830586\n", + " (Table S1) Stable carbon and oxygen isotope ra...\n", + " 1231.0\n", + " 103.0\n", + " 704.0\n", + " 1851.0\n", " None\n", - " 2016-10-14T08:05:19\n", - " 2016-10-14T09:32:26\n", - " Data were collected using GNSS survey methods,...\n", - " [Beach Nourishment, Elwha, Shoreline Change]\n", - " Miller, Ian\n", - " Miller, Ian (2019): Beach profile data for the...\n", " []\n", + " Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...\n", + " Khider, D; Stott, Lowell D; Emile-Geay, J; Thu...\n", + " [MD98-2177]\n", " []\n", - " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9...\n", + " None\n", " \n", " \n", - " 4\n", - " 10.1594/PANGAEA.787094\n", - " Lithologic description and vertical permeabili...\n", + " 8\n", + " 897509\n", + " Carbonate measurements from a long-running sed...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " None\n", + " []\n", + " Richey, Julie N, Thirumalai, Kaustubh, Khider,...\n", + " Richey, Julie N; Thirumalai, Kaustubh; Khider,...\n", + " [GMT_Gulf_of_Mexico]\n", + " []\n", " None\n", - " 1998-06-24T18:30:00\n", - " 1998-07-04T00:15:00\n", - " Vertical permeability testing was conducted on...\n", + " \n", + " \n", + " 9\n", + " 897513\n", + " Isotpes analysis of Globigerinoides ruber from...\n", + " NaN\n", + " NaN\n", + " NaN\n", + " NaN\n", " None\n", - " Stover, S Cheree, Screaton, Elizabeth J, Likos...\n", - " Stover, S Cheree; Screaton, Elizabeth J; Likos...\n", " []\n", - " [{'url': 'https://www-odp.tamu.edu:443/', 'fun...\n", - " {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7...\n", + " Richey, Julie N, Thirumalai, Kaustubh, Khider,...\n", + " Richey, Julie N; Thirumalai, Kaustubh; Khider,...\n", + " [GMT_Gulf_of_Mexico]\n", + " []\n", + " None\n", " \n", " \n", "\n", "" ], "text/plain": [ - " StudyID StudyName \\\n", - "0 10.1594/PANGAEA.753001 Meteorological observations and eddy covarianc... 
\n", - "1 10.1594/PANGAEA.853952 Seaweed - epiphyte - mesograzer communities we... \n", - "2 10.1594/PANGAEA.901492 Beach profile data for the Elwha River Delta, ... \n", - "3 10.1594/PANGAEA.901614 Beach profile data for the Elwha River Delta, ... \n", - "4 10.1594/PANGAEA.787094 Lithologic description and vertical permeabili... \n", - "\n", - " EarliestYearBP MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n", - "0 None None 2006-05-30T07:15:00 2006-09-19T06:15:00 \n", - "1 None None 2013-06-19T00:00:00 2014-04-12T00:00:00 \n", - "2 None None 2011-05-17T02:49:53 2011-05-17T08:09:46 \n", - "3 None None 2016-10-14T08:05:19 2016-10-14T09:32:26 \n", - "4 None None 1998-06-24T18:30:00 1998-07-04T00:15:00 \n", + " StudyID StudyName EarliestYearBP \\\n", + "0 830589 Stable isotope record of sediment core MD98-2177 NaN \n", + "1 897517 Globigerinoides ruber sediment trap data in th... NaN \n", + "2 830588 (Table 3) Lead 214 and Lead 210 concentration ... NaN \n", + "3 897512 Globigerinoides ruber flux analysis from a lon... NaN \n", + "4 897516 CTD data from a long-running sediment trap tim... NaN \n", + "5 897514 Magnesium/Calcium ratio of Globigerinoides rub... NaN \n", + "6 830587 (Table 2) Age determination of sediment core M... 98.0 \n", + "7 830586 (Table S1) Stable carbon and oxygen isotope ra... 1231.0 \n", + "8 897509 Carbonate measurements from a long-running sed... NaN \n", + "9 897513 Isotpes analysis of Globigerinoides ruber from... NaN \n", "\n", - " StudyNotes \\\n", - "0 We present the first ecosystem-scale methane f... \n", - "1 Rising seawater temperature and CO2 concentrat... \n", - "2 Data were collected using GNSS survey methods,... \n", - "3 Data were collected using GNSS survey methods,... \n", - "4 Vertical permeability testing was conducted on... 
\n", + " MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "5 NaN NaN NaN \n", + "6 1950.0 0.0 1852.0 \n", + "7 103.0 704.0 1851.0 \n", + "8 NaN NaN NaN \n", + "9 NaN NaN NaN \n", "\n", - " ScienceKeywords \\\n", - "0 [Arctic Tundra, atmospheric radiation, Eddy Co... \n", - "1 None \n", - "2 [Beach Nourishment, Elwha, Shoreline Change] \n", - "3 [Beach Nourishment, Elwha, Shoreline Change] \n", - "4 None \n", + " StudyNotes ScienceKeywords \\\n", + "0 We present a reconstruction of El Niño Souther... [] \n", + "1 We present results here from a high-resolution... [] \n", + "2 None [] \n", + "3 None [] \n", + "4 None [] \n", + "5 None [] \n", + "6 None [] \n", + "7 None [] \n", + "8 None [] \n", + "9 None [] \n", "\n", " Investigators \\\n", - "0 Sachs, Torsten, Wille, Christian, Boike, Julia... \n", - "1 Werner, Franziska Julie, Graiff, Angelika, Mat... \n", - "2 Miller, Ian \n", - "3 Miller, Ian \n", - "4 Stover, S Cheree, Screaton, Elizabeth J, Likos... \n", + "0 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "1 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "2 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "3 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "4 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "5 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "6 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "7 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "8 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "9 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", "\n", - " Publications Sites \\\n", - "0 Sachs, Torsten; Wille, Christian; Boike, Julia... [] \n", - "1 Werner, Franziska Julie; Graiff, Angelika; Mat... [] \n", - "2 Miller, Ian (2019): Beach profile data for the... [] \n", - "3 Miller, Ian (2019): Beach profile data for the... 
[] \n", - "4 Stover, S Cheree; Screaton, Elizabeth J; Likos... [] \n", + " Publications Sites \\\n", + "0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "1 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [] \n", + "2 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "3 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "4 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "6 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "7 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "8 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "9 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", "\n", - " Funding \\\n", - "0 [] \n", - "1 [{'url': 'https://www.bioacid.de/', 'fundingGr... \n", - "2 [] \n", - "3 [] \n", - "4 [{'url': 'https://www-odp.tamu.edu:443/', 'fun... \n", - "\n", - " native \n", - "0 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7... \n", - "1 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.8... \n", - "2 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9... \n", - "3 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.9... \n", - "4 {'raw_uri': 'https://doi.org/10.1594/PANGAEA.7... " + " Funding CollectionMembers \n", + "0 [] [830586, 830587, 830588] \n", + "1 [] [897509, 897512, 897513, 897514, 897515, 897516] \n", + "2 [] None \n", + "3 [] None \n", + "4 [] None \n", + "5 [] None \n", + "6 [] None \n", + "7 [] None \n", + "8 [] None \n", + "9 [] None " ] }, "execution_count": null, @@ -478,7 +596,7 @@ "id": "599109b3", "metadata": {}, "source": [ - "### 3. Geographic Metadata\n", + "#### b. 
Exploring Geographic Metadata\n", "\n", "Returns site-level geographic information where available.\n", "\n", @@ -518,20 +636,634 @@ " LocationName\n", " Latitude\n", " Longitude\n", - " MinElevation\n", - " MaxElevation\n", - " DataType\n", + " Elevation\n", " \n", " \n", " \n", + " \n", + " 0\n", + " 830589\n", + " 2013989\n", + " MD98-2177\n", + " None\n", + " 1.4\n", + " 119.08\n", + " -968.0\n", + " \n", + " \n", + " 1\n", + " 830588\n", + " 2013989\n", + " MD98-2177\n", + " None\n", + " 1.4\n", + " 119.08\n", + " -968.0\n", + " \n", + " \n", + " 2\n", + " 897512\n", + " 2901221\n", + " GMT_Gulf_of_Mexico\n", + " Gulf of Mexico\n", + " 27.5\n", + " -90.30\n", + " -700.0\n", + " \n", + " \n", + " 3\n", + " 897516\n", + " 2901221\n", + " GMT_Gulf_of_Mexico\n", + " Gulf of Mexico\n", + " 27.5\n", + " -90.30\n", + " -700.0\n", + " \n", + " \n", + " 4\n", + " 897514\n", + " 2901221\n", + " GMT_Gulf_of_Mexico\n", + " Gulf of Mexico\n", + " 27.5\n", + " -90.30\n", + " -700.0\n", + " \n", + " \n", + " 5\n", + " 830587\n", + " 2013989\n", + " MD98-2177\n", + " None\n", + " 1.4\n", + " 119.08\n", + " -968.0\n", + " \n", + " \n", + " 6\n", + " 830586\n", + " 2013989\n", + " MD98-2177\n", + " None\n", + " 1.4\n", + " 119.08\n", + " -968.0\n", + " \n", + " \n", + " 7\n", + " 897509\n", + " 2901221\n", + " GMT_Gulf_of_Mexico\n", + " Gulf of Mexico\n", + " 27.5\n", + " -90.30\n", + " -700.0\n", + " \n", + " \n", + " 8\n", + " 897513\n", + " 2901221\n", + " GMT_Gulf_of_Mexico\n", + " Gulf of Mexico\n", + " 27.5\n", + " -90.30\n", + " -700.0\n", + " \n", " \n", "\n", "" ], "text/plain": [ - "Empty DataFrame\n", - "Columns: [StudyID, SiteID, SiteName, LocationName, Latitude, Longitude, MinElevation, MaxElevation, DataType]\n", - "Index: []" + " StudyID SiteID SiteName LocationName Latitude Longitude \\\n", + "0 830589 2013989 MD98-2177 None 1.4 119.08 \n", + "1 830588 2013989 MD98-2177 None 1.4 119.08 \n", + "2 897512 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n", + 
"3 897516 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n", + "4 897514 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n", + "5 830587 2013989 MD98-2177 None 1.4 119.08 \n", + "6 830586 2013989 MD98-2177 None 1.4 119.08 \n", + "7 897509 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n", + "8 897513 2901221 GMT_Gulf_of_Mexico Gulf of Mexico 27.5 -90.30 \n", + "\n", + " Elevation \n", + "0 -968.0 \n", + "1 -968.0 \n", + "2 -700.0 \n", + "3 -700.0 \n", + "4 -700.0 \n", + "5 -968.0 \n", + "6 -968.0 \n", + "7 -700.0 \n", + "8 -700.0 " + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "geo = dataset.get_geo()\n", + "display(geo)" + ] + }, + { + "cell_type": "markdown", + "id": "2e70d2c2", + "metadata": {}, + "source": [ + "#### c. Explore the listed Publication" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "e1a552c7", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(BibliographyData(\n", + " entries=OrderedCaseInsensitiveDict([\n", + " ('10_1594_PANGAEA_830589_1_1', Entry('misc',\n", + " fields=[\n", + " ('title', 'Stable isotope record of sediment core MD98-2177'),\n", + " ('year', '2011'),\n", + " ('doi', '10.1594/PANGAEA.830589'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.830589')],\n", + " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n", + " ('10_1594_PANGAEA_897517_1_2', Entry('misc',\n", + " fields=[\n", + " ('title', 'Globigerinoides ruber sediment trap data in the Gulf of Mexico'),\n", + " ('year', '2019'),\n", + " ('doi', '10.1594/PANGAEA.897517'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.897517')],\n", + " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), 
Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n", + " ('10_1594_PANGAEA_830588_1_3', Entry('misc',\n", + " fields=[\n", + " ('title', '(Table 3) Lead 214 and Lead 210 concentration of sediment core MD98-2177'),\n", + " ('year', '2011'),\n", + " ('doi', '10.1594/PANGAEA.830588'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.830588')],\n", + " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n", + " ('10_1594_PANGAEA_897512_1_4', Entry('misc',\n", + " fields=[\n", + " ('title', 'Globigerinoides ruber flux analysis from a long-running sediment trap time series from the northern Gulf of Mexico'),\n", + " ('year', '2019'),\n", + " ('doi', '10.1594/PANGAEA.897512'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.897512')],\n", + " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n", + " ('10_1594_PANGAEA_897516_1_5', Entry('misc',\n", + " fields=[\n", + " ('title', 'CTD data from a long-running sediment trap time series from the northern Gulf of Mexico'),\n", + " ('year', '2019'),\n", + " ('doi', '10.1594/PANGAEA.897516'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.897516')],\n", + " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n", + " ('10_1594_PANGAEA_897514_1_6', Entry('misc',\n", + " fields=[\n", + " ('title', 'Magnesium/Calcium ratio of Globigerinoides ruber from a long-running sediment trap time series from the northern Gulf of 
Mexico'),\n", + " ('year', '2019'),\n", + " ('doi', '10.1594/PANGAEA.897514'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.897514')],\n", + " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]})), \n", + " ('10_1594_PANGAEA_830587_1_7', Entry('misc',\n", + " fields=[\n", + " ('title', '(Table 2) Age determination of sediment core MD98-2177'),\n", + " ('year', '2011'),\n", + " ('doi', '10.1594/PANGAEA.830587'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.830587')],\n", + " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n", + " ('10_1594_PANGAEA_830586_1_8', Entry('misc',\n", + " fields=[\n", + " ('title', '(Table S1) Stable carbon and oxygen isotope ratios of Pulleniatina obliquiloculata of sediment core MD98-2177'),\n", + " ('year', '2011'),\n", + " ('doi', '10.1594/PANGAEA.830586'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.830586')],\n", + " persons={'author': [Person('Khider'), Person('D'), Person('Stott'), Person('D, Lowell'), Person('Emile-Geay'), Person('J'), Person('Thunell'), Person('C, Robert'), Person('Hammond'), Person('E, Douglas')]})), \n", + " ('10_1594_PANGAEA_897509_1_9', Entry('misc',\n", + " fields=[\n", + " ('title', 'Carbonate measurements from a long-running sediment trap time series from the northern Gulf of Mexico'),\n", + " ('year', '2019'),\n", + " ('doi', '10.1594/PANGAEA.897509'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.897509')],\n", + " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), 
Person('Quinn'), Person('Michael, Terrence')]})), \n", + " ('10_1594_PANGAEA_897513_1_10', Entry('misc',\n", + " fields=[\n", + " ('title', 'Isotpes analysis of Globigerinoides ruber from a long-running sediment trap time series from the northern Gulf of Mexico'),\n", + " ('year', '2019'),\n", + " ('doi', '10.1594/PANGAEA.897513'),\n", + " ('url', 'https://doi.org/10.1594/PANGAEA.897513')],\n", + " persons={'author': [Person('Richey'), Person('N, Julie'), Person('Thirumalai'), Person('Kaustubh'), Person('Khider'), Person('D'), Person('Reynolds'), Person('E, Caitlin'), Person('Partin'), Person('W, Judson'), Person('Quinn'), Person('Michael, Terrence')]}))]),\n", + " \n", + " preamble=[]),\n", + " StudyID StudyName \\\n", + " 0 830589 Stable isotope record of sediment core MD98-2177 \n", + " 1 830589 Stable isotope record of sediment core MD98-2177 \n", + " 2 897517 Globigerinoides ruber sediment trap data in th... \n", + " 3 897517 Globigerinoides ruber sediment trap data in th... \n", + " 4 830588 (Table 3) Lead 214 and Lead 210 concentration ... \n", + " 5 897512 Globigerinoides ruber flux analysis from a lon... \n", + " 6 897516 CTD data from a long-running sediment trap tim... \n", + " 7 897514 Magnesium/Calcium ratio of Globigerinoides rub... \n", + " 8 830587 (Table 2) Age determination of sediment core M... \n", + " 9 830586 (Table S1) Stable carbon and oxygen isotope ra... \n", + " 10 897509 Carbonate measurements from a long-running sed... \n", + " 11 897513 Isotpes analysis of Globigerinoides ruber from... \n", + " \n", + " Author \\\n", + " 0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 1 None \n", + " 2 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 3 None \n", + " 4 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 6 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 7 Richey, Julie N; Thirumalai, Kaustubh; Khider,... 
\n", + " 8 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 9 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 10 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 11 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " \n", + " Title Journal Year Volume \\\n", + " 0 Stable isotope record of sediment core MD98-2177 PANGAEA 2011 None \n", + " 1 Khider, D et al. (2011): Assessing El Niño Sou... None 2011 None \n", + " 2 Globigerinoides ruber sediment trap data in th... PANGAEA 2019 None \n", + " 3 Richey, JN et al. (2019): Considerations for G... None 2019 None \n", + " 4 (Table 3) Lead 214 and Lead 210 concentration ... PANGAEA 2011 None \n", + " 5 Globigerinoides ruber flux analysis from a lon... PANGAEA 2019 None \n", + " 6 CTD data from a long-running sediment trap tim... PANGAEA 2019 None \n", + " 7 Magnesium/Calcium ratio of Globigerinoides rub... PANGAEA 2019 None \n", + " 8 (Table 2) Age determination of sediment core M... PANGAEA 2011 None \n", + " 9 (Table S1) Stable carbon and oxygen isotope ra... PANGAEA 2011 None \n", + " 10 Carbonate measurements from a long-running sed... PANGAEA 2019 None \n", + " 11 Isotpes analysis of Globigerinoides ruber from... 
PANGAEA 2019 None \n", + " \n", + " Number Pages Type DOI \\\n", + " 0 None None dataset 10.1594/PANGAEA.830589 \n", + " 1 None None article 10.1029/2011PA002139 \n", + " 2 None None dataset 10.1594/PANGAEA.897517 \n", + " 3 None None article 10.1029/2018PA003417 \n", + " 4 None None dataset 10.1594/PANGAEA.830588 \n", + " 5 None None dataset 10.1594/PANGAEA.897512 \n", + " 6 None None dataset 10.1594/PANGAEA.897516 \n", + " 7 None None dataset 10.1594/PANGAEA.897514 \n", + " 8 None None dataset 10.1594/PANGAEA.830587 \n", + " 9 None None dataset 10.1594/PANGAEA.830586 \n", + " 10 None None dataset 10.1594/PANGAEA.897509 \n", + " 11 None None dataset 10.1594/PANGAEA.897513 \n", + " \n", + " URL \\\n", + " 0 https://doi.org/10.1594/PANGAEA.830589 \n", + " 1 None \n", + " 2 https://doi.org/10.1594/PANGAEA.897517 \n", + " 3 None \n", + " 4 https://doi.org/10.1594/PANGAEA.830588 \n", + " 5 https://doi.org/10.1594/PANGAEA.897512 \n", + " 6 https://doi.org/10.1594/PANGAEA.897516 \n", + " 7 https://doi.org/10.1594/PANGAEA.897514 \n", + " 8 https://doi.org/10.1594/PANGAEA.830587 \n", + " 9 https://doi.org/10.1594/PANGAEA.830586 \n", + " 10 https://doi.org/10.1594/PANGAEA.897509 \n", + " 11 https://doi.org/10.1594/PANGAEA.897513 \n", + " \n", + " CitationKey \n", + " 0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 1 Khider, D et al. (2011): Assessing El Niño Sou... \n", + " 2 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 3 Richey, JN et al. (2019): Considerations for G... \n", + " 4 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 6 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 7 Richey, Julie N; Thirumalai, Kaustubh; Khider,... \n", + " 8 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 9 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... \n", + " 10 Richey, Julie N; Thirumalai, Kaustubh; Khider,... 
\n", + " 11 Richey, Julie N; Thirumalai, Kaustubh; Khider,... )" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "pubs = dataset.get_publications()\n", + "display(pubs)" + ] + }, + { + "cell_type": "markdown", + "id": "78e7e978", + "metadata": {}, + "source": [ + "Save the publications directly to BibTeX. The following saves the publications to a file named `pangaea-datasets.bib` in your working directory. Alter the path to save to your desired location." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "b053f5b6", + "metadata": {}, + "outputs": [], + "source": [ + "save = dataset.get_publications(save = True, path=\"./pnagaea-datasets.bib\")" + ] + }, + { + "cell_type": "markdown", + "id": "2bf2ead1", + "metadata": {}, + "source": [ + "### 3. Extract Data from studies" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "665a4353", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[2026-03-24 12:53:26,636][WARNING] - The search contains dataset(s) [830589, 897517] marked as collection. Refer to the 'CollectionMembers' column toidentify respective child datasets.\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
StudyIDStudyNameEarliestYearBPMostRecentYearBPEarliestYearCEMostRecentYearCEStudyNotesScienceKeywordsInvestigatorsPublicationsSitesFundingCollectionMembers
0830589Stable isotope record of sediment core MD98-2177NaNNaNNaNNaNWe present a reconstruction of El Niño Souther...[]Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...Khider, D; Stott, Lowell D; Emile-Geay, J; Thu...[MD98-2177][][830586, 830587, 830588]
1897517Globigerinoides ruber sediment trap data in th...NaNNaNNaNNaNWe present results here from a high-resolution...[]Richey, Julie N, Thirumalai, Kaustubh, Khider,...Richey, Julie N; Thirumalai, Kaustubh; Khider,...[][][897509, 897512, 897513, 897514, 897515, 897516]
2830588(Table 3) Lead 214 and Lead 210 concentration ...NaNNaNNaNNaNNone[]Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...Khider, D; Stott, Lowell D; Emile-Geay, J; Thu...[MD98-2177][]None
3897512Globigerinoides ruber flux analysis from a lon...NaNNaNNaNNaNNone[]Richey, Julie N, Thirumalai, Kaustubh, Khider,...Richey, Julie N; Thirumalai, Kaustubh; Khider,...[GMT_Gulf_of_Mexico][]None
4897516CTD data from a long-running sediment trap tim...NaNNaNNaNNaNNone[]Richey, Julie N, Thirumalai, Kaustubh, Khider,...Richey, Julie N; Thirumalai, Kaustubh; Khider,...[GMT_Gulf_of_Mexico][]None
5897514Magnesium/Calcium ratio of Globigerinoides rub...NaNNaNNaNNaNNone[]Richey, Julie N, Thirumalai, Kaustubh, Khider,...Richey, Julie N; Thirumalai, Kaustubh; Khider,...[GMT_Gulf_of_Mexico][]None
6830587(Table 2) Age determination of sediment core M...98.01950.00.01852.0None[]Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...Khider, D; Stott, Lowell D; Emile-Geay, J; Thu...[MD98-2177][]None
7830586(Table S1) Stable carbon and oxygen isotope ra...1231.0103.0704.01851.0None[]Khider, D, Stott, Lowell D, Emile-Geay, J, Thu...Khider, D; Stott, Lowell D; Emile-Geay, J; Thu...[MD98-2177][]None
8897509Carbonate measurements from a long-running sed...NaNNaNNaNNaNNone[]Richey, Julie N, Thirumalai, Kaustubh, Khider,...Richey, Julie N; Thirumalai, Kaustubh; Khider,...[GMT_Gulf_of_Mexico][]None
9897513Isotpes analysis of Globigerinoides ruber from...NaNNaNNaNNaNNone[]Richey, Julie N, Thirumalai, Kaustubh, Khider,...Richey, Julie N; Thirumalai, Kaustubh; Khider,...[GMT_Gulf_of_Mexico][]None
\n", + "
" + ], + "text/plain": [ + " StudyID StudyName EarliestYearBP \\\n", + "0 830589 Stable isotope record of sediment core MD98-2177 NaN \n", + "1 897517 Globigerinoides ruber sediment trap data in th... NaN \n", + "2 830588 (Table 3) Lead 214 and Lead 210 concentration ... NaN \n", + "3 897512 Globigerinoides ruber flux analysis from a lon... NaN \n", + "4 897516 CTD data from a long-running sediment trap tim... NaN \n", + "5 897514 Magnesium/Calcium ratio of Globigerinoides rub... NaN \n", + "6 830587 (Table 2) Age determination of sediment core M... 98.0 \n", + "7 830586 (Table S1) Stable carbon and oxygen isotope ra... 1231.0 \n", + "8 897509 Carbonate measurements from a long-running sed... NaN \n", + "9 897513 Isotpes analysis of Globigerinoides ruber from... NaN \n", + "\n", + " MostRecentYearBP EarliestYearCE MostRecentYearCE \\\n", + "0 NaN NaN NaN \n", + "1 NaN NaN NaN \n", + "2 NaN NaN NaN \n", + "3 NaN NaN NaN \n", + "4 NaN NaN NaN \n", + "5 NaN NaN NaN \n", + "6 1950.0 0.0 1852.0 \n", + "7 103.0 704.0 1851.0 \n", + "8 NaN NaN NaN \n", + "9 NaN NaN NaN \n", + "\n", + " StudyNotes ScienceKeywords \\\n", + "0 We present a reconstruction of El Niño Souther... [] \n", + "1 We present results here from a high-resolution... [] \n", + "2 None [] \n", + "3 None [] \n", + "4 None [] \n", + "5 None [] \n", + "6 None [] \n", + "7 None [] \n", + "8 None [] \n", + "9 None [] \n", + "\n", + " Investigators \\\n", + "0 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "1 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "2 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "3 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "4 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "5 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "6 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "7 Khider, D, Stott, Lowell D, Emile-Geay, J, Thu... \n", + "8 Richey, Julie N, Thirumalai, Kaustubh, Khider,... 
\n", + "9 Richey, Julie N, Thirumalai, Kaustubh, Khider,... \n", + "\n", + " Publications Sites \\\n", + "0 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "1 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [] \n", + "2 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "3 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "4 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "5 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "6 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "7 Khider, D; Stott, Lowell D; Emile-Geay, J; Thu... [MD98-2177] \n", + "8 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "9 Richey, Julie N; Thirumalai, Kaustubh; Khider,... [GMT_Gulf_of_Mexico] \n", + "\n", + " Funding CollectionMembers \n", + "0 [] [830586, 830587, 830588] \n", + "1 [] [897509, 897512, 897513, 897514, 897515, 897516] \n", + "2 [] None \n", + "3 [] None \n", + "4 [] None \n", + "5 [] None \n", + "6 [] None \n", + "7 [] None \n", + "8 [] None \n", + "9 [] None " ] }, "execution_count": null, @@ -540,8 +1272,75 @@ } ], "source": [ - "geo = dataset.get_geo()\n", - "geo.head()" + "dataset.get_summary()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "4c00a283", + "metadata": {}, + "outputs": [], + "source": [ + "dfs = dataset.get_data([830586, 830587])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "baabaa90", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + " Depth sed Depth top Depth bot Age Age_2 Age_3 \\\n", + "0 0.005 0.0 0.01 1843 1851 0.103 \n", + "1 0.005 0.0 0.01 1843 1851 0.103 \n", + "2 0.005 0.0 0.01 1843 1851 0.103 \n", + "3 0.005 0.0 0.01 1843 1851 0.103 \n", + "4 0.005 0.0 0.01 1843 1851 0.103 \n", + "\n", + " P. obliquiloculata δ13C P. 
obliquiloculata δ18O Mass Event \\\n", + "0 0.936 -2.254 34 MD98-2177 \n", + "1 0.895 -2.356 37 MD98-2177 \n", + "2 0.514 -2.630 20 MD98-2177 \n", + "3 0.900 -2.480 25 MD98-2177 \n", + "4 0.957 -2.094 29 MD98-2177 \n", + "\n", + " Latitude Longitude Elevation Date/Time \n", + "0 1.4 119.08 -968.0 NaT \n", + "1 1.4 119.08 -968.0 NaT \n", + "2 1.4 119.08 -968.0 NaT \n", + "3 1.4 119.08 -968.0 NaT \n", + "4 1.4 119.08 -968.0 NaT \n", + " Depth sed Lab Lab label Age dated \\\n", + "0 0.00 Lawrence Livermore National Laboratory 95299 0.580 \n", + "1 0.12 University of California, Irvine OS-38302 0.395 \n", + "2 0.50 Lawrence Livermore National Laboratory 100234 1.110 \n", + "3 0.94 Lawrence Livermore National Laboratory 100235 1.745 \n", + "4 1.09 University of California, Irvine OS-38335 1.870 \n", + "\n", + " Age dated std dev Age Age e Comm Event Latitude Longitude \\\n", + "0 0.045 1852 51 NaN MD98-2177 1.4 119.08 \n", + "1 0.090 0 0 modern MD98-2177 1.4 119.08 \n", + "2 0.060 1350 70 NaN MD98-2177 1.4 119.08 \n", + "3 0.045 730 82 NaN MD98-2177 1.4 119.08 \n", + "4 0.110 584 136 NaN MD98-2177 1.4 119.08 \n", + "\n", + " Elevation Date/Time \n", + "0 -968.0 NaT \n", + "1 -968.0 NaT \n", + "2 -968.0 NaT \n", + "3 -968.0 NaT \n", + "4 -968.0 NaT \n" + ] + } + ], + "source": [ + "for df in dfs:\n", + " print(df.head())" ] } ], From 48235bb882787ad8435ee309b2b59c99c041f84a Mon Sep 17 00:00:00 2001 From: Dhiren Date: Thu, 26 Mar 2026 14:42:46 -0700 Subject: [PATCH 7/9] Fixes #33 --- pyleotups/utils/Parser/ExcelParser.py | 12 +- .../utils/Parser/NonStandardParserUtils.py | 22 +- pyleotups/utils/Parser/StandardParser.py | 474 +----------------- 3 files changed, 32 insertions(+), 476 deletions(-) diff --git a/pyleotups/utils/Parser/ExcelParser.py b/pyleotups/utils/Parser/ExcelParser.py index aad6d5f4..23c09690 100644 --- a/pyleotups/utils/Parser/ExcelParser.py +++ b/pyleotups/utils/Parser/ExcelParser.py @@ -6,6 +6,7 @@ from dataclasses import dataclass, field from typing 
import Any, List, Optional, Tuple, Iterable, Dict from enum import Enum +from NonStandardParserUtils import auto_cast_df NUMERIC_THRESHOLD_HEADER = 0.25 @@ -416,7 +417,8 @@ def _process_block(self, block: Block, idx: int): block.df = None elif block.block_type == BlockType.COMPLETE_TABULAR and merged_headers: - block.df = self._generate_df(block, grid, merged_headers, hdr_info) + df = self._generate_df(block, grid, merged_headers, hdr_info) + block.df = auto_cast_df(df) else: block.df = None @@ -766,13 +768,7 @@ def _ensure_unique(names: List[str]) -> List[str]: return out if __name__ == "__main__": - # Example usage - # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/NonStandardParser/Correspondence/notebook/frank1999.xls") - # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Clemens/Clemens1996/clemens1996.xls") - # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Ishiwatari/ishiwatari1999.xls") - parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Overpeck1996/overpeck1996.xls") - # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Bond/bond1992.xls") - # parser = ExcelParser("/Users/dhirenoswal/Desktop/TU corpus/ExcelParser/Data/orig-ocean99-xls/Charles/charles1996.xls") + parser = ExcelParser("https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/frank1999/frank1999.xls") blocks = parser.parse() diff --git a/pyleotups/utils/Parser/NonStandardParserUtils.py b/pyleotups/utils/Parser/NonStandardParserUtils.py index 31813c9f..660825a0 100644 --- a/pyleotups/utils/Parser/NonStandardParserUtils.py +++ b/pyleotups/utils/Parser/NonStandardParserUtils.py @@ -415,7 +415,9 @@ def generate_df(lines_info, delimiter, headers, header_extent=0): raise ValueError(f"Column count ({len(rows[0])}) " f"does not match header count ({len(col_names)})") - return pd.DataFrame(rows, 
columns=col_names) + df = pd.DataFrame(rows, columns=col_names) + df = auto_cast_df(df) + return df def assign_tokens_by_overlap(lines_info, delimiter, headers, header_extent=0): @@ -629,4 +631,20 @@ def refine_headers_by_correspondence(header_lines, data_lines, delimiter, broadc last_token_identity = current_token_identity - return refined_headers \ No newline at end of file + return refined_headers + +def auto_cast_df(df: pd.DataFrame) -> pd.DataFrame: + """ + Attempt to convert object columns to numeric where possible. + Leaves non-convertible columns unchanged. + """ + for col in df.columns: + if df[col].dtype == "object": + # Try numeric conversion + converted = pd.to_numeric(df[col], errors="ignore") + + # Only replace if conversion actually changed type + if converted.dtype != "object": + df[col] = converted + + return df \ No newline at end of file diff --git a/pyleotups/utils/Parser/StandardParser.py b/pyleotups/utils/Parser/StandardParser.py index dbf2ce52..5a018038 100644 --- a/pyleotups/utils/Parser/StandardParser.py +++ b/pyleotups/utils/Parser/StandardParser.py @@ -4,6 +4,8 @@ import pandas as pd import re +from NonStandardParserUtils import auto_cast_df + @DeprecationWarning class DataFetcher: """ @@ -398,6 +400,7 @@ def _construct_dataframe(self): df = pd.DataFrame(padded, columns=self.variables) df.attrs['variables'] = self.variables + df = auto_cast_df(df) return df def _skip_empty_lines(self, index): @@ -472,469 +475,8 @@ def _extract_first_non_digit_token(self, line): return token return None -# def fetch_file(url): -# """ -# Download a file from the given URL and split its content into lines. - -# Parameters -# ---------- -# url : str -# The URL of the file to fetch. - -# Returns -# ------- -# list of str -# The file content split into individual lines. - -# Raises -# ------ -# requests.HTTPError -# If the HTTP request returned an unsuccessful status code. 
-# """ -# response = requests.get(url) -# response.raise_for_status() -# return response.text.splitlines() - - -# def identify_metadata(lines): -# """ -# Identify the metadata block in the file by finding lines that start with '#'. - -# Parameters -# ---------- -# lines : list of str -# All lines from the file. - -# Returns -# ------- -# tuple of (int, int) or (None, None) -# A tuple containing the first and last indices of metadata lines. -# Returns (None, None) if no metadata lines are found. -# """ -# metadata_indices = [i for i, line in enumerate(lines) if line.lstrip().startswith('#')] -# if metadata_indices: -# return metadata_indices[0], metadata_indices[-1] -# return None, None - - -# def extract_first_non_digit_token(line): -# """ -# Remove any leading comment markers from a line and return the first token that is not purely numeric. - -# Parameters -# ---------- -# line : str -# A line of text (typically from metadata). - -# Returns -# ------- -# str or None -# The first non-digit token, or None if no valid token is found. -# """ -# pattern = r'^\s*(.*?)(?:\t|\s{2,})(?:[^,\n]*,){0,9}[^,\n]*$' -# match = re.match(pattern, line) -# if match: -# return match.group(1).strip() -# tokens = re.split(r'[\s,]+', line.strip()) -# for token in tokens: -# if token and not token.isdigit(): -# return token -# return None - - -# def parse_metadata_variables(lines, meta_start, meta_end): -# """ -# Extract variable names from a metadata block when an explicit "Variables" block exists. - -# This function attempts to extract variables by looking for a metadata line that starts with -# "# variables" (case-insensitive). If found, it first searches for lines starting with '##' -# following the marker. If no such lines exist, it falls back to splitting other non-comment lines. - -# Parameters -# ---------- -# lines : list of str -# All lines from the file. -# meta_start : int -# Index of the first metadata line. -# meta_end : int -# Index of the last metadata line. 
- -# Returns -# ------- -# tuple of (list of str, int) -# A tuple where the first element is a list of extracted variable names and the second element is -# the header skip count (usually 1 if variables are successfully extracted). -# """ -# variables = [] -# header_skip_count = 0 -# variable_block_index = None - -# for i in range(meta_start, meta_end + 1): -# if re.match(r'^#\s*variables', lines[i], re.IGNORECASE): -# variable_block_index = i -# break - -# if variable_block_index is not None: -# # CASE 1A: Look for lines starting with '##' -# for i in range(variable_block_index + 1, meta_end + 1): -# if lines[i].lstrip().startswith('##'): -# token = extract_first_non_digit_token(lines[i].lstrip('#')) -# if token: -# variables.append(token) -# # CASE 1B: Fallback if no '##' lines found. -# if not variables: -# for i in range(variable_block_index + 1, meta_end + 1): -# if lines[i].strip() and not lines[i].startswith("#"): -# if len(re.split(r',', lines[i].strip())) >= 9: -# token = extract_first_non_digit_token(lines[i]) -# if token: -# variables.append(token) -# if variables: -# header_skip_count = 1 -# return variables, header_skip_count - - -# def parse_data_header_variables(lines, meta_end): -# """ -# Extract variable names from the data header when no explicit metadata "Variables" block exists. - -# It searches from the line immediately after the metadata block until a non-comment line is found -# that, when split by either tab or comma, yields at least 9 tokens. - -# Parameters -# ---------- -# lines : list of str -# All lines from the file. -# meta_end : int -# The index of the last metadata line. - -# Returns -# ------- -# tuple of (list of str, int) -# A tuple containing the extracted variable names and a header skip count (typically 1). 
-# """ -# variables = [] -# header_skip_count = 1 -# for i in range(meta_end + 1, len(lines)): -# line = lines[i].strip() -# if line and not line.lstrip().startswith('#'): -# tokens_tab = re.split(r'\t', line) -# tokens_comma = re.split(r',', line) -# if len(tokens_tab) >= 9 or len(tokens_comma) >= 9: -# variables = tokens_tab if len(tokens_tab) >= len(tokens_comma) else tokens_comma -# break -# return variables, header_skip_count - - -# def fallback_variable_extraction(lines, meta_end): -# """ -# Fallback extraction: use the first non-empty line in the data block, split by tabs. - -# Parameters -# ---------- -# lines : list of str -# All lines from the file. -# meta_end : int -# The index of the last metadata line. - -# Returns -# ------- -# tuple of (list of str, int) -# A tuple containing variable names (or autogenerated names for empty tokens) and a header skip count. -# """ -# variables = [] -# header_skip_count = 1 -# for i in range(meta_end + 1, len(lines)): -# if lines[i].strip(): -# tokens = re.split(r'\t', lines[i].strip()) -# if len(tokens) > 1: -# variables = [f"Unnamed_{idx}" if not token else token for idx, token in enumerate(tokens)] -# break -# return variables, header_skip_count - - -# def variable_parser(lines, meta_start, meta_end): -# """ -# Extract variable names (column headers) from a NOAA text file using multiple methods. - -# The function first attempts to extract variables from a metadata block containing an explicit -# "Variables" marker. If that fails, it attempts extraction from the first data header line. If that -# fails too, it uses a fallback method on the first non-empty data line. - -# Parameters -# ---------- -# lines : list of str -# All lines from the file. -# meta_start : int -# The index of the first metadata line. -# meta_end : int -# The index of the last metadata line. 
- -# Returns -# ------- -# tuple of (list of str, str, int) -# A tuple (variables, source, header_skip_count) where: -# - variables is the list of extracted variable names, -# - source is "metadata" if variables were extracted from the metadata block, -# or "data" if extracted from the data header, -# - header_skip_count indicates how many header lines should be skipped. -# """ -# variables, header_skip_count = parse_metadata_variables(lines, meta_start, meta_end) -# if variables: -# return variables, "metadata", header_skip_count - -# variables, header_skip_count = parse_data_header_variables(lines, meta_end) -# if variables: -# return variables, "data", header_skip_count - -# variables, header_skip_count = fallback_variable_extraction(lines, meta_end) -# if variables: -# return variables, "data", header_skip_count - -# return [], None, 0 - - -# def skip_empty_lines(lines, index): -# """ -# Advance the index until a non-empty line is encountered. - -# Parameters -# ---------- -# lines : list of str -# The file lines. -# index : int -# The starting index. - -# Returns -# ------- -# int -# The index of the first non-empty line. -# """ -# while index < len(lines) and not lines[index].strip(): -# index += 1 -# return index - - -# def detect_delimiter(data_lines): -# r""" -# Detect the delimiter used in a set of data lines. - -# It first tries tab-delimitation; if token counts are inconsistent, it falls back to splitting -# on two or more spaces. - -# Parameters -# ---------- -# data_lines : list of str -# A list of non-empty data lines. - -# Returns -# ------- -# str -# The detected delimiter, either the tab character ('\t') or a regex pattern (r'\s{2,}'). 
-# """ -# non_empty = [line.strip() for line in data_lines if line.strip()] -# if not non_empty: -# return '\t' -# tab_counts = [len(line.split('\t')) for line in non_empty] -# if len(set(tab_counts)) == 1 and tab_counts[0] > 1: -# return '\t' -# space_counts = [len(re.split(r'\s{2,}', line)) for line in non_empty] -# if len(set(space_counts)) == 1 and space_counts[0] > 1: -# return r'\s{2,}' -# return '\t' - - -# def data_parser(lines, meta_end, skip_lines=0): -# """ -# Parse the data block of the file, skipping empty lines and header lines. - -# This function detects the delimiter used in the data block and ensures that all rows are padded -# to have a uniform number of columns. - -# Parameters -# ---------- -# lines : list of str -# All lines from the file. -# meta_end : int -# The index of the last metadata line. -# skip_lines : int, optional -# Number of header lines to skip in the data block, by default 0. - -# Returns -# ------- -# tuple of (list, int) or (None, None) -# A tuple (data, row_len) where data is a list of rows (each row is a list of tokens) and row_len -# is the uniform number of columns. Returns (None, None) if parsing fails. -# """ -# data = [] -# index = meta_end + 1 -# index = skip_empty_lines(lines, index) -# index += skip_lines -# remaining_lines = lines[index:] -# delimiter = detect_delimiter(remaining_lines) -# for line in remaining_lines: -# if not line.strip(): -# continue -# if delimiter == '\t': -# row = line.split('\t') -# else: -# row = re.split(delimiter, line.strip()) -# data.append(row) -# if not data or (data and len(data[0]) < 2): -# return None, None -# max_len = max(len(row) for row in data) -# for i in range(len(data)): -# if len(data[i]) < max_len: -# data[i] = data[i] + [''] * (max_len - len(data[i])) -# return data, max_len - - -# def dataframe_constructor(data, variables): -# """ -# Construct a pandas DataFrame from parsed data rows and variable names. 
- -# Handles three cases: -# - Exact match: The number of variables equals the number of columns. -# - Extra columns: More columns than variables (trims extra columns). -# - Missing columns: Fewer columns than variables (pads rows with empty strings). - -# Parameters -# ---------- -# data : list of list of str -# Parsed data rows. -# variables : list of str -# Column headers. - -# Returns -# ------- -# pandas.DataFrame or None -# The constructed DataFrame with an attribute 'variables' set, or None if data or variables are missing. -# """ -# if not data or not variables: -# return None - -# row_len = len(data[0]) -# var_len = len(variables) - -# if var_len == row_len: -# df = pd.DataFrame(data, columns=variables) -# elif var_len < row_len: -# data_trimmed = [row[:var_len] for row in data] -# df = pd.DataFrame(data_trimmed, columns=variables) -# elif var_len > row_len: -# data_padded = [row + [''] * (var_len - len(row)) for row in data] -# df = pd.DataFrame(data_padded, columns=variables) - -# df.attrs['variables'] = variables -# return df - -# # --------------------------------------------------------------------------- -# # StandardParser Class -# # --------------------------------------------------------------------------- -# class StandardParser: -# """ -# StandardParser encapsulates the complete workflow for downloading and parsing a NOAA text file. - -# The class maintains attributes such as the URL, file lines, metadata boundaries, extracted variable names, -# header skip count, parsed data, and the final DataFrame. - -# Attributes -# ---------- -# url : str -# The URL of the file to parse. -# lines : list of str -# The content of the file split into lines. -# meta_start : int -# The index of the first metadata line. -# meta_end : int -# The index of the last metadata line. -# variables : list of str -# The extracted variable names. -# skip_lines : int -# The number of header lines to skip in the data block. 
-# data : list of list of str -# The parsed data rows. -# df : pandas.DataFrame -# The constructed DataFrame. - -# Methods -# ------- -# parse(url=None) -# Execute the full parsing workflow and return the constructed DataFrame. -# _fetch_file() -# Fetch the file and set the 'lines' attribute. -# _identify_metadata() -# Identify metadata boundaries and set 'meta_start' and 'meta_end'. -# _extract_variables() -# Extract variable names and header skip count, setting 'variables' and 'skip_lines'. -# _parse_data() -# Parse the data block from the file and set the 'data' attribute. -# _construct_dataframe() -# Construct the final DataFrame from parsed data and variables. -# """ -# def __init__(self, url=None): -# self.url = url -# self.lines = None -# self.meta_start = None -# self.meta_end = None -# self.variables = None -# self.skip_lines = 0 -# self.data = None -# self.df = None - -# def parse(self, url=None): -# """ -# Orchestrate the full parsing process. - -# Parameters -# ---------- -# url : str, optional -# The URL to parse. If provided, it overrides the existing URL attribute. - -# Returns -# ------- -# pandas.DataFrame -# The constructed DataFrame. - -# Raises -# ------ -# ParsingError -# If any step of the parsing process fails. -# """ -# if url is not None: -# self.url = url -# if not self.url: -# raise ParsingError("No URL provided.") -# try: -# self._fetch_file() -# except Exception as e: -# raise ParsingError(f"Error fetching file: {e}") -# self.meta_start, self.meta_end = self._identify_metadata() -# if self.meta_start is None: -# raise ParsingError("Invalid file format." 
-# "Wrapper can only parse stndard NOAA template formatted files") -# self.variables, _, self.skip_lines = self._extract_variables() -# if not self.variables: -# raise ParsingError("Failed to extract variable names from file.") -# self.data, _ = self._parse_data() -# if self.data is None: -# raise ParsingError("No valid data block found.") -# self.df = self._construct_dataframe() -# if self.df is None: -# raise ParsingError("DataFrame construction failed.") -# return self.df - -# def _fetch_file(self): -# self.lines = fetch_file(self.url) - -# def _identify_metadata(self): -# return identify_metadata(self.lines) - -# def _extract_variables(self): -# return variable_parser(self.lines, self.meta_start, self.meta_end) - -# def _parse_data(self): -# return data_parser(self.lines, self.meta_end, self.skip_lines) - -# def _construct_dataframe(self): -# return dataframe_constructor(self.data, self.variables) \ No newline at end of file +if __name__ == "__main__": + parser = StandardParser("https://www.ncei.noaa.gov/pub/data/paleo/contributions_by_author/khider2014/khider2014-benth.txt") + dfs = parser.parse() + print(dfs) + print(dfs["depth_cm"].dtype) \ No newline at end of file From f8e2d7751c18f24849a420fe69f87329bddd2331 Mon Sep 17 00:00:00 2001 From: Dhiren Date: Thu, 26 Mar 2026 15:05:07 -0700 Subject: [PATCH 8/9] COrrceting import statements --- pyleotups/utils/Parser/ExcelParser.py | 2 +- pyleotups/utils/Parser/StandardParser.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyleotups/utils/Parser/ExcelParser.py b/pyleotups/utils/Parser/ExcelParser.py index 23c09690..561c8479 100644 --- a/pyleotups/utils/Parser/ExcelParser.py +++ b/pyleotups/utils/Parser/ExcelParser.py @@ -6,7 +6,7 @@ from dataclasses import dataclass, field from typing import Any, List, Optional, Tuple, Iterable, Dict from enum import Enum -from NonStandardParserUtils import auto_cast_df +from .NonStandardParserUtils import auto_cast_df NUMERIC_THRESHOLD_HEADER = 0.25 diff 
--git a/pyleotups/utils/Parser/StandardParser.py b/pyleotups/utils/Parser/StandardParser.py index 5a018038..0bc9ed88 100644 --- a/pyleotups/utils/Parser/StandardParser.py +++ b/pyleotups/utils/Parser/StandardParser.py @@ -4,7 +4,7 @@ import pandas as pd import re -from NonStandardParserUtils import auto_cast_df +from .NonStandardParserUtils import auto_cast_df @DeprecationWarning class DataFetcher: From 3dcac9eacb100b693c0be7237bd993494163f78b Mon Sep 17 00:00:00 2001 From: Dhiren Date: Thu, 26 Mar 2026 15:29:23 -0700 Subject: [PATCH 9/9] Changing errors ignore to coerce --- pyleotups/utils/Parser/NonStandardParserUtils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyleotups/utils/Parser/NonStandardParserUtils.py b/pyleotups/utils/Parser/NonStandardParserUtils.py index 660825a0..3b594f04 100644 --- a/pyleotups/utils/Parser/NonStandardParserUtils.py +++ b/pyleotups/utils/Parser/NonStandardParserUtils.py @@ -641,7 +641,7 @@ def auto_cast_df(df: pd.DataFrame) -> pd.DataFrame: for col in df.columns: if df[col].dtype == "object": # Try numeric conversion - converted = pd.to_numeric(df[col], errors="ignore") + converted = pd.to_numeric(df[col], errors="coerce") # Only replace if conversion actually changed type if converted.dtype != "object":