Closed jordanpadams closed 8 months ago
To update harvest config and test on pds-gamma:
Create a new policy/harvest-policy-releases+external.xml containing just this config,
or add this config to the existing policy/harvest-policy-releases.xml:
<!--
  Metadata extraction rules for Product_External labels.
  Each xPath element names a slot (slotName) and contains the XPath
  expression whose matched value(s) are harvested into that slot.

  Attributes are deliberately written without a "harvest:" prefix: no such
  namespace prefix is declared in this snippet, and harvest-pds4 was reported
  to choke on the prefixed form.
-->
<productMetadata objectType="Product_External">
  <!-- Identification_Area -->
  <xPath slotName="information_model_version">
    //Identification_Area/information_model_version
  </xPath>
  <xPath slotName="product_class">
    //Identification_Area/product_class
  </xPath>
  <xPath slotName="alternate_id">
    //Identification_Area/Alias_List/Alias/alternate_id
  </xPath>
  <xPath slotName="alternate_title">
    //Identification_Area/Alias_List/Alias/alternate_title
  </xPath>
  <xPath slotName="citation_author_list">
    //Identification_Area/Citation_Information/author_list
  </xPath>
  <xPath slotName="citation_editor_list">
    //Identification_Area/Citation_Information/editor_list
  </xPath>
  <xPath slotName="citation_publication_year">
    //Identification_Area/Citation_Information/publication_year
  </xPath>
  <xPath slotName="citation_keyword">
    //Identification_Area/Citation_Information/keyword
  </xPath>
  <xPath slotName="citation_description">
    //Identification_Area/Citation_Information/description
  </xPath>
  <xPath slotName="citation_doi">
    //Identification_Area/Citation_Information/doi
  </xPath>
  <!-- Observing_System components, selected by their type value wherever they occur in the label -->
  <xPath slotName="instrument_host_name">
    //Observing_System/Observing_System_Component[type='Spacecraft']/name
  </xPath>
  <xPath slotName="instrument_name">
    //Observing_System/Observing_System_Component[type='Instrument']/name
  </xPath>
  <xPath slotName="modification_date">
    //Identification_Area/Modification_History/Modification_Detail/modification_date
  </xPath>
  <xPath slotName="modification_version_id">
    //Identification_Area/Modification_History/Modification_Detail/version_id
  </xPath>
  <xPath slotName="modification_description">
    //Identification_Area/Modification_History/Modification_Detail/description
  </xPath>
  <!-- Observation_Area -->
  <xPath slotName="observation_comment">
    //Observation_Area/comment
  </xPath>
  <xPath slotName="observation_start_date_time">
    //Observation_Area/Time_Coordinates/start_date_time
  </xPath>
  <xPath slotName="observation_stop_date_time">
    //Observation_Area/Time_Coordinates/stop_date_time
  </xPath>
  <xPath slotName="observation_local_mean_solar_time">
    //Observation_Area/Time_Coordinates/local_mean_solar_time
  </xPath>
  <xPath slotName="observation_local_true_solar_time">
    //Observation_Area/Time_Coordinates/local_true_solar_time
  </xPath>
  <!-- NOTE(review): slot name is spelled "longitute" while the path uses "solar_longitude".
       Kept unchanged because other configuration may reference this slot name; confirm before renaming. -->
  <xPath slotName="observation_solar_longitute">
    //Observation_Area/Time_Coordinates/solar_longitude
  </xPath>
  <xPath slotName="primary_result_purpose">
    //Observation_Area/Primary_Result_Summary/purpose
  </xPath>
  <xPath slotName="primary_result_processing_level">
    //Observation_Area/Primary_Result_Summary/processing_level
  </xPath>
  <xPath slotName="primary_result_description">
    //Observation_Area/Primary_Result_Summary/description
  </xPath>
  <xPath slotName="primary_result_wavelength_range">
    //Observation_Area/Primary_Result_Summary/Science_Facets/wavelength_range
  </xPath>
  <xPath slotName="primary_result_domain">
    //Observation_Area/Primary_Result_Summary/Science_Facets/domain
  </xPath>
  <xPath slotName="primary_result_discipline_name">
    //Observation_Area/Primary_Result_Summary/Science_Facets/discipline_name
  </xPath>
  <xPath slotName="primary_result_facet1">
    //Observation_Area/Primary_Result_Summary/Science_Facets/facet1
  </xPath>
  <xPath slotName="primary_result_subfacet1">
    //Observation_Area/Primary_Result_Summary/Science_Facets/subfacet1
  </xPath>
  <xPath slotName="primary_result_facet2">
    //Observation_Area/Primary_Result_Summary/Science_Facets/facet2
  </xPath>
  <xPath slotName="primary_result_subfacet2">
    //Observation_Area/Primary_Result_Summary/Science_Facets/subfacet2
  </xPath>
  <xPath slotName="investigation_name">
    //Observation_Area/Investigation_Area/name
  </xPath>
  <xPath slotName="investigation_type">
    //Observation_Area/Investigation_Area/type
  </xPath>
  <xPath slotName="observing_system_name">
    //Observation_Area/Observing_System/name
  </xPath>
  <xPath slotName="observing_system_description">
    //Observation_Area/Observing_System/description
  </xPath>
  <xPath slotName="observing_system_component_name">
    //Observation_Area/Observing_System/Observing_System_Component/name
  </xPath>
  <xPath slotName="observing_system_component_type">
    //Observation_Area/Observing_System/Observing_System_Component/type
  </xPath>
  <xPath slotName="observing_system_component_description">
    //Observation_Area/Observing_System/Observing_System_Component/description
  </xPath>
  <xPath slotName="target_name">
    //Observation_Area/Target_Identification/name
  </xPath>
  <xPath slotName="target_alternate_designation">
    //Observation_Area/Target_Identification/alternate_designation
  </xPath>
  <xPath slotName="target_type">
    //Observation_Area/Target_Identification/type
  </xPath>
  <xPath slotName="target_description">
    //Observation_Area/Target_Identification/description
  </xPath>
  <!-- Reference_List -->
  <xPath slotName="external_reference_doi">
    //Reference_List/External_Reference/doi
  </xPath>
  <xPath slotName="external_reference_text">
    //Reference_List/External_Reference/reference_text
  </xPath>
  <xPath slotName="external_reference_description">
    //Reference_List/External_Reference/description
  </xPath>
  <xPath slotName="source_product_curating_facility">
    //Reference_List/Source_Product_External/curating_facility
  </xPath>
  <xPath slotName="source_product_description">
    //Reference_List/Source_Product_External/description
  </xPath>
  <xPath slotName="source_product_doi">
    //Reference_List/Source_Product_External/doi
  </xPath>
  <xPath slotName="source_product_identifier">
    //Reference_List/Source_Product_External/external_source_product_identifier
  </xPath>
  <xPath slotName="source_product_reference_type">
    //Reference_List/Source_Product_External/reference_type
  </xPath>
  <!-- File_Area_External -->
  <xPath slotName="file_name">
    //File_Area_External/File/file_name
  </xPath>
</productMetadata>
Add the following config file to /usr/local/search-core/conf/defaults/pds/pds4/: https://raw.githubusercontent.com/NASA-PDS/registry-harvest-legacy/i8/src/main/resources/conf/search/defaults/pds/pds4/external.xml
Add the following file (rename it to .xml) to the appropriate release directory that will be picked up and loaded by harvest: Product_External_Example_OccPred_DickFrench.xml.txt
Harvest the data like a normal data release.
Verify the data appears in keyword search.
@c-suh not sure if you are working tomorrow, but we would like to deploy a new ds-view when you are available. ping me offline for more information on how to replace old version of ds-view. you should be able to drop that tar.gz somewhere, and update the catalina config to point to it.
@rchenatjpl you should be able to test loading this data into the Registry per the config updates above without the new ds-view. let me know if you have any questions. To check out what it looks like with new ds-view deployed, you should be able to test out the new landing page here: https://pds.nasa.gov/ds-view/pds/viewProductExternal.jsp?identifier=urn:nasa:rms-annex:french23_occult_pred . Without the new ds-view, you should still be able to test out most of it using one of the other landing pages: https://pds.nasa.gov/ds-view/pds/viewCollection.jsp?identifier=urn:nasa:rms-annex:french23_occult_pred
@jordanpadams I created a separate policy/harvest-policy-releases+external.xml on pdscloud-prod2. Do we have Product_External data? I've never seen any either for ingestion or for a review. And we're going to ingest into the legacy database? Or does the new harvest config file also have a productMetadata element? Thanks
@jordanpadams I'm working at least partial today. I figured out deployment of a new version of ds-view just before we went local! I'll update the config on the wiki to reflect that. I'll ping you again when it's done.
@rchenatjpl
Do we have Product_External data?
Step 3 in the comment above includes an example product: https://github.com/NASA-PDS/operations/files/12695845/Product_External_Example_OccPred_DickFrench.xml.txt
And we're going to ingest into the legacy database?
This is all specifically intended for the legacy registry that drives our production keyword search and ds-view. All the new registry stuff doesn't care about any of these configs.
Or does the new harvest config file also have a productMetadata element?
Hmmm. So I think I'm missing something. I noted above the productMetadata element that needs to be added to the harvest policy. It may not be clear since that XML snippet is so large, but there are actually 5 steps outlined in that comment above, in case that didn't come through.
For loading into the legacy registry we run the legacy harvest software with the policy noted above, and then run search-core with the additional search-core config noted above.
@jordanpadams and @rchenatjpl ds-view-2.15.0-SNAPSHOT with the changes for Product External have been deployed to the new machine!
@rchenatjpl new link to config file: https://github.com/NASA-PDS/registry-harvest-legacy/blob/main/src/main/resources/conf/search/defaults/pds/pds4/external.xml
per the manifest txt file, do we really need that? or can we move forward without it? I will ping the user for it.
also as an update, here are the resource links they want for this data product:
Shoot, I responded through email last time, didn't I? Sorry.
I just finished harvesting the Product_External on both -prod1 and -prod2, and I'm pretty confident that product is NOT going to show up in the product search because of this log message (~pds4/log/harvest-releaseX-20231009.log) from search-core: ERROR: [400 BAD_REQUEST] [400 BAD_REQUEST] Not a valid object type "Product_External"
The harvesting did kind of work. Does ds-view read the solr index tables or work off the registry? I don't know what that URL will be. For documents or whatever, the URL is https://pds.nasa.gov/ds-view/pds/viewDocument.jsp?identifier=SOMELID&version=1.0 but I don't see anything like viewExternal in pdscloud-prod1's /usr/local/tomcat/webapps/ds-view/pds/
@c-suh @jordanpadams Catherine, I'm going to paraphrase your email here. Sorry I started that email thread:
I just looked on pdscloud-prod1, and there’s an external.xml at /usr/local/search-core/conf/defaults/pds/pds4 with a timestamp of Oct 9 (yesterday) at 10:43 am, ... Yes, I think I copied it there based on something Jordan pointed to.
About the occultation_prediction_som_manifest.txt, could the file or mention of the file be found somewhere in https://pds.nasa.gov/data/pds4/ ? I built a dummy file. It would be nice to have the real thing because one of these processes will probably save it somewhere, but meh.
So my current status is that I am 90% sure harvest did NOT work, as described in the previous comment. I'm waiting for search-core to finish to verify. Notes: 1) probably a separate problem: On pdscloud-prod1-el7, /usr/local/tomcat/webapps has no viewProductExternal.jsp, which is what external.xml points to 2) I modified the policy/harvest-policy-releases+external.xml Jordan created at the top of this issue to: a) add the policy and candidates elements and b) remove the harvest: namespace because harvest-pds4 choked on it. 3) Inside the Product_External xml file: a) changed the LID to something with 5 colons instead of 3, b) removed the checksum and file_size since I created the dummy file, c) added the two requested resource_links, as I do for all bundle,collection,document products that we ingest at EN.
@rchenatjpl
ERROR: [400 BAD_REQUEST] [400 BAD_REQUEST] Not a valid object type "Product_External"
Ugh. That's not good. This looks like a hard-coded registry thing. Do we maybe just "hack" this and change this to some other product type for the time being to get it into the registry? viewProductExternal.jsp should still work regardless of the product type.
Does ds-view read the solr index tables or work off the registry?
Most of it works off Solr except for some of the context product landing pages.
@jordanpadams @c-suh 1) Sure, I'll try Product_Document. 2) Am I looking in the wrong place for viewProductExternal.jsp? $ pwd /usr/local/tomcat/webapps/ds-view/pds [tomcat@pdscloud-prod1-el7 pds]$ ls advanced.jsp powerSearchParams.jsp viewDocument.jsp advancedSearchParams.jsp quick.jsp viewHostProfile.jsp css quickSearchParams.jsp viewInstrumentProfile.jsp data_search_help.jsp results.jsp viewMissionProfile.jsp ds_footer.html searchParamsJSP.jsp viewNodeProfile.jsp ds_map.html search_result_help.html viewPersonProfile.jsp error.jsp utils.js viewProduct.jsp glossary.html viewBundle.jsp viewProductProfile.jsp images viewCollection.jsp viewProfile.jsp index.jsp viewContext.jsp viewTargetProfile.jsp power.jsp viewDataset.jsp viewVolumeProfile.jsp
@rchenatjpl no that is the right place, but I think we are testing on a different machine. Will ping over Slack.
So search-core finished, but the link below fails, but hopefully it's just some web service not configured yet? https://pdscloud-internal-lb-1618002203.us-west-2.elb.amazonaws.com/datasearch/keyword-search/search.jsp?q=occultation&fq=facet_type%3A%221%2Cdocument%22&f.facet_type.facet.prefix=2%2Cdocument%2C
Bad logic: I actually didn't need viewProductExternal.jsp since I changed the .xml file to a Product_Document instead of a Product_External. Sorry, Catherine.
Oh, previously ingested documents also cause the same error: https://pdscloud-internal-lb-1618002203.us-west-2.elb.amazonaws.com/ds-view/pds/viewDocument.jsp?identifier=urn%3Anasa%3Apds%3Avex-aspera4-els%3Adocument%3Asis&version=1.0
while this works (same doc, operational machine): https://pds.nasa.gov/ds-view/pds/viewDocument.jsp?identifier=urn%3Anasa%3Apds%3Avex-aspera4-els%3Adocument%3Asis&version=1.0
@rchenatjpl @c-suh FYI, the error we were seeing (ERROR: [400 BAD_REQUEST] [400 BAD_REQUEST] Not a valid object type "Product_External") comes from registry-core, so we will just need to shove this product in there for now.
@jordanpadams @c-suh Regarding the SEARCH/ACCESS DATA field, this existing document has values there on pds.nasa.gov but not on prod1. Hopefully, solving this will get the faux-productexternal to have such values, https://pds.nasa.gov/ds-view/pds/viewDocument.jsp?identifier=urn:nasa:pds:vex-aspera4-els:document:sis&version=1.0
@rchenatjpl are you loading everything the same on the test machine? that seems to happen with ds-view when the resources are not being loaded.
@rchenatjpl nevermind. this is a bug somewhere in ds-view
Status: v2.14.4 on prod1/prod2 and loaded successfully
💡 Description
We need to deploy ds-view v2.14.0: