Closed staplegun closed 5 years ago
Moved media mime type to separate issue #54.
A media's parent object appears in the isVersionOf
field: https://csapi-test.nma.gov.au/media?text=*
The object's ID is searchable in the media_object_id
field: https://csapi-test.nma.gov.au/media?media_object_id=33281
However, the mapping is not accounting for multiple parent object IDs, e.g.
<doc>
<field name="EMu IRN for Related Objects">74498, 220558, 74401</field>
<field name="Multimedia ID">MA35642101</field>
There are exactly 2 Piction doc elements which have more than one field[@name='EMu IRN for Related Objects']; on inspection it seems that in both cases the duplicate fields have the exact same value.
xmllint --format --xpath "/add/doc[1 < count(field[@name='EMu IRN for Related Objects'])]" solr_prod1.xml
<doc>
<field name="EMu IRN for Media Asset"/>
<field name="EMu IRN for Media Asset"/>
<field name="EMu IRN for Related Objects">31190</field>
<field name="EMu IRN for Related Objects">31190</field>
<field name="Multimedia ID">MA23119421</field>
<field name="Other Numbers Kind">Accession Number</field>
<field name="Other Numbers Value">1986.0117.0529</field>
<field name="Page Number"/>
<field name="Photographer"/>
<field name="Title">The Museum, Sydney, N.S.W (Australian Museum)</field>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivT\PH2725\Collection\Working Masters\nma_23119421.jpg" name="thumbnail"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivW\PH2725\Collection\Working Masters\nma_23119421.jpg" name="web"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv2\Working Masters\nma.img-ci20112725-084-wm-vs1_o2.jpg" name="original_2"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv3\Working Masters\nma.img-ci20112725-084-wm-vs1_o3.jpg" name="original_3"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv4\Working Masters\nma.img-ci20112725-084-wm-vs1_o4.jpg" name="original_4"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv5\Working Masters\nma.img-ci20112725-084-wm-vs1_o5.jpg" name="original_5"/>
</doc>
<doc>
<field name="EMu IRN for Media Asset"/>
<field name="EMu IRN for Media Asset"/>
<field name="EMu IRN for Related Objects">131792</field>
<field name="EMu IRN for Related Objects">131792</field>
<field name="Multimedia ID">MA23279211</field>
<field name="Other Numbers Kind">Accession Number</field>
<field name="Other Numbers Kind">Incoming Receipt Number</field>
<field name="Other Numbers Value">IR 4096.0001</field>
<field name="Other Numbers Value">2008.0006.0001</field>
<field name="Page Number"/>
<field name="Photographer"/>
<field name="Title">Maris Pacifici quod vulgo Mar del Zur</field>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivT\PH2088\Collection\Working Masters\nma_23279211.jpg" name="thumbnail"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivW\PH2088\Collection\Working Masters\nma_23279211.jpg" name="web"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv2\Working Masters\nma.img-ci20082088-143-wm-vs1_o2.jpg" name="original_2"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv3\Working Masters\nma.img-ci20082088-143-wm-vs1_o3.jpg" name="original_3"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv4\Working Masters\nma.img-ci20082088-143-wm-vs1_o4.jpg" name="original_4"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv5\Working Masters\nma.img-ci20082088-143-wm-vs1_o5.jpg" name="original_5"/>
</doc>
There are three Piction doc records whose EMu object IRN field contains commas:
xmllint --xpath "/add/doc[field[@name='EMu IRN for Related Objects'][contains(., ',')]]" solr_prod1.xml
<doc>
<field name="EMu IRN for Related Objects">74498, 220558, 74401</field>
<field name="Multimedia ID">MA35642101</field>
<field name="Photographer">Katie Shanahan</field>
<field name="Title">Afternoon dress, lilac silk, dating from 1913</field>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivT\DAMS_INGEST\JOBS\WM_35592499\nma_35642101.jpg" name="thumbnail"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivW\DAMS_INGEST\JOBS\WM_35592499\nma_35642101.jpg" name="web"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv2\WM_35592499\nma.img-ci20122915-252-wm-vs1_o2.jpg" name="original_2"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv3\WM_35592499\nma.img-ci20122915-252-wm-vs1_o3.jpg" name="original_3"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv4\WM_35592499\nma.img-ci20122915-252-wm-vs1_o4.jpg" name="original_4"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv5\WM_35592499\nma.img-ci20122915-252-wm-vs1_o5.jpg" name="original_5"/>
</doc>
<doc>
<field name="EMu IRN for Related Objects">220564, 110608</field>
<field name="Multimedia ID">MA35642438</field>
<field name="Photographer">Jason McCarthy</field>
<field name="Title">JF Holle &amp; Co. 3 piece morning suit, Sydney 1913, Black top hat</field>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivT\DAMS_INGEST\JOBS\WM_35592499\nma_35642438.jpg" name="thumbnail"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivW\DAMS_INGEST\JOBS\WM_35592499\nma_35642438.jpg" name="web"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv2\WM_35592499\nma.img-ci20122915-248-wm-vs1_o2.jpg" name="original_2"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv3\WM_35592499\nma.img-ci20122915-248-wm-vs1_o3.jpg" name="original_3"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv4\WM_35592499\nma.img-ci20122915-248-wm-vs1_o4.jpg" name="original_4"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv5\WM_35592499\nma.img-ci20122915-248-wm-vs1_o5.jpg" name="original_5"/>
</doc>
<doc>
<field name="EMu IRN for Related Objects">74498, 220558, 74401</field>
<field name="Multimedia ID">MA35658518</field>
<field name="Photographer">Katie Shanahan</field>
<field name="Title">Afternoon dress, lilac silk, dating from 1913</field>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivT\DAMS_INGEST\JOBS\WM_35592499\nma_35658518.jpg" name="thumbnail"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderivW\DAMS_INGEST\JOBS\WM_35592499\nma_35658518.jpg" name="web"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv2\WM_35592499\nma.img-ci20122915-253-wm-vs1_o2.jpg" name="original_2"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv3\WM_35592499\nma.img-ci20122915-253-wm-vs1_o3.jpg" name="original_3"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv4\WM_35592499\nma.img-ci20122915-253-wm-vs1_o4.jpg" name="original_4"/>
<dataSource type="URLDataSource" baseUrl="\\nma-isilon1\dams_data\Collectionsearch\prodderiv5\WM_35592499\nma.img-ci20122915-253-wm-vs1_o5.jpg" name="original_5"/>
</doc>
We could tokenize() those fields on the comma and catch those three.
Example: media MA35658518 shows it belongs to objects 74498, 220558, 74401. Those objects include the media as a representation (except where an object is not released to the API).
Looks good to me (from what I can tell)
Add parent object link inside media records.