Subject: | |
From: | |
Reply To: | |
Date: | Mon, 7 Sep 2015 05:07:24 -0400 |
Content-Type: | text/plain |
Parts/Attachments: |
|
|
Hi all! I asked for help on this a few months ago and creating crosswalk to transform SKOSXL/RDF-XML is a very difficult task, so I did not go further. Instead, I downloaded the html files which I got the term ID from SPARQL and then worked on the html tables and used some regexes with Notepad++ to form in such a way like below:
<root>
<html>
<table class=" table search-results-property-table"><prefterm>eviction</prefterm> <tr><td><span class="versal property-click" title="A complete explanation of the intended meaning of a concept">DEFINITION</span></td><td><ul>
<li>
<a class="versal" href="../../../agrovoc/en/page/def_b8511a37">Eviction is the removal of someone from their occupation of land or property.</a>
</li>
</ul></td></tr>
<tr><td><span class="versal property-click" title="Broader concept">BROADER CONCEPT</span></td><td><ul>
<li>
<a class="versal" href="../../../agrovoc/en/page/c_9000090">land access</a>
</li>
</ul></td></tr>
<tr><td><otherlang class="versal property-click" title="Other language" >IN OTHER LANGUAGES</otherlang></td><td>
<table>
<tr class="other-languages first-of-language"><td class="versal versal-pref">déguerpissement$cFrench</td></tr>
<tr class="other-languages first-of-language"><td class="versal versal-pref">desalojo$cSpanish</td></tr>
</table></td></tr>
<tr><td><span class="versal">URI</span></td>
<td><uri>http://aims.fao.org/aos/agrovoc/c_0b88a82c</uri></td></tr>
<tr><td><span class="versal">Download this concept:
</span></td><td><span class="versal"><a href="../../../rest/v1/agrovoc/data?uri=http%3A%2F%2Faims.fao.org%2Faos%2Fagrovoc%2Fc_0b88a82c&format=application/rdf%2Bxml">RDF/XML</a>
<a href="../../../rest/v1/agrovoc/data?uri=http%3A%2F%2Faims.fao.org%2Faos%2Fagrovoc%2Fc_0b88a82c&format=text/turtle">
TURTLE</a>
</span></td></tr>
</table></html>
<html>
<table class=" table search-results-property-table"><prefterm>investment funds</prefterm> <tr><td><span class="versal property-click" title="Broader concept">BROADER CONCEPT</span></td><td><ul>
<li>
<a class="versal" href="../../../agrovoc/en/page/c_28898">investment banks</a>
</li>
</ul></td></tr>
<tr><td><otherlang class="versal property-click" title="Other language" >IN OTHER LANGUAGES</otherlang></td><td>
<table>
<tr class="other-languages first-of-language"><td class="versal versal-pref">fonds d' investissement $cFrench</td></tr>
</table></td></tr>
<tr><td><span class="versal">URI</span></td>
<td><uri>http://aims.fao.org/aos/agrovoc/c_0d7ef890</uri></td></tr>
<tr><td><span class="versal">Download this concept:
</span></td><td><span class="versal"><a href="../../../rest/v1/agrovoc/data?uri=http%3A%2F%2Faims.fao.org%2Faos%2Fagrovoc%2Fc_0d7ef890&format=application/rdf%2Bxml">RDF/XML</a>
<a href="../../../rest/v1/agrovoc/data?uri=http%3A%2F%2Faims.fao.org%2Faos%2Fagrovoc%2Fc_0d7ef890&format=text/turtle">
TURTLE</a>
</span></td></tr>
</table></html>
</root>
I created the following crosswalk to convert the file into MARC:
<?xml version="1.0" encoding="UTF-8"?>
<xsl:stylesheet version="1.0"
xmlns:xsl="http://www.w3.org/1999/XSL/Transform">
<!--
  Plain-text output (MarcEdit mnemonic lines).
  The encoding is stated explicitly because the default for the text method is
  processor-dependent and the source data contains non-ASCII characters.
  omit-xml-declaration and indent were dropped: they only affect the xml/html
  output methods and have no effect on text output.
-->
<xsl:output method="text" encoding="UTF-8"/>
<!--
  Entry template. For every <html> child of <root> it writes one record:
    marker line, LDR, 013 (uri), 300 (prefterm), 301 (span rows titled
    "Broader concept"), 302 (narrow), 303 (altlabel), 305 (generic rows),
    304 (otherlang), then an empty line.

  Changes relative to the posted copy:
    * Field separators are line breaks (&#10;) instead of a single space, and
      tags are followed by two spaces, so each field sits on its own line in
      mnemonic form.
    * The leader literal is 24 characters (two blanks after "nam"); the posted
      literal was 23 characters long.
    * The separators that followed the span / narrow / altlabel calls were
      removed: those templates already terminate every field they write, so
      the extra separator only produced empty lines inside a record.
    * In the "versal property-click" 305 field the id is written to $c (it was
      a second $b), matching fields 301 and 302 which carry the id in $c.
  NOTE(review): the single-space forms in the posted copy look like whitespace
  collapsed by the list archive; confirm against the author's original file.
-->
<xsl:template match="root">
  <xsl:for-each select="html">
    <!-- Marker line kept verbatim; presumably used by later regex clean-up (TODO confirm). -->
    <xsl:text>START HERE&#10;</xsl:text>
    <xsl:text>=LDR  00000nam  2200000Ia 4500&#10;</xsl:text>

    <!-- The uri and prefterm templates do not terminate their line, so it is done here. -->
    <xsl:apply-templates select="table/tr/td/uri"/>
    <xsl:text>&#10;</xsl:text>
    <xsl:apply-templates select="table/prefterm"/>
    <xsl:text>&#10;</xsl:text>

    <!-- These templates write zero or more complete lines each. -->
    <xsl:apply-templates select="table/tr/td/span"/>
    <xsl:apply-templates select="table/tr/td/narrow"/>
    <xsl:apply-templates select="table/tr/td/altlabel"/>

    <!-- Rows whose label span has class exactly "versal": one 305 per <a>, then one per <p>. -->
    <xsl:for-each select="table/tr[td/span/@class='versal']">
      <!-- value-of on this node-set yields the text of the first span in the row -->
      <xsl:variable name="span" select="td/span"/>
      <xsl:for-each select="td/ul/li/a">
        <xsl:text>=305  \\$a</xsl:text>
        <xsl:value-of select="$span"/>
        <xsl:text>$b</xsl:text>
        <xsl:value-of select="."/>
        <xsl:text>&#10;</xsl:text>
      </xsl:for-each>
      <xsl:for-each select="td/ul/li/p">
        <xsl:text>=305  \\$a</xsl:text>
        <xsl:value-of select="$span"/>
        <xsl:text>$b</xsl:text>
        <xsl:value-of select="."/>
        <xsl:text>&#10;</xsl:text>
      </xsl:for-each>
    </xsl:for-each>

    <!-- Rows whose label span has class "versal property-click": label, link text, id from href. -->
    <xsl:for-each select="table/tr[td/span/@class='versal property-click']">
      <xsl:variable name="span" select="td/span"/>
      <xsl:for-each select="td/ul/li/a">
        <xsl:text>=305  \\$a</xsl:text>
        <xsl:value-of select="$span"/>
        <xsl:text>$b</xsl:text>
        <xsl:value-of select="."/>
        <xsl:text>$c</xsl:text>
        <!-- last path segment of the href, e.g. c_9000090 -->
        <xsl:call-template name="tokenizeString">
          <xsl:with-param name="list" select="@href"/>
          <xsl:with-param name="delimiter" select="'/'"/>
        </xsl:call-template>
        <xsl:text>&#10;</xsl:text>
      </xsl:for-each>
    </xsl:for-each>

    <xsl:apply-templates select="table/tr/td/otherlang"/>
    <!-- Empty line closing the record. -->
    <xsl:text>&#10;</xsl:text>
  </xsl:for-each>
</xsl:template>
<!--
  Writes field 013 for a <uri> cell: fixed label "controlno" in $a and the
  element's text (the concept URI) in $b. No line terminator is written here;
  the root template writes the separator that follows.
  The tag is followed by two spaces as in mnemonic MARC; the posted copy had one.
  NOTE(review): presumably collapsed by the list archive; confirm against the original file.
-->
<xsl:template match="table/tr/td/uri">
  <xsl:text>=013  \\$acontrolno$b</xsl:text>
  <xsl:value-of select="."/>
</xsl:template>
<!--
  Writes field 300 for the <prefterm> element: fixed label in $a and the
  preferred term text in $b. No line terminator is written here; the root
  template writes the separator that follows.
  The tag is followed by two spaces as in mnemonic MARC; the posted copy had one.
  NOTE(review): the label spelling "prefferedterm" is output data and is kept
  unchanged in case later processing matches on it.
-->
<xsl:template match="table/prefterm">
  <xsl:text>=300  \\$aprefferedterm$b</xsl:text>
  <xsl:value-of select="."/>
</xsl:template>
<!--
  Writes field 301 for label spans whose title attribute is "Broader concept";
  every other span produces no output.

  One 301 line is written per <a> in the same table row:
    $a  text of the label span
    $b  link text (the broader term)
    $c  last path segment of the link's href (the concept id)

  Changes relative to the posted copy:
    * All links in the row are processed. Previously value-of / the href
      parameter were given the whole node-set, so only the first link was
      used and any further broader concepts were dropped. A row without
      links now writes nothing instead of a 301 with empty $b and $c.
    * Tag followed by two spaces and each field ended with a line break
      (&#10;), as mnemonic MARC expects; the posted copy had single spaces.
-->
<xsl:template match="table/tr/td/span">
  <xsl:if test="@title='Broader concept'">
    <!-- captured before the loop because the context node changes inside it -->
    <xsl:variable name="label" select="string(.)"/>
    <xsl:for-each select="../../td/ul/li/a">
      <xsl:text>=301  \\$a</xsl:text>
      <xsl:value-of select="$label"/>
      <xsl:text>$b</xsl:text>
      <xsl:value-of select="."/>
      <xsl:text>$c</xsl:text>
      <xsl:call-template name="tokenizeString">
        <xsl:with-param name="list" select="@href"/>
        <xsl:with-param name="delimiter" select="'/'"/>
      </xsl:call-template>
      <xsl:text>&#10;</xsl:text>
    </xsl:for-each>
  </xsl:if>
</xsl:template>
<!--
  Writes one field 302 per link found in the table row that contains a
  <narrow> label cell:
    $a  fixed label "narrowerterm"
    $b  link text
    $c  last path segment of the link's href
  Tag followed by two spaces and each field ended with a line break (&#10;),
  as mnemonic MARC expects; the posted copy had single spaces in both places.
  NOTE(review): presumably collapsed by the list archive; confirm against the original file.
-->
<xsl:template match="table/tr/td/narrow">
  <xsl:for-each select="../../td/ul/li/a">
    <xsl:text>=302  \\$anarrowerterm$b</xsl:text>
    <xsl:value-of select="."/>
    <xsl:text>$c</xsl:text>
    <xsl:call-template name="tokenizeString">
      <xsl:with-param name="list" select="@href"/>
      <xsl:with-param name="delimiter" select="'/'"/>
    </xsl:call-template>
    <xsl:text>&#10;</xsl:text>
  </xsl:for-each>
</xsl:template>
<!--
  Writes one field 303 per <p> list item found in the table row that contains
  an <altlabel> label cell: fixed label "altlabel" in $a, item text in $b.
  Tag followed by two spaces and each field ended with a line break (&#10;),
  as mnemonic MARC expects; the posted copy had single spaces in both places.
  NOTE(review): presumably collapsed by the list archive; confirm against the original file.
-->
<xsl:template match="table/tr/td/altlabel">
  <xsl:for-each select="../../td/ul/li/p">
    <xsl:text>=303  \\$aaltlabel$b</xsl:text>
    <xsl:value-of select="."/>
    <xsl:text>&#10;</xsl:text>
  </xsl:for-each>
</xsl:template>
<!--
  Writes one field 304 per translated term in the table row that contains an
  <otherlang> label cell. Terms are the cells with class "versal versal-pref"
  of the nested table; the cell text goes to $b after the fixed label
  "otherlangterm" in $a. In the sample input the cell text already carries a
  "$c<language>" suffix, which therefore lands in the output unchanged.
  Tag followed by two spaces and each field ended with a line break (&#10;),
  as mnemonic MARC expects; the posted copy had single spaces in both places.
  NOTE(review): presumably collapsed by the list archive; confirm against the original file.
-->
<xsl:template match="table/tr/td/otherlang">
  <xsl:for-each select="../../td/table/tr/td[@class='versal versal-pref']">
    <xsl:text>=304  \\$aotherlangterm$b</xsl:text>
    <xsl:value-of select="."/>
    <xsl:text>&#10;</xsl:text>
  </xsl:for-each>
</xsl:template>
<!--
  Returns the part of $list that follows the LAST occurrence of $delimiter
  (despite the name, no list of tokens is produced). If $delimiter does not
  occur, $list is returned unchanged.

  Parameters:
    list       string to examine, e.g. an href such as "../../page/c_28898"
    delimiter  separator string, e.g. '/'

  Works by recursion: while the delimiter is still present, everything up to
  and including its first occurrence is discarded and the remainder is
  examined again.

  An empty or omitted delimiter returns $list as is. Without this guard the
  recursion never ends, because contains(x, '') is always true and
  substring-after(x, '') returns x unchanged.
-->
<xsl:template name="tokenizeString">
  <xsl:param name="list"/>
  <xsl:param name="delimiter"/>
  <xsl:choose>
    <xsl:when test="string-length($delimiter) &gt; 0 and contains($list, $delimiter)">
      <!-- drop the leading segment and keep looking in the remainder -->
      <xsl:call-template name="tokenizeString">
        <xsl:with-param name="list" select="substring-after($list, $delimiter)"/>
        <xsl:with-param name="delimiter" select="$delimiter"/>
      </xsl:call-template>
    </xsl:when>
    <xsl:otherwise>
      <xsl:value-of select="$list"/>
    </xsl:otherwise>
  </xsl:choose>
</xsl:template>
</xsl:stylesheet>
Thanks also to Dana for off list conversation. Cheers and regards!
________________________________________________________________________
This message comes to you via MARCEDIT-L, a Listserv(R) list for technical and instructional support in MarcEdit. If you wish to communicate directly with the list owners, write to [log in to unmask] To unsubscribe, send a message "SIGNOFF MARCEDIT-L" to [log in to unmask]
|
|
|