<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type='text/xsl' href='/oai/static/oai2.xsl' ?><OAI-PMH xmlns="http://www.openarchives.org/OAI/2.0/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/ http://www.openarchives.org/OAI/2.0/OAI-PMH.xsd">
  <responseDate>2026-05-01T04:49:45Z</responseDate>
  <request identifier="8e14841648a020ab4d1e49d00114ef59225c2d668f1f261abbc9534b87c34692" metadataPrefix="oai_ddi25" verb="GetRecord">https://datacatalogue.cessda.eu/oai-pmh/v0/oai</request>
  <GetRecord>
    <record>
    <header>
      <!-- OAI-PMH record header: item identifier, datestamp, and one setSpec per set membership. -->
      <identifier>8e14841648a020ab4d1e49d00114ef59225c2d668f1f261abbc9534b87c34692</identifier>
      <datestamp>2025-09-29T01:09:34Z</datestamp>
      <setSpec>language:en</setSpec>
      <setSpec>openaire_data</setSpec>
    </header>
      <metadata>
        <codeBook xmlns="ddi:codebook:2_5" version="2.5" xsi:schemaLocation="ddi:codebook:2_5 http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd">
    <docDscr>
      <!-- Citation for the metadata document; only the title is populated, prodStmt is empty. -->
      <citation>
        <titlStmt>
          <titl xml:lang="en">ParlaCAP: Dataset for tracking political agenda-setting across European parliaments</titl>
        </titlStmt>
        <prodStmt/>
      </citation>
    </docDscr>
  <stdyDscr>
    <citation>
      <titlStmt>
        <!-- Study title and its identifier; the IDNo text carries a "doi:" scheme prefix and agency names the identifier type. -->
        <titl xml:lang="en">ParlaCAP: Dataset for tracking political agenda-setting across European parliaments</titl>
        <IDNo xml:lang="en" agency="DOI">doi:10.23669/1ZTELP</IDNo>
      </titlStmt>
      <rspStmt>
        <!-- Authoring entities. Each name shares a line with its tags so no layout whitespace leaks into the text value. -->
        <AuthEnty affiliation="Jožef Stefan Institute; University of Ljubljana" xml:lang="en">Ljubešić, Nikola</AuthEnty>
        <AuthEnty affiliation="Jožef Stefan Institute" xml:lang="en">Rupnik, Peter</AuthEnty>
        <AuthEnty affiliation="Jožef Stefan Institute" xml:lang="en">Kuzman Pungeršek, Taja</AuthEnty>
        <AuthEnty affiliation="Jožef Stefan Institute" xml:lang="en">Porupski, Ivan</AuthEnty>
        <AuthEnty affiliation="Radboud University Nijmegen" xml:lang="en">Mochtak, Michal</AuthEnty>
        <AuthEnty affiliation="Jožef Stefan Institute" xml:lang="en">Dinić, Vuk</AuthEnty>
        <AuthEnty affiliation="University of Zagreb Faculty of Political Science" xml:lang="en">Širinić, Daniela</AuthEnty>
        <AuthEnty affiliation="Charles University, Faculty of Mathematics and Physics" xml:lang="en">Kopp, Matyáš</AuthEnty>
        <AuthEnty affiliation="Jožef Stefan Institute; Research Centre of the Slovenian Academy of Sciences and Arts (ZRC SAZU)" xml:lang="en">Erjavec, Tomaž</AuthEnty>
      </rspStmt>
      <prodStmt>
        <!-- Funding: the agency attribute names the funder, the element text is the grant identifier. -->
        <grantNo agency="European Commission, Horizon Europe Research and Innovation programme" xml:lang="en">101129751</grantNo>
        <grantNo agency="Slovenian Research Agency (ARIS)" xml:lang="en">CLARIN.SI</grantNo>
      </prodStmt>
      <distStmt>
        <!-- One entry per distributor: full name as the text value, short name in abbr. -->
        <distrbtr abbr="CROSSDA" xml:lang="en">Croatian Social Science Data Archive</distrbtr>
      </distStmt>
      <!-- No version information is supplied for this study. -->
      <verStmt/>
      <holdings xml:lang="en" URI="https://doi.org/10.23669/1ZTELP"/>
    </citation>
    <stdyInfo>
      <subject>
        <!-- Free-text keywords first, then keywords bound to the ELSST vocabulary via vocab/vocabURI. -->
        <keyword xml:lang="en">Social Sciences</keyword>
        <keyword xml:lang="en">parliamentary debates</keyword>
        <keyword xml:lang="en">sentiment analysis</keyword>
        <keyword xml:lang="en">topic classification</keyword>
        <keyword xml:lang="en" vocab="ELSST" vocabURI="https://elsst.cessda.eu/id/5/5fe1de27-c874-4fac-93d5-25c572528bd8">PARLIAMENT</keyword>
        <keyword xml:lang="en" vocab="ELSST" vocabURI="https://elsst.cessda.eu/id/5/d72db1f8-a53a-4780-9b79-8daa59a365cc">MEMBERS OF PARLIAMENT</keyword>
        <keyword xml:lang="en" vocab="ELSST" vocabURI="https://elsst.cessda.eu/id/5/1951407b-586e-422a-92d7-7551e93e9277">LINGUISTIC ANALYSIS</keyword>
        <!-- Topic classification term from the CESSDA Topic Classification vocabulary. -->
        <topcClas xml:lang="en" vocab="CESSDA Topic Classification" vocabURI="https://vocabularies.cessda.eu/vocabulary/TopicClassification?v=3.0">Government, political systems and organisations</topcClas>
      </subject>
      <!-- NOTE(review): the abstract text embeds entity-escaped HTML anchors (a href links); consumers presumably must unescape and sanitise them before display. TODO confirm. -->
      <abstract xml:lang="en">The ParlaCAP dataset consists of 8 million speeches from 28 European national and regional parliaments, with each speech coded with the sentiment expressed (&lt;a href="https://aclanthology.org/2024.lrec-main.1393/"&gt;ParlaSent&lt;/a&gt; coding from negative, over neutral, to positive) and the topic discussed (&lt;a href="https://www.comparativeagendas.net/pages/master-codebook"&gt;Comparative Agendas Project&lt;/a&gt; coding with 22 topics), and rich metadata on the speakers, parties and democracies. The dataset is an extension of the &lt;a href="https://hdl.handle.net/11356/2004"&gt;ParlaMint 5.0&lt;/a&gt; dataset, which was primarily focused on the transcripts of parliamentary speeches and their metadata. The ParlaCAP dataset extends the ParlaMint dataset via the “text as data” paradigm by automatically coding topics and sentiment for each speech, simplifying the data to a tabular form, and thereby empowering social science research on agenda setting and negativity in political discourse across a broad set of parliaments. For automatic coding, multilingual transformer models were used, with the &lt;a href="https://huggingface.co/classla/ParlaCAP-Topic-Classifier"&gt;ParlaCAP&lt;/a&gt; model for topic, and the &lt;a href="https://huggingface.co/classla/xlm-r-parlasent"&gt;ParlaSent&lt;/a&gt; model for sentiment.</abstract>
      <sumDscr>
        <!-- Collection period: the date attribute and the element text carry the same ISO 8601 day. -->
        <collDate xml:lang="en" date="2020-01-15" event="start">2020-01-15</collDate>
        <collDate xml:lang="en" date="2025-07-08" event="end">2025-07-08</collDate>
        <!-- Countries covered, each listed exactly once; sub-national coverage is expressed by geogCover. -->
        <nation xml:lang="en">Austria</nation>
        <nation xml:lang="en">Bosnia and Herzegovina</nation>
        <nation xml:lang="en">Belgium</nation>
        <nation xml:lang="en">Bulgaria</nation>
        <nation xml:lang="en">Czech Republic</nation>
        <nation xml:lang="en">Denmark</nation>
        <nation xml:lang="en">Estonia</nation>
        <nation xml:lang="en">Spain</nation>
        <nation xml:lang="en">France</nation>
        <nation xml:lang="en">United Kingdom</nation>
        <nation xml:lang="en">Greece</nation>
        <nation xml:lang="en">Croatia</nation>
        <nation xml:lang="en">Hungary</nation>
        <nation xml:lang="en">Iceland</nation>
        <nation xml:lang="en">Italy</nation>
        <nation xml:lang="en">Latvia</nation>
        <nation xml:lang="en">Netherlands</nation>
        <nation xml:lang="en">Norway</nation>
        <nation xml:lang="en">Poland</nation>
        <nation xml:lang="en">Portugal</nation>
        <nation xml:lang="en">Serbia</nation>
        <nation xml:lang="en">Sweden</nation>
        <nation xml:lang="en">Slovenia</nation>
        <nation xml:lang="en">Turkey</nation>
        <nation xml:lang="en">Ukraine</nation>
        <geogCover xml:lang="en">Basque country</geogCover>
        <geogCover xml:lang="en">Galicia</geogCover>
        <geogCover xml:lang="en">Catalonia</geogCover>
        <!-- Plain-text term; no controlled-vocabulary concept is attached, so no empty concept child is emitted. -->
        <anlyUnit xml:lang="en">Media unit: Text</anlyUnit>
        <universe xml:lang="en" clusion="I">Members of parliament</universe>
        <universe xml:lang="en" clusion="I">Members of government</universe>
        <universe xml:lang="en" clusion="I">Guest speakers in parliament</universe>
        <dataKind xml:lang="en">Text</dataKind>
        <dataKind xml:lang="en">Numeric</dataKind>
      </sumDscr>
    </stdyInfo>
    <method>
      <dataColl>
        <!-- Methodology terms are plain text; with no vocabulary code available, no empty concept child is emitted. -->
        <sampProc xml:lang="en">Total universe/Complete enumeration</sampProc>
        <collMode xml:lang="en">Automated data extraction: Web scraping</collMode>
        <collMode xml:lang="en">Automated data extraction: Database query</collMode>
        <collMode xml:lang="en">Content coding</collMode>
        <resInstru xml:lang="en">Programming script</resInstru>
      </dataColl>
    </method>
    <dataAccs>
      <!-- No usage or access conditions are supplied; useStmt is present but empty. -->
      <useStmt/>
    </dataAccs>
    <othrStdyMat>
      <relPubl xml:lang="en">
        <!-- Related publications. Each titl holds the full reference as supplied; IDNo holds the bare DOI with agency="DOI". -->
        <citation>
          <titlStmt>
            <titl xml:lang="en">Erjavec, T., Kopp, M., Ljubešić, N. et al. (2025). ParlaMint II: advancing comparable parliamentary corpora across Europe. Lang Resources &amp; Evaluation 59, 2071–2102. https://doi.org/10.1007/s10579-024-09798-w</titl>
            <IDNo xml:lang="en" agency="DOI">10.1007/s10579-024-09798-w</IDNo>
          </titlStmt>
        </citation>
        <citation>
          <titlStmt>
            <titl xml:lang="en">Mochtak, M., Rupnik, P., Kuzman, T., &amp; Ljubešić, N. (2025). Parlasent: mapping sentiment in political discourse with large language models. Political Research Exchange, 7(1). https://doi.org/10.1080/2474736X.2025.2508377</titl>
            <IDNo xml:lang="en" agency="DOI">10.1080/2474736X.2025.2508377</IDNo>
          </titlStmt>
        </citation>
        <citation>
          <titlStmt>
            <titl xml:lang="en">Mochtak, M., Rupnik, P., and Ljubešić, N. (2024). The ParlaSent Multilingual Training Dataset for Sentiment Identification in Parliamentary Proceedings. In Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024) (pp. 16024-16036). Torino, Italia. ELRA and ICCL. https://doi.org/10.48550/arXiv.2309.09783</titl>
            <IDNo xml:lang="en" agency="DOI">10.48550/arXiv.2309.09783</IDNo>
          </titlStmt>
        </citation>
        <citation>
          <titlStmt>
            <titl xml:lang="en">Kuzman, T., &amp; Ljubešić, N. (2025). LLM Teacher-Student Framework for Text Classification With No Manually Annotated Data: A Case Study in IPTC News Topic Classification. IEEE Access. https://doi.org/10.1109/ACCESS.2025.3544814</titl>
            <IDNo xml:lang="en" agency="DOI">10.1109/ACCESS.2025.3544814</IDNo>
          </titlStmt>
        </citation>
      </relPubl>
    </othrStdyMat>
  </stdyDscr>
  <!-- No file-level description is supplied. -->
  <fileDscr/>
</codeBook>
      </metadata>
      <about>
        <provenance xmlns="http://www.openarchives.org/OAI/2.0/provenance"
                    xsi:schemaLocation="http://www.openarchives.org/OAI/2.0/provenance http://www.openarchives.org/OAI/2.0/provenance.xsd">
          <!-- Origin of the harvested record: source repository, original identifier and datestamp, and the metadata namespace harvested. -->
          <originDescription harvestDate="2025-09-29T01:09:33Z" altered="true">
            <baseURL>https://data.crossda.hr/oai</baseURL>
            <identifier>doi:10.23669/1ZTELP</identifier>
            <datestamp>2025-09-27T02:00:00Z</datestamp>
            <metadataNamespace>ddi:codebook:2_5</metadataNamespace>
          </originDescription>
        </provenance>
      </about>
    </record>
  </GetRecord>
</OAI-PMH>