Automatically assigned DDC number: 00635

Manually assigned DDC number: 00635

Number of references: 8

Title: Unsupervised Learning of Subcategorisation Information and Its Application in a Parsing Subtask

Author:

Subject: Sabine Buchholz Unsupervised Learning of Subcategorisation Information and Its Application in a Parsing Subtask

Description: This paper is about two aspects of subcategorisation in NLP. First, it is about the automatic extraction of subcategorisation information from corpora. More specifically, we are concerned with unsupervised learning of subcategorisation information from tagged text by means of hierarchical clustering. The second aspect of the paper is the usage of this subcategorisation information for parsing, especially for the distinction between complements and adjuncts. We show that the information learned by unsupervised clustering can be exploited by a memory-based learner, to improve upon the complement-adjunct distinction. We compare the improvement gained by the use of this unsupervised information (1%) to that of different representations of subcategorisation information extracted from the tree-bank annotation (maximum 1.5%). The unsupervised information thus achieves two thirds of the improvement that can be obtained from the hand-crafted treebank information. 1 1 Introduction Subcategoris...

Contributor: The Pennsylvania State University CiteSeer Archives

Publisher: unknown

Date: 1998-11-30

Format: ps

Identifier: http://citeseer.ist.psu.edu/168744.html

Source: ftp://ilk.kub.nl/pub/papers/ilk.9811.ps.gz

Language: en

Relation:

Relation:

Relation:

Relation:

Relation:

Relation:

Relation:

Relation:

Rights: unrestricted

Graph

<?xml   version="1.0"   encoding="UTF-8"?>

<references_metadata>

      <rec   ID="/569344.html"   Type="inproceedings"   CiteSeer_Book="ACL   Proceedings   25th   Annual   Meeting"   CiteSeer_Volume=""   Title="The   derivation   of   grammatically   indexed   lexicon   from   the   Longman   Dictionary   of   Contemporary   English,"   />

      <rec   ID="/552546.html"   Type="incollection"   CiteSeer_Book="Proceedings   of   the   Second   Conference   on   Empirical   Methods   in   Natural   Language   Processing"   CiteSeer_Volume=""   Title="Tagging   Grammatical   Functions,">

            <identifier   Org="ISBN:1402013345"   Paper_ID="/552546.html"   Extracted="1402013345"   DDC="415"   Normalized_DDC="415"   Normalized_Weight="0.16666666666666666"   />

            <identifier   Org="ISBN:140202293X"   Paper_ID="/552546.html"   Extracted="140202293X"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.16666666666666666"   />

            <identifier   Org="ISBN:3110176157"   Paper_ID="/552546.html"   Extracted="3110176157"   DDC="410"   Normalized_DDC="41"   Normalized_Weight="0.16666666666666666"   />

            <identifier   Org="ISBN:3540230491"   Paper_ID="/552546.html"   Extracted="3540230491"   DDC="006.35"   Normalized_DDC="00635"   Normalized_Weight="0.16666666666666666"   />

            <identifier   Org="ISBN:3540245235"   Paper_ID="/552546.html"   Extracted="3540245235"   DDC="025.04"   Normalized_DDC="02504"   Normalized_Weight="0.16666666666666666"   />

            <identifier   Org="ISBN:8483382822"   Paper_ID="/552546.html"   Extracted="8483382822"   />

            <identifier   Org="ISBN:9027249911"   Paper_ID="/552546.html"   Extracted="9027249911"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.16666666666666666"   />

      </rec>

      <rec   ID="/582398.html"   Type="misc"   CiteSeer_Book=""   CiteSeer_Volume=""   Title="Automatic   Extraction   of   Subcategorization   from   Corpora,">

            <identifier   Org="ISBN:0130950696"   Paper_ID="/582398.html"   Extracted="0130950696"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0199292345"   Paper_ID="/582398.html"   Extracted="0199292345"   DDC="413.028"   Normalized_DDC="413028"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0824790006"   Paper_ID="/582398.html"   Extracted="0824790006"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:140202293X"   Paper_ID="/582398.html"   Extracted="140202293X"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:1558607862"   Paper_ID="/582398.html"   Extracted="1558607862"   />

            <identifier   Org="ISBN:1588111563"   Paper_ID="/582398.html"   Extracted="1588111563"   DDC="415"   Normalized_DDC="415"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:1860672302"   Paper_ID="/582398.html"   Extracted="1860672302"   />

            <identifier   Org="ISBN:354000680X"   Paper_ID="/582398.html"   Extracted="354000680X"   DDC="006.3/3"   Normalized_DDC="00633"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540214593"   Paper_ID="/582398.html"   Extracted="3540214593"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540240179"   Paper_ID="/582398.html"   Extracted="3540240179"   DDC="025.04"   Normalized_DDC="02504"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540244751"   Paper_ID="/582398.html"   Extracted="3540244751"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540287892"   Paper_ID="/582398.html"   Extracted="3540287892"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540335870"   Paper_ID="/582398.html"   Extracted="3540335870"   DDC="005.1"   Normalized_DDC="0051"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540425578"   Paper_ID="/582398.html"   Extracted="3540425578"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540650687"   Paper_ID="/582398.html"   Extracted="3540650687"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:354078134X"   Paper_ID="/582398.html"   Extracted="354078134X"   DDC="005.52"   Normalized_DDC="00552"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:382336099X"   Paper_ID="/582398.html"   Extracted="382336099X"   DDC="401/.43"   Normalized_DDC="40143"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:8483382822"   Paper_ID="/582398.html"   Extracted="8483382822"   />

      </rec>

      <rec   ID="/46270.html"   Type="misc"   CiteSeer_Book=""   CiteSeer_Volume=""   Title="Distinguishing   complements   from   adjuncts   using   memory-based   learning,">

            <identifier   Org="ISBN:0521808901"   Paper_ID="/46270.html"   Extracted="0521808901"   DDC="006.35"   Normalized_DDC="00635"   Normalized_Weight="0.3333333333333333"   />

            <identifier   Org="ISBN:3540660445"   Paper_ID="/46270.html"   Extracted="3540660445"   DDC="006.3/31"   Normalized_DDC="006331"   Normalized_Weight="0.3333333333333333"   />

            <identifier   Org="ISBN:9027249911"   Paper_ID="/46270.html"   Extracted="9027249911"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.3333333333333333"   />

            <identifier   Org="ISBN:9042005998"   Paper_ID="/46270.html"   Extracted="9042005998"   />

            <identifier   Org="ISBN:9042006099"   Paper_ID="/46270.html"   Extracted="9042006099"   />

      </rec>

      <rec   ID="/585525.html"   Type="misc"   CiteSeer_Book=""   CiteSeer_Volume=""   Title="Can   subcategorisation   probabilities   help   a   statistical   parser,">

            <identifier   Org="ISBN:0792366166"   Paper_ID="/585525.html"   Extracted="0792366166"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.3333333333333333"   />

            <identifier   Org="ISBN:1402013345"   Paper_ID="/585525.html"   Extracted="1402013345"   DDC="415"   Normalized_DDC="415"   Normalized_Weight="0.3333333333333333"   />

            <identifier   Org="ISBN:140202293X"   Paper_ID="/585525.html"   Extracted="140202293X"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.3333333333333333"   />

      </rec>

      <rec   ID="/565817.html"   Type="inproceedings"   CiteSeer_Book="Proc   ARPA   Human   Language   Technology   Workshop   93"   CiteSeer_Volume=""   Title="The   Comlex   Syntax   Project,">

            <identifier   Org="ISBN:026206197X"   Paper_ID="/565817.html"   Extracted="026206197X"   DDC="423/.1"   Normalized_DDC="4231"   Normalized_Weight="0.3333333333333333"   />

            <identifier   Org="ISBN:3540662235"   Paper_ID="/565817.html"   Extracted="3540662235"   DDC="006.3/3"   Normalized_DDC="00633"   Normalized_Weight="0.3333333333333333"   />

            <identifier   Org="ISBN:9051993161"   Paper_ID="/565817.html"   Extracted="9051993161"   DDC="005.1"   Normalized_DDC="0051"   Normalized_Weight="0.3333333333333333"   />

      </rec>

      <rec   ID="/580256.html"   Type="inproceedings"   CiteSeer_Book="Meeting   of   the   Association   for   Computational   Linguistics"   CiteSeer_Volume=""   Title="Automatic   Acquisition   of   a   Large   Subcategorization   Dictionary   from   Corpora,">

            <identifier   Org="ISBN:0130950696"   Paper_ID="/580256.html"   Extracted="0130950696"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:019927634X"   Paper_ID="/580256.html"   Extracted="019927634X"   DDC="410.285"   Normalized_DDC="410285"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:026206197X"   Paper_ID="/580256.html"   Extracted="026206197X"   DDC="423/.1"   Normalized_DDC="4231"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0262523388"   Paper_ID="/580256.html"   Extracted="0262523388"   DDC="410/.1/5192"   Normalized_DDC="41015192"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0262611228"   Paper_ID="/580256.html"   Extracted="0262611228"   DDC="410.72"   Normalized_DDC="41072"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0521592771"   Paper_ID="/580256.html"   Extracted="0521592771"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0792344634"   Paper_ID="/580256.html"   Extracted="0792344634"   DDC="410/.285"   Normalized_DDC="410285"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0792354990"   Paper_ID="/580256.html"   Extracted="0792354990"   DDC="415"   Normalized_DDC="415"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:079236368X"   Paper_ID="/580256.html"   Extracted="079236368X"   DDC="413/.028"   Normalized_DDC="413028"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:0824790006"   Paper_ID="/580256.html"   Extracted="0824790006"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:1558607862"   Paper_ID="/580256.html"   Extracted="1558607862"   />

            <identifier   Org="ISBN:3540250565"   Paper_ID="/580256.html"   Extracted="3540250565"   DDC="006.3"   Normalized_DDC="0063"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540425578"   Paper_ID="/580256.html"   Extracted="3540425578"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540441484"   Paper_ID="/580256.html"   Extracted="3540441484"   DDC="006.4"   Normalized_DDC="0064"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540590404"   Paper_ID="/580256.html"   Extracted="3540590404"   DDC="418/.02/0285"   Normalized_DDC="418020285"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3540609253"   Paper_ID="/580256.html"   Extracted="3540609253"   DDC="006.3/5"   Normalized_DDC="00635"   Normalized_Weight="0.06666666666666667"   />

            <identifier   Org="ISBN:3642008305"   Paper_ID="/580256.html"   Extracted="3642008305"   />

      </rec>

      <rec   ID="/190241.html"   Type="misc"   CiteSeer_Book=""   CiteSeer_Volume=""   Title="The   Language   Environment   and   Syntactic   Word-Class   Acquisition,">

            <identifier   Org="ISBN:0546649289"   Paper_ID="/190241.html"   Extracted="0546649289"   />

            <identifier   Org="ISBN:3110113082"   Paper_ID="/190241.html"   Extracted="3110113082"   DDC="401.4"   Normalized_DDC="4014"   Normalized_Weight="0.5"   />

            <identifier   Org="ISBN:3540762639"   Paper_ID="/190241.html"   Extracted="3540762639"   DDC="006.3/2"   Normalized_DDC="00632"   Normalized_Weight="0.5"   />

      </rec>

      <rec   ID="SELF"   Type="SELF"   CiteSeer_Book="SELF"   CiteSeer_Volume="SELF"   Title="Unsupervised   Learning   of   Subcategorisation   Information   and   Its   Application   in   a   Parsing   Subtask"   />

</references_metadata>

www.000webhost.com