%PDF-1.4
%
1 0 obj
<<
/Creator (aaai-dc02_print.doc - Microsoft Word)
/CreationDate (D:20020403134246Z)
/Title (Generalized Features: Their Application to Classification)
/Author (Kiritchenko, Svetlana;Matwin, Stan)
/Producer (Acrobat PDFWriter 3.03 for Windows NT)
/Keywords ()
/Subject (SIGART/AAAI Doctoral Consortium)
/ModDate (D:20020524205805-07'00')
>>
endobj
2 0 obj
[
/PDF /Text
]
endobj
3 0 obj
<<
/Pages 5 0 R
/Type /Catalog
/FICL:Enfocus 14 0 R
/Metadata 17 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Parent 5 0 R
/Resources << /Font << /F0 6 0 R /F1 8 0 R /F2 12 0 R >> /ProcSet 2 0 R >>
/Contents 10 0 R
>>
endobj
5 0 obj
<<
/Kids [ 4 0 R ]
/Count 1
/Type /Pages
/MediaBox [ 0 0 612 792 ]
>>
endobj
6 0 obj
<<
/Type /Font
/Subtype /TrueType
/Name /F0
/BaseFont /TimesNewRoman,Bold
/FirstChar 32
/LastChar 255
/Widths [ 248 330 556 503 503 992 834 278 330 330 503 571 248 330 248 278 503
503 503 503 503 503 503 503 503 503 330 330 571 571 571 503 932
721 669 721 721 669 609 774 774 391 503 774 669 947 721 774 609
774 721 556 669 721 721 1000 714 721 654 330 278 330 578 503 330
503 556 443 556 443 330 503 556 278 330 548 278 819 556 503 556
556 443 391 330 556 503 736 503 503 436 391 218 391 518 774 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 330 571 571 571 571 571 571 571 248
571 571 571 503 571 218 503 571 744 571 503 571 330 744 571 398
548 571 571 571 578 541 248 571 571 571 503 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 ]
/Encoding /WinAnsiEncoding
/FontDescriptor 7 0 R
>>
endobj
7 0 obj
<<
/Type /FontDescriptor
/FontName /TimesNewRoman,Bold
/Flags 16418
/FontBBox [ -250 -216 1200 1040 ]
/MissingWidth 331
/StemV 136
/StemH 136
/ItalicAngle 0
/CapHeight 891
/XHeight 446
/Ascent 891
/Descent -216
/Leading 149
/MaxWidth 1000
/AvgWidth 427
>>
endobj
8 0 obj
<<
/Type /Font
/Subtype /TrueType
/Name /F1
/BaseFont /TimesNewRoman
/FirstChar 32
/LastChar 255
/Widths [ 253 320 400 506 506 826 773 173 333 333 466 560 253 333 253 280 506
506 506 506 506 506 506 506 506 506 280 280 560 560 560 440 920
706 666 666 720 613 560 706 720 333 386 720 600 893 720 720 573
720 666 560 600 720 733 933 733 720 613 346 266 346 466 506 320
440 506 440 506 440 320 493 506 280 280 493 280 760 506 506 506
506 333 386 280 506 493 706 493 480 440 480 173 480 546 773 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 333 560 560 560 560 560 560 560 253
560 560 560 506 560 173 506 560 760 560 466 560 333 760 560 386
546 560 560 560 573 453 253 560 560 560 466 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 ]
/Encoding /WinAnsiEncoding
/FontDescriptor 9 0 R
>>
endobj
9 0 obj
<<
/Type /FontDescriptor
/FontName /TimesNewRoman
/Flags 34
/FontBBox [ -250 -216 1120 1040 ]
/MissingWidth 347
/StemV 73
/StemH 73
/ItalicAngle 0
/CapHeight 891
/XHeight 446
/Ascent 891
/Descent -216
/Leading 149
/MaxWidth 933
/AvgWidth 401
>>
endobj
10 0 obj
<< /Length 11 0 R >>
stream
BT
109.44 718.92 TD
0 0 0 rg
/F0 15.96 Tf
0.0074 Tc -0.0055 Tw (Generalized Features. Their Application to Classification.) Tj
393.24 0 TD 0 Tc 0.0019 Tw ( ) Tj
-297.12 -28.8 TD /F0 12 Tf
0.0023 Tc 0.0217 Tw (Svetlana Kiritchenko and Stan Matwin) Tj
200.88 0 TD 0 Tc 0.024 Tw ( ) Tj
-100.44 -12.6 TD /F1 9 Tf
0.003 Tw ( ) Tj
-80.52 -12 TD 0.0019 Tc 0.0011 Tw (SITE, University of Ottawa, Ottawa, Canada) Tj
161.04 0 TD 0 Tc 0.003 Tw ( ) Tj
-134.16 -12 TD 0.0021 Tc 0.0009 Tw ({svkir, stan}@site.uottawa.ca ) Tj
109.56 0 TD 0 Tc 0.003 Tw ( ) Tj
-55.92 -12 TD ( ) Tj
0 -12 TD /F1 9.96 Tf
0 Tw ( ) Tj
T* ( ) Tj
-252.12 -11.28 TD -0.009 Tc 4.2291 Tw (Classification learning algorithms in general, and text) Tj
238.68 3 TD /F0 9.96 Tf
0 Tc 0.0499 Tw ( ) Tj
2.52 -3 TD /F1 9.96 Tf
0 Tw ( ) Tj
-241.2 -11.04 TD -0.0056 Tc 4.3257 Tw (classification methods i) Tj
102.48 0 TD -0.0127 Tc 4.3089 Tw (n particular, tend to focus on) Tj
0 Tc 0 Tw ( ) Tj
-102.48 -11.04 TD -0.0108 Tc 0.6709 Tw (features of individual training examples, rather than on the) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD -0.0061 Tc 3.2262 Tw (relationships between the examples. However, in many) Tj
0 Tc 0 Tw ( ) Tj
T* -0.0141 Tc 0.1222 Tw (situations a set of items contains more information than just ) Tj
T* -0 Tc 0.3093 Tw (feature values of individual items. For example, ) Tj
195.6 0 TD -0.0277 Tc 0.2079 Tw (taking into ) Tj
-195.6 -11.04 TD 0.0015 Tc 1.624 Tw (account the articles that are cited by or cite an article in) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD -0.0037 Tc 8.6038 Tw (question would increase our chances of correct) Tj
0 Tc 0 Tw ( ) Tj
T* 0 Tc 2.9094 Tw (classification. We propose to recognize and put in use) Tj
0 Tc 0 Tw ( ) Tj
T* 0.0104 Tc 3.0926 Tw (generalized features \(or set features\), which describe a) Tj
0 Tc 0 Tw ( ) Tj
T* -0.0077 Tc 2.0478 Tw (training example, but de) Tj
102.84 0 TD -0.0085 Tc 2.0286 Tw (pend on the dataset as a whole,) Tj
0 Tc 0 Tw ( ) Tj
-102.84 -11.04 TD -0 Tc 2.0746 Tw (with the goal of achieving better classification accuracy.) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD -0.0102 Tc 0.3437 Tw (Although the idea of generalized features is consistent with ) Tj
T* 0.0024 Tc 2.7427 Tw (the objectives of relational learning \(ILP\), we feel that) Tj
0 Tc 0 Tw ( ) Tj
T* -0.0151 Tc 8.5553 Tw (instead of using the computationally heavy and) Tj
238.68 0 TD 0 Tc 0 Tw ( ) Tj
-238.68 -11.04 TD 0.0017 Tc 1.1384 Tw (conceptually general ILP methods, there may be a benefit) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD -0.0029 Tc 4.7173 Tw (in looking for approaches that use specific relations) Tj
0 Tc 0 Tw ( ) Tj
T* -0.0047 Tc 0.0048 Tw (between texts, and in particular, between emails.) Tj
193.56 0 TD 0 Tc 0 Tw ( ) Tj
-193.56 -11.04 TD ( ) Tj
9.96 0 TD 0.0115 Tc 5.9201 Tw (Generalized features are the way to capture the) Tj
0 Tc 0 Tw ( ) Tj
-9.96 -11.04 TD -0.0061 Tc 5.9262 Tw (information that lies beyond a particular ite) Tj
208.08 0 TD -0.0201 Tc 5.9002 Tw (m, the) Tj
0 Tc 0 Tw ( ) Tj
-208.08 -11.04 TD -0.0082 Tc 3.3983 Tw (information that combines the dataset in some sort of) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD -0 Tc 1.9208 Tw (structure. Different datasets have different structures, but) Tj
0 Tc 0 Tw ( ) Tj
T* -0.02 Tc 1.1668 Tw (we could guess what kind of information would be useful) Tj
0 Tc 0 Tw ( ) Tj
T* -0 Tc 1.6808 Tw (for classification. It is similar to the process of choosing) Tj
0 Tc 0 Tw ( ) Tj
T* 0.0103 Tc 3.5899 Tw (relevant features) Tj
69.6 0 TD -0.0055 Tc 3.5371 Tw (. For example, we can guess that the) Tj
0 Tc 0 Tw ( ) Tj
-69.6 -11.04 TD 0.0056 Tc 1.8306 Tw (references are relevant to the topic of an article, but the) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD -0.0136 Tc 0.0137 Tw (relative length is not.) Tj
84 0 TD 0 Tc 0 Tw ( ) Tj
-84 -11.04 TD ( ) Tj
9.96 0 TD 0 Tc 2.6565 Tw (There have been some attempts to include additional) Tj
0 Tc 0 Tw ( ) Tj
-9.96 -11.04 TD -0.004 Tc 1.9585 Tw (information about a dataset to the standard classification) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD 0.0012 Tc 2.999 Tw (process based on plain) Tj
0 Tc 0 Tw ( ) Tj
104.76 0 TD -0.0027 Tc 2.9428 Tw (features. One example is using) Tj
0 Tc 0 Tw ( ) Tj
-104.76 -11.04 TD 0.0031 Tc 1.9341 Tw (references to classify technical articles and hyperlinks to) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD 0.0017 Tc 2.0234 Tw (classify web pages. This research shows that some links) Tj
0 Tc 0 Tw ( ) Tj
T* -0.0115 Tc 1.1067 Tw (could be confusing while others are very helpful. Another) Tj
0 Tc 0 Tw ( ) Tj
T* 0.005 Tc 1.3151 Tw (example is character recognition. The recognition p) Tj
213.72 0 TD 0.0249 Tc -0.0248 Tw (rocess ) Tj
-213.72 -11.04 TD -0.0025 Tc 0.5627 Tw (can be based not only on the shape of a character, but also) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD 0.0039 Tc -0.0038 Tw (on preceding characters and even preceding words. ) Tj
206.64 0 TD 0 Tc 0 Tw ( ) Tj
-206.64 -11.04 TD ( ) Tj
9.96 0 TD -0.0095 Tc 3.8325 Tw (Our attention is focused on the email classification) Tj
0 Tc 0 Tw ( ) Tj
-9.96 -11.04 TD 0.0029 Tc 0.1322 Tw (problem. Nowadays, when a typical user receives about 40) Tj
235.44 0 TD -0.0767 Tc 0 Tw (-) Tj
-235.44 -11.04 TD 0.0034 Tc 0.5967 Tw (50 email messages daily, th) Tj
111.84 0 TD 0.0049 Tc 0.4067 Tw (ere is a great need in automatic ) Tj
-111.84 -11.04 TD -0.0016 Tc 2.4874 Tw (classification systems that could sort, archive, and filter) Tj
0 Tc 0 Tw ( ) Tj
0 -11.04 TD 0.0114 Tc 0.1537 Tw (messages accurately. Typically, people work with emails as ) Tj
T* -0.0076 Tc 1.1777 Tw (with general texts and base the classification decisions on) Tj
0 Tc 0 Tw ( ) Tj
T* -0.0117 Tc 1.0518 Tw (the words that appear in the header and in t) Tj
181.32 0 TD -0.0051 Tc 0.9653 Tw (he body of an) Tj
0 Tc 0 Tw ( ) Tj
-181.32 -12.96 TD /F1 12 Tf
-0.036 Tw ( ) Tj
90 0 TD ( ) Tj
ET
53.88 132.12 144 0.6 re f
BT
197.88 129.6 TD
( ) Tj
-144 -10.08 TD /F1 8.04 Tf
-0.0026 Tc 0.0085 Tw (Copyright \251 2002, American Association for Artificial Intel) Tj
193.8 0 TD -0.0153 Tc 0.0212 Tw (ligence ) Tj
25.08 0 TD 0 Tc 0.0059 Tw ( ) Tj
-218.88 -9.12 TD -0.0148 Tc 0.0207 Tw ( \(www.aaai.org\). All rights reserved.) Tj
117.36 0 TD /F1 9.96 Tf
0 Tc 0 Tw ( ) Tj
-117.36 -10.8 TD ( ) Tj
265.68 506.64 TD 0 Tc 1.0801 Tw (email \(the) Tj
0 Tc 0 Tw ( ) Tj
44.64 0 TD /F2 9.96 Tf
0.002 Tc 1.0781 Tw (bag of words) Tj
ET
q
418.68 603.84 139.56 11.28 re h W n
BT
418.68 606.24 TD
/F1 9.96 Tf
0.0028 Tc 0.9813 Tw ( approach\). But emails have other) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 592.8 238.68 11.28 re h W n
BT
319.56 595.2 TD
/F1 9.96 Tf
-0.0095 Tc 3.6396 Tw (important sources of information, and one of them is) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 581.76 238.68 11.28 re h W n
BT
319.56 584.16 TD
/F1 9.96 Tf
0 Tc 1.5594 Tw (particularly interesting for us: the time they are received.) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 570.72 238.68 11.28 re h W n
BT
319.56 573.12 TD
/F1 9.96 Tf
0.0102 Tc 0.5899 Tw (Time can be useful even as a plain feature. For example, a) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
319.56 562.08 TD
/F1 9.96 Tf
0.0345 Tc 1.1656 Tw (message rece) Tj
ET
q
373.44 559.68 184.8 11.28 re h W n
BT
373.44 562.08 TD
-0.0116 Tc 1.1584 Tw (ived in the middle of the night is probably a) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
318.72 548.64 239.52 11.28 re h W n
BT
319.56 551.04 TD
-0.0026 Tc 1.4209 Tw (junk message or has been sent from the other part of the) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 537.6 238.68 11.28 re h W n
BT
319.56 540 TD
0.0008 Tc 1.2113 Tw (world. Besides that, we could notice a pattern that a Java) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 526.56 238.68 11.28 re h W n
BT
319.56 528.96 TD
0 Tc 2.0911 Tw (newsletter is sent every Friday morning. However, more) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
319.56 517.92 TD
-0.0102 Tc 0.9704 Tw (important than plain time is a temporal) Tj
ET
q
480 515.52 78.24 11.28 re h W n
BT
480 517.92 TD
-0.0296 Tc 0.8698 Tw ( sequence in which) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 504.48 238.68 11.28 re h W n
BT
319.56 506.88 TD
0.0096 Tc 3.2305 Tw (the messages arrive and/or are sent. Messages are not) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 493.44 238.68 11.28 re h W n
BT
319.56 495.84 TD
-0.0042 Tc 1.237 Tw (independent of each other. In fact, once a user has sent a) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 482.4 238.68 11.28 re h W n
BT
319.56 484.8 TD
0.0067 Tc 0.6588 Tw (message, he or she would expect to receive a reply. At the) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 471.36 238.68 11.28 re h W n
BT
319.56 473.76 TD
-0.0084 Tc 0.2605 Tw (office when a working group is discussing a problem, users ) Tj
ET
Q
BT
319.56 462.72 TD
0.0202 Tc 0.3399 Tw (are li) Tj
ET
q
340.2 460.32 218.04 11.28 re h W n
BT
340.2 462.72 TD
0.0033 Tc 0.2368 Tw (kely to receive a bunch of messages on the same topic ) Tj
ET
Q
q
319.56 449.28 238.68 11.28 re h W n
BT
319.56 451.68 TD
-0.008 Tc 6.5931 Tw (during a day or two. This information can help) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 438.24 238.68 11.28 re h W n
BT
319.56 440.64 TD
-0.0043 Tc 1.7294 Tw (classification dramatically, though only a small part of it) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 427.2 238.68 11.28 re h W n
BT
319.56 429.6 TD
-0 Tc 2.2204 Tw (has been used in previous research. Messages that form) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
319.56 418.56 TD
0.0008 Tc 0.1193 Tw (threads ) Tj
-0.1367 Tc 0.1368 Tw (\223message ) Tj
71.88 0 TD -0.5376 Tc 0 Tw (\226) Tj
5.04 0 TD 0.0183 Tc 0.1018 Tw ( reply) Tj
-1.1376 Tc 1.2577 Tw (\224 ) Tj
-0.0067 Tc 0.1268 Tw (have been inve) Tj
ET
q
486.48 416.16 71.76 11.28 re h W n
BT
486.48 418.56 TD
0.0078 Tc 0.0724 Tw (stigated. We want ) Tj
ET
Q
q
319.56 405.12 238.68 11.28 re h W n
BT
319.56 407.52 TD
0 Tc 3.1463 Tw (to go further and extract all possible patterns that are) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 394.08 238.68 11.28 re h W n
BT
319.56 396.48 TD
-0.007 Tc 0.4434 Tw (present in a given email sequence and use these patterns to ) Tj
ET
Q
BT
319.56 385.44 TD
0.0117 Tc -0.0116 Tw (increase classification accuracy.) Tj
127.92 0 TD 0 Tc 0 Tw ( ) Tj
-127.92 -11.04 TD ( ) Tj
ET
q
329.52 372 228.72 11.28 re h W n
BT
329.52 374.4 TD
0.0026 Tc 1.5875 Tw (The proposed learning process can be divided into the) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
319.56 363.36 TD
-0.0161 Tc 0.0162 Tw (following phases:) Tj
70.2 0 TD 0 Tc 0 Tw ( ) Tj
-52.2 -11.04 TD 0 Tc 0 Tw (1.) Tj
7.56 0 TD 0 Tc 0 Tw ( ) Tj
10.44 0 TD 0.0357 Tc -0.0356 Tw (To discover al) Tj
57.36 0 TD -0.0021 Tc 0.0022 Tw (l temporal patterns in data; ) Tj
109.44 0 TD 0 Tc 0 Tw ( ) Tj
-184.8 -11.04 TD 0 Tc 0 Tw (2.) Tj
7.56 0 TD 0 Tc 0 Tw ( ) Tj
ET
q
355.56 338.88 202.68 11.28 re h W n
BT
355.56 341.28 TD
0.0013 Tc 3.7702 Tw (To analyze the patterns and choose the most) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
355.56 330.24 TD
0.003 Tc -0.0029 Tw (predictive ones;) Tj
63.48 0 TD 0 Tc 0 Tw ( ) Tj
-81.48 -11.04 TD 0 Tc 0 Tw (3.) Tj
7.56 0 TD 0 Tc 0 Tw ( ) Tj
ET
q
355.56 316.8 202.68 11.28 re h W n
BT
355.56 319.2 TD
0.012 Tc 5.4282 Tw (To employ the best patterns as generalized) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
355.56 308.16 TD
0.0011 Tc -0.0009 Tw (features in the classification process.) Tj
146.28 0 TD 0 Tc 0 Tw ( ) Tj
ET
q
319.56 294.72 238.68 11.28 re h W n
BT
319.56 297.12 TD
0.0047 Tc 0.1394 Tw (As the first phase, we have developed an algorithm MINTS ) Tj
ET
Q
BT
319.56 286.08 TD
0.0047 Tc 4.6754 Tw (\(MINing Temporal Sequenti) Tj
ET
q
443.04 283.68 115.2 11.28 re h W n
BT
443.04 286.08 TD
-0.0078 Tc 4.5979 Tw (al patterns\) that can find) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 272.64 238.68 11.28 re h W n
BT
319.56 275.04 TD
-0.0049 Tc 6.4651 Tw (frequently occurring temporal patterns in an email) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 261.6 238.68 11.28 re h W n
BT
319.56 264 TD
-0.0032 Tc 1.03 Tw (sequence. The important feature of the algorithm is that it) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 250.56 238.68 11.28 re h W n
BT
319.56 252.96 TD
-0.0145 Tc 1.746 Tw (finds frequently occurring patterns consisting not only of) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
319.56 241.92 TD
-0.0079 Tc 0.6347 Tw (event sequences, but also of the time intervals between the) Tj
ET
q
319.56 228.48 238.68 11.28 re h W n
BT
319.56 230.88 TD
0.0036 Tc 4.0423 Tw (events. Therefore, the approach predicts not only the) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 217.44 238.68 11.28 re h W n
BT
319.56 219.84 TD
-0.0096 Tc 1.5217 Tw (expected event in a sequence, but also when the event is) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 206.4 238.68 11.28 re h W n
BT
319.56 208.8 TD
-0 Tc 2.1726 Tw (likely to happen. The algorithm is general, so it can be) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 195.36 238.68 11.28 re h W n
BT
319.56 197.76 TD
0.0012 Tc 4.8161 Tw (applied to any domain where temporal relations are) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
319.56 186.72 TD
-0.0109 Tc 2.291 Tw (present. Having found the patterns,) Tj
0 Tc 0 Tw ( ) Tj
ET
q
473.4 184.32 84.84 11.28 re h W n
BT
473.4 186.72 TD
-0.0104 Tc 2.2906 Tw (we choose the most) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
q
319.56 173.28 238.68 11.28 re h W n
BT
319.56 175.68 TD
0 Tc 0.0596 Tw (predictive ones and discard the noise. Then, we develop the ) Tj
ET
Q
q
319.56 162.24 238.68 11.28 re h W n
BT
319.56 164.64 TD
0.003 Tc 4.4772 Tw (generalized features based on pattern predictions and) Tj
0 Tc 0 Tw ( ) Tj
ET
Q
BT
319.56 153.6 TD
-0.0051 Tc 10.6132 Tw (incorporate them into the classical word) Tj
212.64 0 TD -0.0767 Tc 0 Tw (-) Tj
ET
q
535.44 151.2 22.8 11.28 re h W n
BT
535.44 153.6 TD
0.0222 Tc -0.0221 Tw (based ) Tj
ET
Q
BT
319.56 142.56 TD
0 Tc (classification.) Tj
54.96 0 TD 0 Tc 0 Tw ( ) Tj
-54.96 -11.04 TD ( ) Tj
ET
endstream
endobj
11 0 obj
14215
endobj
12 0 obj
<<
/Type /Font
/Subtype /TrueType
/Name /F2
/BaseFont /TimesNewRoman,Italic
/FirstChar 32
/LastChar 255
/Widths [ 253 337 433 518 506 831 734 216 325 325 506 674 253 337 253 277 506
506 506 506 506 506 506 506 506 506 337 337 674 674 674 506 915
614 614 662 722 614 614 722 722 337 445 662 554 831 662 722 614
722 614 506 554 722 614 819 614 554 554 397 277 433 421 506 361
506 506 445 506 445 277 506 506 277 277 445 277 722 506 506 506
506 385 385 277 506 445 662 445 445 385 397 277 397 542 783 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 337 674 674 674 674 674 674 674 253
674 674 674 722 674 277 506 674 734 674 506 674 337 734 674 397
554 674 674 674 578 518 253 674 674 674 506 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 ]
/Encoding /WinAnsiEncoding
/FontDescriptor 13 0 R
>>
endobj
13 0 obj
<<
/Type /FontDescriptor
/FontName /TimesNewRoman,Italic
/Flags 98
/FontBBox [ -250 -216 1099 1040 ]
/MissingWidth 434
/StemV 73
/StemH 73
/ItalicAngle -11
/CapHeight 891
/XHeight 446
/Ascent 891
/Descent -216
/Leading 149
/MaxWidth 916
/AvgWidth 402
>>
endobj
14 0 obj
<<
/PitStop 15 0 R
>>
endobj
15 0 obj
<<
/CC 16 0 R
>>
endobj
16 0 obj
<<
>>
endobj
17 0 obj
<< /Type /Metadata /Subtype /XML /Length 1842 >>
stream
aaai-dc02_print.doc - Microsoft Word
2002-04-03T13:42:46Z
Generalized Features: Their Application to Classification
Kiritchenko, Svetlana;Matwin, Stan
Acrobat PDFWriter 3.03 for Windows NT
SIGART/AAAI Doctoral Consortium
2002-05-24T20:58:05-07:00
2002-04-03T13:42:46Z
Generalized Features: Their Application to Classification
Kiritchenko, Svetlana;Matwin, Stan
SIGART/AAAI Doctoral Consortium
2002-05-24T20:58:05-07:00
2002-05-24T20:58:05-07:00
Generalized Features: Their Application to Classification
Kiritchenko, Svetlana;Matwin, Stan
SIGART/AAAI Doctoral Consortium
endstream
endobj
xref
0 18
0000000000 65535 f
0000000016 00000 n
0000000374 00000 n
0000000406 00000 n
0000000499 00000 n
0000000644 00000 n
0000000735 00000 n
0000001837 00000 n
0000002125 00000 n
0000003221 00000 n
0000003498 00000 n
0000017769 00000 n
0000017792 00000 n
0000018897 00000 n
0000019184 00000 n
0000019225 00000 n
0000019261 00000 n
0000019286 00000 n
trailer
<<
/Size 18
/Info 1 0 R
/Root 3 0 R
/ID[<2fb8dc10533b2f09739992a8d785160f><41c4427bfb7cc2ddf92147087cb676d9>]
>>
startxref
21213
%%EOF
1 0 obj
<<
/Creator (aaai-dc02_print.doc - Microsoft Word)
/CreationDate (D:20020403134246Z)
/Title (Generalized Features: Their Application to Classification)
/Author (Kiritchenko, Svetlana;Matwin, Stan)
/Producer (Acrobat PDFWriter 3.03 for Windows NT)
/Keywords ()
/Subject (SIGART/AAAI Doctoral Consortium)
/ModDate (D:20020528174632-07'00')
>>
endobj
3 0 obj
<<
/Pages 5 0 R
/Type /Catalog
/FICL:Enfocus 14 0 R
/Metadata 23 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Parent 5 0 R
/Resources << /Font << /F0 6 0 R /F1 8 0 R /F2 12 0 R /NewFont:43 20 0 R >> /ProcSet 2 0 R >>
/SaveStreams << /q 18 0 R /Q 19 0 R >>
/Contents [ 18 0 R 10 0 R 19 0 R 22 0 R ]
>>
endobj
18 0 obj
<< /Length 3 >>
stream
q
endstream
endobj
19 0 obj
<< /Length 3 >>
stream
Q
endstream
endobj
20 0 obj
<<
/Type /Font
/Subtype /Type1
/Name /NewFont:43
/BaseFont /Helvetica
/Encoding /MacRomanEncoding
>>
endobj
21 0 obj
0
endobj
22 0 obj
<< /Length 178 >>
stream
q 0 0 612 792 re W n BT /NewFont:43 10 Tf 0 Tr 0.0000 g 1.0000 0.0000 0.0000 1.0000 439.0700 54.0000 Tm 0 Tw 0 Tc (Doctoral Consortium 985) Tj 118.9299 -12.8830 Td () Tj ET Q
endstream
endobj
23 0 obj
<< /Type /Metadata /Subtype /XML /Length 1842 >>
stream
aaai-dc02_print.doc - Microsoft Word
2002-04-03T13:42:46Z
Generalized Features: Their Application to Classification
Kiritchenko, Svetlana;Matwin, Stan
Acrobat PDFWriter 3.03 for Windows NT
SIGART/AAAI Doctoral Consortium
2002-05-28T17:46:32-07:00
2002-04-03T13:42:46Z
Generalized Features: Their Application to Classification
Kiritchenko, Svetlana;Matwin, Stan
SIGART/AAAI Doctoral Consortium
2002-05-28T17:46:32-07:00
2002-05-28T17:46:32-07:00
Generalized Features: Their Application to Classification
Kiritchenko, Svetlana;Matwin, Stan
SIGART/AAAI Doctoral Consortium
endstream
endobj
xref
0 2
0000000000 65535 f
0000021729 00000 n
3 2
0000022087 00000 n
0000022180 00000 n
18 6
0000022409 00000 n
0000022464 00000 n
0000022519 00000 n
0000022643 00000 n
0000022662 00000 n
0000022894 00000 n
trailer
<<
/Size 24
/Info 1 0 R
/Root 3 0 R
/Prev 21213
/ID[<2fb8dc10533b2f09739992a8d785160f><4d68b580e7822c19d77b7ddb1c9bab0c>]
>>
startxref
24821
%%EOF
1 0 obj
<<
/Creator (aaai-dc02_print.doc - Microsoft Word)
/CreationDate (D:20020403134246Z)
/Title (Generalized Features: Their Application to Classification)
/Author (Kiritchenko, Svetlana;Matwin, Stan)
/Producer (Acrobat PDFWriter 3.03 for Windows NT)
/Keywords ()
/Subject (SIGART/AAAI Doctoral Consortium)
/ModDate (D:20020821210800-07'00')
>>
endobj
3 0 obj
<<
/Pages 5 0 R
/Type /Catalog
/FICL:Enfocus 14 0 R
/Metadata 41 0 R
>>
endobj
4 0 obj
<<
/Type /Page
/Parent 5 0 R
/Resources << /ColorSpace << /CS0 25 0 R /CS1 26 0 R /CS2 27 0 R >> /Font << /TT0 6 0 R /TT1 8 0 R /TT2 12 0 R /T1_0 20 0 R /T1_1 30 0 R /T1_2 32 0 R
/T1_3 33 0 R /T1_4 36 0 R >>
/ExtGState << /GS0 29 0 R /GS1 24 0 R >> /ProcSet [ /PDF /Text ] >>
/SaveStreams << /q 18 0 R /Q 19 0 R >>
/Contents 39 0 R
>>
endobj
6 0 obj
<<
/Type /Font
/Subtype /TrueType
/FontDescriptor 7 0 R
/BaseFont /TimesNewRoman,Bold
/FirstChar 32
/LastChar 255
/Widths [ 248 330 556 503 503 992 834 278 330 330 503 571 248 330 248 278 503
503 503 503 503 503 503 503 503 503 330 330 571 571 571 503 932
721 669 721 721 669 609 774 774 391 503 774 669 947 721 774 609
774 721 556 669 721 721 1000 714 721 654 330 278 330 578 503 330
503 556 443 556 443 330 503 556 278 330 548 278 819 556 503 556
556 443 391 330 556 503 736 503 503 436 391 218 391 518 774 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 330 571 571 571 571 571 571 571 248
571 571 571 503 571 218 503 571 744 571 503 571 330 744 571 398
548 571 571 571 578 541 248 571 571 571 503 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 571
571 571 571 571 571 571 571 571 571 571 571 571 571 571 571 ]
/Encoding /WinAnsiEncoding
>>
endobj
8 0 obj
<<
/Type /Font
/Subtype /TrueType
/FontDescriptor 9 0 R
/BaseFont /TimesNewRoman
/FirstChar 32
/LastChar 255
/Widths [ 253 320 400 506 506 826 773 173 333 333 466 560 253 333 253 280 506
506 506 506 506 506 506 506 506 506 280 280 560 560 560 440 920
706 666 666 720 613 560 706 720 333 386 720 600 893 720 720 573
720 666 560 600 720 733 933 733 720 613 346 266 346 466 506 320
440 506 440 506 440 320 493 506 280 280 493 280 760 506 506 506
506 333 386 280 506 493 706 493 480 440 480 173 480 546 773 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 333 560 560 560 560 560 560 560 253
560 560 560 506 560 173 506 560 760 560 466 560 333 760 560 386
546 560 560 560 573 453 253 560 560 560 466 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 560
560 560 560 560 560 560 560 560 560 560 560 560 560 560 560 ]
/Encoding /WinAnsiEncoding
>>
endobj
12 0 obj
<<
/Type /Font
/Subtype /TrueType
/FontDescriptor 13 0 R
/BaseFont /TimesNewRoman,Italic
/FirstChar 32
/LastChar 255
/Widths [ 253 337 433 518 506 831 734 216 325 325 506 674 253 337 253 277 506
506 506 506 506 506 506 506 506 506 337 337 674 674 674 506 915
614 614 662 722 614 614 722 722 337 445 662 554 831 662 722 614
722 614 506 554 722 614 819 614 554 554 397 277 433 421 506 361
506 506 445 506 445 277 506 506 277 277 445 277 722 506 506 506
506 385 385 277 506 445 662 445 445 385 397 277 397 542 783 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 337 674 674 674 674 674 674 674 253
674 674 674 722 674 277 506 674 734 674 506 674 337 734 674 397
554 674 674 674 578 518 253 674 674 674 506 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 674
674 674 674 674 674 674 674 674 674 674 674 674 674 674 674 ]
/Encoding /WinAnsiEncoding
>>
endobj
20 0 obj
<<
/Type /Font
/Subtype /Type1
/Encoding /MacRomanEncoding
/BaseFont /Helvetica
>>
endobj
24 0 obj
<<
/Type /ExtGState
/SA false
/OP false
/op false
/OPM 0
/BG2 /Default
/UCR2 /Default
/TR2 /Default
/HT /Default
/CA 1
/ca 1
/SMask /None
/AIS false
/BM /Normal
/TK true
>>
endobj
25 0 obj
/DeviceRGB
endobj
26 0 obj
/DeviceGray
endobj
27 0 obj
[
/ICCBased 28 0 R
]
endobj
28 0 obj
<< /N 4 /Alternate /DeviceCMYK /Length 389758 /Filter /FlateDecode >>
stream
HuTKtKKJI,t(4K%ҹH4J#Ғ(H
wqyy~3̙g<3 Y9El
@ ]!O-@ \+BVKK
:OX~WCaiHKL 0qY `5 ck
X] x=8 X Ŀ>.f#aPn D^{y8 dpH st:Yc xc
IV?S!:_9[YbQP~+rA
ShHht^
'0߅kYXY9Yqqpl'WzEE$%D>,^|t*K)%/`\ҫ:&D[7dplDa5|mb4,yy{e5
3⚅,t+whlA
mk
xYUH&%Ȥ
qO'Mz3KT@v[NUnn^\o]abTrtlmE]e~U+jאZ:zaqi5};CS[\_ۆwCaQ1;>L$Lz}4:%8M7l̎Χ/}XT^]X>\Ym[n!ycskkƶʷ;v{pIs0Xݯ3s&$WWW*)!$$%!e$cHNOAKIMEqƕ;KLw@YX;ؚ8^+DspfKOTCPpJ%D=++O%$*8IZ\Z^UK_wL"dx]} >9=;s_G8/̹N!Gz[<=2|B}PQzlH0Wc(Een|Pds::5&89yFT"od䳔i/ZK^&gd:fgQl
kJХeJ*+篍kj5U[ZUh0|em6]B@`PpH?QM1Msψ*iϛ.Z[JYZ)X-]RѺپw?@?5 ǖ'vNg
W3gLC#u!MMMEvAms˔FVNA̝GLwA̬,llؿsݛnͽ+!B²" 'R&k?3?4+:6oT\ұڿ6VʝoF?LT;:>::>:;eqvx^sawݥʕ'_ EFO\DKLtAnFF)F|ԭ6\`@z?m+F;LwiA hy͖)Mgw~_
@ZH_XA,"F)%/*9aZ:Q,\B^_AU2
*'[jo5[uR1uh`fm$1xJgBdrltlyyEe$feg-g#`dGbwj0TOC9; ܨݿxz6zx8IP=A!.aAxۑ ϊ}bG-ޒēx`G/Ԝq_O?0"۬խЮ˯ǰı²µŶȷӹۺ 0@RfzƏǦȾ*GcЀџҿ'LsٛFsM6+1MZ:{ T?~ò~i~L}~cbA~Dad~ty~W~O>~\/~|~`C x}%H}1X}%z}K}
{N}<_~7A~-ψ||Dz|+E|[s|z}^}wO@}-~ċ {Gu{Dz{]Ĭ{f{Zx|[]|ϕM?}R<}Ǝz]YzHħz|z={LNw{\|=>|v|ېI8z/r
z;bz'sMzd6zɬqv{D[{0>|;|yyaIy?yazYvzݮ[{^=c{ФI{R*y߄yfUy`VyyuKzZi{<{z%zȎ~+~ }͇}W0}3}HtЄ}Zk}=~zɇ }!~Єd*s}Y<9wpSwuuVrUW؈|;,뇔{RsѲ;:8q)PCV:4.8Ȅ2?UpVu9ScbփR.ՁNn U388A/ͬδz6߆өn1T\e7݀tXT)$̯̕6;eCʷˆ imw3SƀV7M
\lGNػځNāa5tNzlߴS<H6*