%PDF-1.3 1 0 obj << /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R ] /Type /Pages /Count 9 >> endobj 2 0 obj << /Title (Common Subspace for Model and Similarity\072 Phrase Learning for Caption Generation From Images) /Producer (PyPDF2) /Author (Yoshitaka Ushiku\054 Masataka Yamaguchi\054 Yusuke Mukuta\054 Tatsuya Harada) /Subject (2015 IEEE International Conference on Computer Vision) >> endobj 3 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 13 0 R /Resources << /XObject << /x8 14 0 R /x6 17 0 R /x12 20 0 R /R33 23 0 R /x10 26 0 R >> /ExtGState << /s9 29 0 R /s11 32 0 R /a0 << /CA 1 /ca 1 >> /R20 35 0 R /s5 36 0 R /s7 39 0 R >> /Font << /F2 42 0 R /R29 43 0 R /R21 47 0 R /F1 51 0 R /R39 52 0 R /R36 58 0 R /R34 62 0 R /R25 66 0 R /R23 70 0 R /R27 74 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] /ColorSpace << /R32 24 0 R >> >> /MediaBox [ 0 0 612 792 ] /Annots [ 77 0 R 78 0 R 79 0 R 80 0 R 81 0 R 82 0 R 83 0 R 84 0 R 85 0 R 86 0 R 87 0 R 88 0 R 89 0 R ] >> endobj 4 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 90 0 R /Resources << /ColorSpace << /R32 24 0 R >> /ExtGState << /R20 35 0 R /R78 91 0 R /R75 92 0 R >> /Font << /R79 93 0 R /R76 96 0 R /F2 101 0 R /R36 58 0 R /F1 102 0 R /R21 47 0 R /R23 70 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 103 0 R 104 0 R 105 0 R 106 0 R 107 0 R 108 0 R 109 0 R 110 0 R 111 0 R 112 0 R 113 0 R 114 0 R 115 0 R 116 0 R 117 0 R 118 0 R 119 0 R 120 0 R 121 0 R 122 0 R 123 0 R 124 0 R 125 0 R 126 0 R 127 0 R 128 0 R ] >> endobj 5 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 129 0 R /Resources << /ExtGState << /R20 35 0 R >> /Font << /F2 130 0 R /R21 47 0 R /R140 131 0 R /R76 96 0 R /R130 136 0 R /R126 139 0 R /R132 144 0 R /F1 147 0 R /R134 148 0 R /R136 151 0 R /R138 155 0 R /R23 70 0 R /R128 159 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 163 0 R 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R 171 0 R 172 0 R 173 0 R 174 0 R 175 0 R 176 0 R 177 0 R 178 0 R 179 0 R 180 0 R 181 0 R 182 0 R 183 0 R 184 0 R 185 0 R 186 0 R 187 0 R 188 0 R 189 0 R 190 0 R 191 0 R 192 0 R 193 0 R 194 0 R 195 0 R 196 0 R 197 0 R 198 0 R 199 0 R 200 0 R 201 0 R ] >> endobj 6 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 202 0 R /Resources << /ExtGState << /R20 35 0 R >> /Font << /R156 203 0 R /F2 208 0 R /R154 209 0 R /R21 47 0 R /R140 131 0 R /R76 96 0 R /R158 212 0 R /R130 136 0 R /R126 139 0 R /R132 144 0 R /F1 217 0 R /R134 148 0 R /R136 151 0 R /R138 155 0 R /R23 70 0 R /R128 159 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 218 0 R 219 0 R 220 0 R 221 0 R 222 0 R 223 0 R 224 0 R ] >> endobj 7 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 225 0 R /Resources << /ExtGState << /R20 35 0 R /R75 92 0 R >> /Font << /R156 203 0 R /F2 226 0 R /R154 209 0 R /R140 131 0 R /R195 227 0 R /F1 230 0 R /R76 96 0 R /R29 43 0 R /R126 139 0 R /R36 58 0 R /R134 148 0 R /R136 151 0 R /R21 47 0 R /R23 70 0 R /R128 159 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 231 0 R 232 0 R 233 0 R 234 0 R 235 0 R 236 0 R 237 0 R 238 0 R 239 0 R 240 0 R 241 0 R 242 0 R 243 0 R 244 0 R 245 0 R 246 0 R 247 0 R 248 0 R 249 0 R 250 0 R 251 0 R 252 0 R 253 0 R 254 0 R 255 0 R 256 0 R 257 0 R 258 0 R 259 0 R 260 0 R ] >> endobj 8 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 261 0 R /Resources << /XObject << /R220 262 0 R /R221 263 0 R /R222 264 0 R /R223 265 0 R /R224 266 0 R >> /ExtGState << /R20 35 0 R /R75 92 0 R >> /Font << /R154 209 0 R /R140 131 0 R /R217 267 0 R /F1 273 0 R /R76 96 0 R /F2 274 0 R /R126 139 0 R /R36 58 0 R /R136 151 0 R /R21 47 0 R /R23 70 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] /ColorSpace << /R32 24 0 R >> >> /MediaBox [ 0 0 612 792 ] /Annots [ 275 0 R 276 0 R 277 0 R 278 0 R 279 0 R 280 0 R 281 0 R 282 0 R 283 0 R 284 0 R 285 0 R 286 0 R 287 0 R 288 0 R ] >> endobj 9 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 289 0 R /Resources << /ExtGState << /R20 35 0 R >> /Font << /R21 47 0 R /F2 290 0 R /R23 70 0 R /F1 291 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 292 0 R 293 0 R 294 0 R 295 0 R 296 0 R 297 0 R 298 0 R 299 0 R 300 0 R 301 0 R 302 0 R 303 0 R 304 0 R 305 0 R 306 0 R 307 0 R 308 0 R 309 0 R 310 0 R 311 0 R 312 0 R 313 0 R 314 0 R 315 0 R 316 0 R 317 0 R 318 0 R 319 0 R 320 0 R 321 0 R 322 0 R 323 0 R 324 0 R 325 0 R 326 0 R 327 0 R 328 0 R ] >> endobj 10 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 329 0 R /Resources << /XObject << /R288 330 0 R /R289 331 0 R /R283 332 0 R /R286 333 0 R /R287 334 0 R /R284 335 0 R /R285 336 0 R >> /ExtGState << /R20 35 0 R /R75 92 0 R >> /Font << /R217 267 0 R /F2 337 0 R /R36 58 0 R /F1 338 0 R /R21 47 0 R /R23 70 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] /ColorSpace << /R32 24 0 R >> >> /MediaBox [ 0 0 612 792 ] /Annots [ 339 0 R 340 0 R 341 0 R 342 0 R 343 0 R 344 0 R 345 0 R 346 0 R 347 0 R ] >> endobj 11 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 348 0 R /Resources << /ExtGState << /R20 35 0 R >> /Font << /R21 47 0 R /R29 43 0 R /R23 70 0 R /F1 349 0 R /F2 350 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 351 0 R 352 0 R 353 0 R 354 0 R 355 0 R 356 0 R 357 0 R 358 0 R 359 0 R 360 0 R 361 0 R 362 0 R 363 0 R 364 0 R 365 0 R 366 0 R 367 0 R 368 0 R 369 0 R 370 0 R 371 0 R 372 0 R 373 0 R 374 0 R 375 0 R 376 0 R 377 0 R 378 0 R 379 0 R 380 0 R 381 0 R 382 0 R 383 0 R 384 0 R 385 0 R 386 0 R 387 0 R 388 0 R 389 0 R 390 0 R 391 0 R 392 0 R 393 0 R 394 0 R 395 0 R 396 0 R 397 0 R 398 0 R 399 0 R 400 0 R 401 0 R 402 0 R 403 0 R 404 0 R 405 0 R 406 0 R 407 0 R 408 0 R 409 0 R 410 0 R 411 0 R 412 0 R 413 0 R 414 0 R 415 0 R 416 0 R 417 0 R 418 0 R 419 0 R 420 0 R 421 0 R 422 0 R 423 0 R 424 0 R 425 0 R 426 0 R 427 0 R 428 0 R 429 0 R 430 0 R 431 0 R 432 0 R 433 0 R 434 0 R 435 0 R 436 0 R 437 0 R 438 0 R 439 0 R 440 0 R 441 0 R 442 0 R 443 0 R 444 0 R 445 0 R 446 0 R 447 0 R 448 0 R 449 0 R 450 0 R 451 0 R 452 0 R 453 0 R 454 0 R 455 0 R 456 0 R 457 0 R 458 0 R ] >> endobj 12 0 obj << /Type /Catalog /Pages 1 0 R >> endobj 13 0 obj << /Length 19318 >> stream q q q 0.10000 0 0 0.10000 0 0 cm /R20 gs 0 g q 10 0 0 10 0 0 cm BT /R21 14.34600 Tf 1 0 0 1 158.51000 675.07000 Tm [ (Common) -250.00500 (Subspace) -249.99500 (f) 24.00730 (or) -249.00400 (Model) -250.00200 (and) -249.99000 (Similarity\072) ] TJ -26.24020 -17.93980 Td [ (Phrase) -248.99100 (Lear) 14.00750 (ning) -248.98900 (f) 24.00730 (or) -249.00400 (Caption) -250.00800 (Generation) -250 (fr) 16.98910 (om) -248.99800 (Images) ] TJ /R23 11.95500 Tf -12.15000 -37.85000 Td [ (Y) 110.00400 (oshitaka) -248.98700 (Ushiku\054) -250.01200 (Masataka) -250.00800 (Y) 98.99390 (amaguchi\054) -248.99100 (Y) 110.00500 (usuk) 10.01270 (e) -248.99700 (Mukuta\054) -250.00200 (T) 78.99290 (atsuya) -249.00500 (Harada) ] TJ 118.70000 -13.95000 Td [ (The) -248.99900 (Uni) 23.98930 (v) 15.00990 (ersity) -248.98500 (of) -250 (T) 78.99290 (ok) 14.99150 (yo) ] TJ -36.35000 -13.95000 Td [ (7\0553\0551) -249.99300 (Hongo) -249.98500 (Bunk) 14.00520 (yo\055ku\054) -249.00700 (T) 78.99290 (ok) 14.99150 (yo\054) -249.01300 (Japan) ] TJ /R25 8.96600 Tf -49.33980 -11.11020 Td [ (f) -0.89994 ] TJ /R27 8.96600 Tf 4.60977 0 Td [ (ushiku\054) -598.98900 (yamaguchi\054) -600.02100 (mukuta\054) -599.99100 (harada) ] TJ /R25 8.96600 Tf 177.53000 0 Td [ (g) -0.89858 ] TJ /R27 8.96600 Tf 4.61016 0 Td (\100mi\056t\056u\055tokyo\056ac\056jp) Tj /R21 11.95500 Tf -193.89000 -41.05000 Td (Abstract) Tj /R29 9.96300 Tf -83.91990 -23.90980 Td [ (Gener) 15.00490 (ating) -307.98200 (captions) -309.01800 (to) -308.99100 (describe) -308.99900 (ima) 9.01529 (g) 9.98323 (es) -307 (is) -308.99500 (a) -309.01000 (fundamen\055) ] TJ -11.95980 -11.95000 Td [ (tal) -491.01300 (pr) 43.99820 (oblem) -491 (that) -490.98800 (combines) -491.99900 (computer) -491.01800 (vision) -492.01100 (and) -490.99900 (n) -0.99489 (atur) 14.00940 (al) ] TJ 11.96020 TL T* [ (langua) 9.00488 (g) 9.98384 (e) -414.01800 (pr) 43.99820 (ocessing) 14.98460 (\056) -804.01700 (Recent) -415.01700 (works) -415.00700 (focus) -414.98800 (on) -415.00200 (descriptive) ] TJ /R23 9.96300 Tf 11.95000 TL (phrases) ' /R29 9.96300 Tf 29.87970 0 Td [ (\054) -331.99700 (suc) 14.01850 (h) -315.00800 (as) -316.00600 (\223a) -315.99100 (white) -316.01500 (do) 8.98956 (g\224) -315.01100 (to) -316.00900 (e) 18.98990 (xplain) -314.98600 (the) -315.99600 (visual) -316.00800 (com\055) ] TJ -29.87970 -11.96020 Td [ (posites) -386.99200 (of) -385.99400 (an) -387.00700 (input) -386.98500 (ima) 9.01406 (g) 9.98323 (e) 14.99080 (\056) -719.01500 (The) -386.98200 (phr) 13.99100 (ases) -385.99600 (can) -386.99300 (not) -386.98800 (only) -385.99600 (e) 18.99120 (x\055) ] TJ T* [ (pr) 36 (ess) -266.00600 (objects\054) -271.01100 (attrib) 18.98630 (utes\054) -270.01000 (e) 14.01060 (vents\054) -271.01500 (and) -267.00700 (their) -266.98400 (r) 36.01460 (elations) -266.01400 (b) 19 (ut) -266.01800 (can) ] TJ 11.95980 TL T* [ (also) -339.99600 (r) 36.01400 (educe) -339.01600 (visual) -340.00300 (comple) 18.98630 (xity) 54.99350 (\056) -579.00500 (A) -340.01900 (caption) -340.01500 (for) -339.98100 (an) -339.99800 (input) -340.01300 (im\055) ] TJ 11.95000 TL T* [ (a) 9.00365 (g) 9.98384 (e) -304.00200 (can) -305.99100 (be) -304.99600 (g) 9.00365 (ener) 13.99220 (ated) -304.01200 (by) -306.01500 (connecting) -304.98800 (estimated) -305.98600 (phr) 13.98970 (ases) -304.99300 (us\055) ] TJ 11.96020 TL T* [ (ing) -261.99500 (a) -263.01800 (gr) 14.00450 (ammar) -260.98600 (model\056) -347.99800 (Howe) 13.98970 (ver) 111.00600 (\054) -264.99200 (because) -262.00500 (phr) 13.98970 (ases) -261.98300 (ar) 36 (e) -260.99000 (com\055) ] TJ 11.95000 TL T* [ (binations) -286.01300 (of) -286.01500 (various) -287.00600 (wor) 36.00110 (ds\054) -293.99800 (the) -286.00200 (number) -287.01400 (of) -286.01400 (phr) 13.98970 (ases) -284.99700 (is) -287 (muc) 13.98970 (h) ] TJ 11.95980 TL T* [ (lar) 36.01830 (g) 9.98384 (er) -293.98200 (than) -294.00200 (the) -295.02000 (number) -295.01200 (of) -294.99400 (single) -294.99300 (wor) 36.00110 (ds\056) -442.98700 (Consequently) 54.01820 (\054) -306.00300 (the) ] TJ T* [ (accur) 14.01790 (acy) -256.99300 (of) -259 (phr) 13.99100 (ase) -258.00400 (estimation) -258.00500 (suf) 17.00260 (fer) 10.01260 (s) -258.00300 (fr) 43.99090 (om) -256.98800 (too) -259.01500 (fe) 13.98970 (w) -257.98600 (tr) 13.99710 (aining) ] TJ 11.95000 TL T* [ (samples) -249.98700 (per) -250.00500 (phr) 13.99100 (ase) 14.99810 (\056) ] TJ 11.95980 -11.96020 Td [ (In) -613.01400 (this) -615.00500 (paper) 110.00900 (\054) -703.99900 (we) -614.01500 (pr) 43.99820 (opose) -613.01800 (a) -614.98500 (no) 8.98956 (vel) -613.00300 (phr) 13.98970 (ase\055learning) ] TJ -11.95980 -11.95000 Td [ (method\072) -289.98700 (Common) -208.99300 (Subspace) -210.01800 (for) -210.00900 (Model) -209.99600 (and) -209.99800 (Similarity) -209.00300 (\050CoS\055) ] TJ 11.95980 TL T* [ (MoS\051\056) -336.01800 (In) -336.99300 (or) 36 (der) -335.98800 (to) -336.00500 (o) 9.00304 (ver) 36.98250 (come) -336.00300 (the) -337.01100 (shorta) 8.99814 (g) 9.98323 (e) -335.01400 (of) -336.98400 (tr) 13.99710 (aining) -335.98900 (sam\055) ] TJ 11.95000 TL T* [ (ples\054) -361.01000 (CoSMoS) -339.00900 (obtains) -338.00600 (a) -339.00300 (subspace) -339.01400 (in) -338.98500 (whic) 14.01670 (h) -337.98400 (\050a\051) -338.98100 (all) -339.00600 (featur) 35.99740 (e) ] TJ 11.96020 TL T* [ (vector) 9.00610 (s) -442.98500 (associated) -444.99500 (with) -444.00200 (the) -444.98900 (same) -444.01200 (phr) 13.98970 (ase) -444.00400 (ar) 36 (e) -442.99200 (mapped) -445.01600 (as) ] TJ 11.95000 TL T* [ (mutually) -299.99700 (close) 8.99201 (\054) -312.00200 (\050b\051) -300.01000 <636c61737369026572> 9 (s) -299.01400 (for) -301.01000 (eac) 14.01060 (h) -299.01200 (phr) 13.98970 (ase) -299.01400 (ar) 36 (e) -300.00200 (learned\054) ] TJ 11.95980 TL T* [ (and) -392.98000 (\050c\051) -392.00300 (tr) 13.99770 (aining) -392.01700 (samples) -393.01700 (ar) 36 (e) -391.98400 (shar) 36.00720 (ed) -391.99700 (among) -391.98300 (co\055occurring) ] TJ 11.95000 TL T* [ (phr) 13.99040 (ases\056) -402.01800 (Experimental) -282.00300 (r) 36.01460 (esults) -280.01900 (demonstr) 13.98240 (ate) -280.00300 (that) -280.99300 (our) -281.98800 (system) ] TJ 11.96020 TL T* [ (is) -235.99000 (mor) 35.99250 (e) -236.01600 (accur) 14.01790 (ate) -234.99300 (than) -236.01500 (those) -236.98400 (in) -235.98700 (earlier) -236.99500 (work) -236.00700 (and) -237.01200 (that) -235.98300 (the) -236.99300 (ac\055) ] TJ 11.95000 TL T* [ (cur) 14.01850 (acy) -296.00400 (incr) 35.99250 (eases) -296 (when) -297.99100 (the) -297.01900 (dataset) -297.00700 (fr) 43.99210 (om) -297.01900 (the) -297.01900 (web) -298.01700 (incr) 35.99250 (eases\056) ] TJ /R21 11.95500 Tf 32.81990 TL T* [ (1\056) -250.00700 (Intr) 17.00610 (oduction) ] TJ /R23 9.96300 Tf 11.95980 -18.93010 Td [ (Object\054) -563.00500 (e) 24.00850 (v) 15.00180 (ent\054) -562.99000 (and) -501.00900 (attrib) 19.00220 (ute) -501.01600 (recognition) -501.01100 (from) -500.98400 (images) ] TJ -11.95980 -11.95000 Td [ (ha) 19.01510 (v) 15.00240 (e) -454.99000 (been) -455.98500 (widely) -456.98900 (in) 39.01520 (v) 15.00180 (estig) 4.99774 (ated\056) -927.01900 (Recently) 63.99660 (\054) -506.98100 (se) 23.99010 (v) 15.00180 (eral) -456.00600 (w) 9.01039 (orks) ] TJ 11.96020 TL T* [ (ha) 19.01510 (v) 15.00240 (e) -371.00800 (tackled) -371.01100 (the) -371.98400 (sentential) -372.01100 (description) -371.98400 (of) -372.00600 (images) -371.98400 (to) -371.99600 (more) ] TJ 11.95000 TL T* [ <0365> 14.00810 (xibly) -249.01300 (e) 14.01060 (xplain) -249 (the) -250.01000 (contents) -249.98400 (of) -249.99100 (images\056) ] TJ 11.95980 -11.95980 Td [ (In) -378.98400 (general\054) -412.99800 (collecting) -380.00700 (a) -379.98600 (lar) 17.01850 (ge) -378.98000 (amount) -380.98700 (of) -380.00500 (data) -380.00700 (from) -379.99000 (the) ] TJ -11.95980 -11.95000 Td [ (web) -463.99000 (is) -463.98200 (a) -464.00800 (common) -464.98300 (means) -463.98200 (to) -464.01800 (understand) -464.01400 (v) 23.98030 (arious) -463.98700 (images\056) ] TJ 11.96020 TL T* [ (What) -387.98100 (we) -389.01100 (can) -387.98500 (collect) -389.01400 (automatically) -388.00800 (are) -389.02000 (images) -389 (associated) ] TJ 11.95000 TL T* [ (not) -224.00300 (with) -224.01700 (semantically) -223.98500 (clear) -224.00300 (labels) -224 (b) 19 (ut) -223.00900 (with) -224.01700 (surrounding) -223.99300 (sen\055) ] TJ 11.96020 TL T* [ (tences\056) -290.01800 (Hence\054) -202.00500 (the) -189.98300 (requirements) -189.99700 (for) -189.99400 (caption) -190.01700 (generation) -190.98700 (from) ] TJ 11.95000 TL T* [ (images) -345.00900 (are\072) -499.01700 (scalability) 63.99530 (\054) -367.99000 (learning) -344.99500 (image) -344.99200 (contents\054) -369.00400 (and) -345.00200 (cap\055) ] TJ 11.95980 TL T* [ (tion) -250.01800 (generation) -249.99600 (using) -249.99000 (estimated) -249.99700 (content\056) ] TJ ET Q q 3108.20000 4521.93000 2330.47000 959.94500 re W n q 818.01700 0 0 -533.51200 3108.21000 5481.88000 cm /R33 Do Q /R32 cs 1 1 0 0 scn q 10 0 0 10 0 0 cm BT /R34 7.96080 Tf 1.01000 0 0 1 395.29300 541.57100 Tm (BabyTalk) Tj /R36 7.96080 Tf 33.62660 0 Td (\072) Tj ET Q 0 0 0 1 scn q 10 0 0 10 0 0 cm BT /R36 7.96080 Tf 1.01000 0 0 1 431.49100 541.57100 Tm ( This is a picture of three persons\054 ) Tj -35.83960 -7.96055 Td (one bottle and one diningtable\056 The first rusty ) Tj 7.96094 TL (person is beside the second person\056 The rusty ) ' (bottle is near the first rusty person\054 and within ) ' (the colorful diningtable\056 The second person is ) ' (by the third rusty person\056 The colorful dining\055) ' (table is near the first rusty person\054 and near ) ' ET Q 0 0.80078 0.94922 0 scn q 10 0 0 10 0 0 cm BT /R34 9.95100 Tf 1.00351 0 0 1 310.82000 455.27300 Tm (Ours\072) Tj /R36 9.95100 Tf 24.87750 0 Td ( Group of people sitting at a table with a dinner\056) Tj ET Q 1 1 0 0 scn q 10 0 0 10 0 0 cm BT /R34 7.96080 Tf 0.99923 0 0 1 310.82100 477.22700 Tm (Corpus\055Guided\072) Tj ET Q 0 0 0 1 scn q 10 0 0 10 0 0 cm BT /R36 7.96080 Tf 0.99923 0 0 1 366.51100 477.22700 Tm ( Three people are showing the bottle on the street\056) Tj ET Q 1 1 0 0 scn q 10 0 0 10 0 0 cm BT /R34 7.96080 Tf 0.99923 0 0 1 310.82100 467.27600 Tm (Midge\072) Tj ET Q 0 0 0 1 scn q 10 0 0 10 0 0 cm BT /R36 7.96080 Tf 0.99923 0 0 1 335.12200 467.27600 Tm ( People with a bottle at the table\056) Tj 1.01000 0 0 1 310.82100 486.68100 Tm (the second person\054 and near the third rusty person\056) Tj ET Q Q q 10 0 0 10 0 0 cm BT /R23 8.96600 Tf 1 0 0 1 308.86000 436.41000 Tm [ (Figure) -472 (1\056) -472.15000 (Qualitati) 24.02200 (v) 15.00900 (e) -470 (comparison\056) -976.00700 (A) -472.00900 (common) -471.99300 (input) -471.99800 (image) -471.00700 (is) ] TJ 10.96020 TL T* [ (sho) 23.99200 (wn) -418.00700 (in) -420.01700 (the) -420.01100 (upper) -419.99500 (left\056) -818.99800 (W) 79.01490 (e) -417.98000 (compare) -419.98900 (our) -420.02200 (result) -420.01100 (with) -419.01500 (Corpus\055) ] TJ T* [ (Guided) -250.01100 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 8.96600 Tf 1 0 0 1 340.49000 414.49000 Tm (48) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 8.96600 Tf 1 0 0 1 349.45000 414.49000 Tm [ (\135\054) -250.00900 (Midge) -249.98100 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 8.96600 Tf 1 0 0 1 385.56000 414.49000 Tm (28) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 8.96600 Tf 1 0 0 1 394.53000 414.49000 Tm [ (\135\054) -249.00700 (and) -249.98400 (BabyT) 78.98760 (alk) -249.00900 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 8.96600 Tf 1 0 0 1 457.06000 414.49000 Tm (18) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 8.96600 Tf 1 0 0 1 466.02000 414.49000 Tm (\135\056) Tj /R23 9.96300 Tf -145.20000 -33.21990 Td [ (In) -413.01700 (order) -413.98800 (to) -413.98800 (represent) -413.99800 (image) -413.01700 (contents\054) -454.98900 (such) -413.98800 (as) -413.99800 (objects\054) ] TJ -11.95980 -11.95980 Td [ (e) 24.00970 (v) 15.00180 (ents\054) -387.98600 (attrib) 19 (utes\054) -387.98600 (and) -360.99900 (their) -361.01800 (relations\054) -388.99800 (recent) -361.00900 (w) 9.01284 (orks) -359.99900 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 523.15000 369.31000 Tm (7) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 528.13000 369.31000 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 532.66000 369.31000 Tm (10) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 542.62000 369.31000 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 308.86000 357.36000 Tm (20) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 318.82000 357.36000 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 322.46000 357.36000 Tm (22) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 332.42000 357.36000 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 336.06000 357.36000 Tm (28) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 346.02000 357.36000 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 349.66000 357.36000 Tm (35) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 359.62000 357.36000 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 363.26000 357.36000 Tm (41) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 373.22000 357.36000 Tm (\054) Tj ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 376.86000 357.36000 Tm (43) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 386.82000 357.36000 Tm [ (\135) -241 (focus) -240.98900 (on) -241 (visual) -241.00400 (phrases) -240.99400 (describing) -241.00900 (im\055) ] TJ -77.95980 -11.96020 Td [ (age) -343.01500 (contents) -342.98300 (and) -343.00300 (their) -344 (relations\056) -588.99600 (F) 13.99220 (or) -342.99300 (e) 14.01180 (xample\054) -364.98800 (in) -342.98300 (order) -344.00200 (to) ] TJ 11.95000 TL T* [ (learn) -221.99100 (a) -222.99800 (general) -221.99100 (class) -223.00300 (of) -221.99800 (\223dog\054) 68.99060 (\224) -226.99700 (dogs) -222.99300 (in) -221.98800 (the) -222.99300 (follo) 24.01460 (wing) -222.01300 (phrases) ] TJ 11.96020 TL T* [ (should) -214.99200 (be) -215.99400 (considered) -215.01900 (in) -215.99000 (the) -214.99500 (same) -215.98000 (class\072) -293.00800 (\223white) -215.00900 (dog\224\054) -222.98800 (\223black) ] TJ 11.95000 TL T* [ (dog\224\054) -228.98900 (\223running) -223.00300 (dog\224\054) -228.98900 (and) -224.00800 (\223sleeping) -222.99300 (dog\056) 69.01510 (\224) -300.00200 (The) -224.00300 (semantic) -223.00300 (g) 3.98447 (ap) ] TJ 11.95980 TL T* [ (between) -349.98100 (image) -350.01100 (content) -349.98400 (can) -351.01100 (be) -350.00600 (narro) 24 (wed) -348.99600 (by) -349.99600 (learning) -350.01600 (each) ] TJ 11.95000 TL T* [ (phrase) -311.98200 (independently) 63.98430 (\054) -327.01800 (not) -313.00400 (just) -312.00900 (the) -313.01400 (single) -312.00900 (w) 9.01284 (ord) -311.01400 (\223dog\056) ] TJ /R39 9.58600 Tf 214.61000 0 Td [ <029f> 500.98000 <029f> ] TJ /R23 9.96300 Tf 14.45000 0 Td (A) Tj -229.06000 -11.96020 Td [ (caption) -343.98500 (for) -344 (an) -344.00700 (input) -344.01200 (image) -344.99200 (can) -343.99200 (then) -344.00200 (be) -344.00700 (generated) -344.00700 (by) -343.99700 (con\055) ] TJ T* [ (necting) -250.00500 (estimated) -249.99700 (phrases) -250.01200 (using) -249.98700 (a) -250.01200 (grammar) -249.98300 (model\056) ] TJ 11.95980 -12.65000 Td [ (Because) -281.99600 (phrases) -282.98800 (are) -283.00300 (combinations) -281.98100 (of) -283.00500 (objects\054) -291.00800 (attrib) 19 (utes\054) ] TJ -11.95980 -11.95980 Td [ (and) -218.98900 (e) 24.00970 (v) 15.00180 (ents\054) -224.02000 (a) -217.97900 (lar) 16 (ge) -217.01400 (number) -218.98900 (of) -219.01800 (phrases) -218.99900 (should) -218.98900 (be) -219.01300 (learned) -217.99400 (and) ] TJ T* [ (recognized\056) -425.01000 (Therefore\054) -298.01200 (the) -288 (number) -287.99400 (of) -289.00400 (training) -287.98400 (samples) -288.99900 (per) ] TJ 11.96020 TL T* [ (phrase) -321 (is) -322.01200 (much) -321.98200 (less) -322.01400 (than) -322.00900 (that) -320.99700 (for) -322.00700 (the) -321.99200 (usual) -321.98700 (object) -322.01700 (recogni\055) ] TJ 11.95000 TL T* [ (tion\056) -515.98900 (Recent) -318.98500 (lar) 17.01850 (ge\055scale) -318 (visual) -318.98800 <636c6173736902636174696f6e> -319.00700 (is) -318.99300 (tackled) -318.98300 (us\055) ] TJ 11.96020 TL T* [ (ing) -301.00600 (a) -300.00200 (combination) -300.98200 (of) -301.00100 (high\055dimensional) -300.99600 (image) -301.00100 (features) -300.98700 (and) ] TJ 11.95000 TL T* [ (linear) -268.00300 (weight) -266.98900 (v) 13.98240 (ector) -267.00100 (as) -267.98900 <636c6173736902657273> -267.98900 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 447.66000 177.34000 Tm (37) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 457.63000 177.34000 Tm [ (\135) -266.99400 (or) -267.98900 (using) -267.98400 (a) -268.00800 (deep) -267.98400 (con\055) ] TJ -148.77000 -11.95980 Td [ (v) 19 (olutional) -295.00800 (neural) -295.99000 (netw) 8.98833 (ork) -295.01700 (\133) ] TJ ET Q 0 1 0 rg q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 418.68000 165.38000 Tm (17) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R23 9.96300 Tf 1 0 0 1 428.64000 165.38000 Tm [ (\135\056) -448 (T) 78.99340 (o) -295.01200 (adopt) -295.98800 (these) -296.00200 (methods) -296.00200 (to) ] TJ -119.78000 -11.95000 Td [ (learn) -221.01100 (phrases\054) -226.98500 (ho) 24.00480 (we) 24.98260 (v) 15.00180 (er) 40.01620 (\054) -227 (learning) -220.98400 (man) 14.00200 (y) -220.98900 (parameters) -220.98900 (using) -221.99300 (too) ] TJ 11.96020 TL T* [ (fe) 24.01950 (w) -227.01900 (training) -229.01400 (samples) -229.01100 (per) -229.00200 (phrase) -228.00200 (w) 9.01284 (ould) -228.01700 (result) -228.98200 (in) -229.00700 (o) 13.98240 (v) 15.00180 (er) 20.02040 <2d027474696e672e> ] TJ 11.95980 -12.65000 Td [ (In) -356.01000 (order) -357.01900 (to) -356 (o) 13.98240 (v) 15.00180 (ercome) -356.00200 (the) -357.00400 (shortage) -356.99000 (of) -356.01000 (training) -356.99000 (samples\054) ] TJ -11.95980 -11.95000 Td [ (usage) -414.01200 (of) -415.01700 (a) -414.01700 (subspace) -413.99000 (is) -415.01200 (a) -414.01700 (reasonable) -414.98800 (w) 9.01284 (ay) -413.01300 (to) -415.00700 (approximate) ] TJ T* [ <636c6173736902657273> -352.01300 (for) -351.99800 (phrases\056) -616.00200 (T) 33.98320 (raditional) -350.99600 (multi) 24 (v) 25 (ariable) -350.99100 (methods\054) ] TJ 11.95000 TL T* [ (such) -332.98300 (as) -334.01400 (linear) -333.99000 (discriminant) -333.01400 (analysis\054) -355.01000 (can) -333.01400 (absorb) -334.00400 (the) -333.99000 (short\055) ] TJ 11.95980 TL T* [ (age) -320 (of) -320.99700 (training) -320.01500 (samples) -320.99200 (by) -320.00200 (reducing) -320.99200 (the) -319.99300 (dimension) -321.01200 (of) -320.99700 (fea\055) ] TJ -13.74020 -29.88980 Td (1) Tj ET Q Q Q q q 1 1 1 rg /a0 gs 48.40600 786.42200 515.18800 -52.69900 re f q /s5 gs /x6 Do Q q /s7 gs /x8 Do Q q /s9 gs /x10 Do Q q /s11 gs /x12 Do Q Q Q Q q 1 0 0 1 0 0 cm BT /F1 12 Tf 14.40000 TL ET 1 1 1 rg n 270 47 72 14 re f* 0.50000 0.50000 0.50000 rg BT /F2 9 Tf 10.80000 TL ET BT 1 0 0 1 297 50 Tm (2668) Tj T* ET Q endstream endobj 14 0 obj << /Filter /FlateDecode /Resources << /ExtGState << /a0 << /CA 1 /ca 1 >> >> /XObject << /x18 15 0 R >> >> /Length 28 /Group << /Type /Group /S /Transparency /CS /DeviceRGB /I true >> /BBox [ 78 746 96 765 ] /Type /XObject /Subtype /Form >> stream x+O4PH/VЯ0Pp 0 endstream endobj 15 0 obj << /Filter /FlateDecode /Resources 16 0 R /Length 107 /Type /XObject /BBox [ 78 746 96 765 ] /Subtype /Form >> stream xe AC̬wʠ =p,?]%+H-
Jc "82w8VSnGW;"
endstream
endobj
16 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
17 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x15 18 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 67 752 84 775 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ04Up
0
endstream
endobj
18 0 obj
<<
/Filter /FlateDecode
/Resources 19 0 R
/Length 228
/Type /XObject
/BBox [ 67 752 84 775 ]
/Subtype /Form
>>
stream
xeQKn!s ?FPav6R٪TS.
b];15YyR
{7QL.\:Rv/x9l+L7h%1!}i/AI(kz"U&,YO![R hg{3}4/GyYF:!w}Gn+'xJcO9i뽼_-:`
endstream
endobj
19 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
20 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x24 21 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 133 751 479 772 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ02Qp
0
endstream
endobj
21 0 obj
<<
/Filter /FlateDecode
/Resources 22 0 R
/Length 53008
/Type /XObject
/BBox [ 133 751 479 772 ]
/Subtype /Form
>>
stream
xlK8,8?DKs9mavd
{f-8*2Y@H >חk}y}uf`v)_s}1z#*Gw_gX jow\o'1c|Z^G<
A̺X}ay?IT|y~L.[ {Ȟb\3-3]_'X\竵0{+_۾oY-wj+ B;)Aa=/
1~a(>}m_K' >*Q1:?OPnGsQ_`?mzN
?WBB!lU n'S 1Ipzχ1xF_09,}?e){:̹~Gu7g {0mD`pdS)>;ys`Pµx'O
+혝 oI~!p7̵&m=dC+>5ALV"I1"S*CKha#03L4|@p灟vMRQ==*/m"r>@uqo9ͻ;Vz 'ȳ;IBzWR~ihwa8PlT>]fZ8y52)r빃W{B7G/q)}M]elS(k=5^A] сڸc'w!Ll'M=Ȥ뎓z;#կN>\q6r>|8}}Lr$Mo5:>syls+Ǵ`C?/9{Y||jD u#i^\rJ8a|oY+Rxl<{[؏s迨nYaյ[?/^u;+ ,bhL}wrj:ɴdM8^q|m'aQ6;0k_ar|E8HɃpN('&r`V;Rs볩Nk\&W=DDG#5]~M{VyCSRH9ܜXˏt7߾i9ADگ
USڝ|{(hbIRZXg @6Hɹ0Dq;si\Tv<1?mUŏV7bF
p7Y9sƨ*.uz_LբřsB݃ɹ^'_Yae91f+n%A̶d
Ċc#q
CF=Mt'\&ξ)z;L-4:n}41j:mӶ_)\݇ |T@uAƓxAN6Ql;}Ɲ{~_y"g%]nCi']I~,xa?feoɌr0u?V=phldx.Y;~nZL603> \`ęܓ