%PDF-1.3 1 0 obj << /Kids [ 3 0 R 4 0 R 5 0 R 6 0 R 7 0 R 8 0 R 9 0 R 10 0 R 11 0 R ] /Type /Pages /Count 9 >> endobj 2 0 obj << /Title (Unsupervised Learning From Narrated Instruction Videos) /Producer (PyPDF2) /Author (Jean\055Baptiste Alayrac\054 Piotr Bojanowski\054 Nishant Agrawal\054 Josef Sivic\054 Ivan Laptev\054 Simon Lacoste\055Julien) /Subject (2016 IEEE Conference on Computer Vision and Pattern Recognition) >> endobj 3 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 13 0 R /Resources << /XObject << /x8 14 0 R /x6 17 0 R /x12 20 0 R /x10 23 0 R >> /ExtGState << /s9 26 0 R /s11 29 0 R /R15 32 0 R /a0 << /CA 1 /ca 1 >> /s5 33 0 R /s7 36 0 R >> /Font << /F2 39 0 R /F1 40 0 R /R16 41 0 R /R18 45 0 R /R20 49 0 R /R22 53 0 R /R24 57 0 R /R26 61 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 64 0 R 65 0 R 66 0 R 67 0 R 68 0 R 69 0 R ] >> endobj 4 0 obj << /Parent 1 0 R /Rotate 0 /Contents 70 0 R /Resources << /XObject << /R89 71 0 R /R88 74 0 R /R83 75 0 R /R82 76 0 R /R81 77 0 R /R80 78 0 R /R87 79 0 R /R86 80 0 R /R85 81 0 R /R84 82 0 R /R101 83 0 R /R100 84 0 R /R102 85 0 R /R78 86 0 R /R79 87 0 R /R72 88 0 R /R73 89 0 R /R70 90 0 R /R76 91 0 R /R77 92 0 R /R74 93 0 R /R75 94 0 R /R98 95 0 R /R99 96 0 R /R90 97 0 R /R91 98 0 R /R92 99 0 R /R93 100 0 R /R94 101 0 R /R95 102 0 R /R96 103 0 R /R97 104 0 R /R65 105 0 R /R64 106 0 R /R67 107 0 R /R66 108 0 R /R63 109 0 R /R62 110 0 R /R69 111 0 R /R68 112 0 R >> /ExtGState << /R60 72 0 R /R15 32 0 R >> /Font << /R18 45 0 R /F2 114 0 R /F1 115 0 R /R16 41 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /Group 113 0 R /MediaBox [ 0 0 612 792 ] /Annots [ 116 0 R 117 0 R 118 0 R 119 0 R 120 0 R 121 0 R 122 0 R 123 0 R 124 0 R 125 0 R 126 0 R 127 0 R 128 0 R 129 0 R 130 0 R 131 0 R 132 0 R 133 0 R 134 0 R 135 0 R 136 0 R 137 0 R 138 0 R 139 0 R 140 0 R 141 0 R 142 0 R ] /Type /Page >> endobj 5 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 143 0 R /Resources << /ExtGState << /R15 32 0 R >> /Font << /F2 144 0 R /F1 145 0 R /R130 146 0 R /R126 149 0 R /R124 153 0 R /R122 157 0 R /R16 41 0 R /R26 61 0 R /R18 45 0 R /R22 53 0 R /R128 161 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 164 0 R 165 0 R 166 0 R 167 0 R 168 0 R 169 0 R 170 0 R 171 0 R 172 0 R 173 0 R ] >> endobj 6 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 174 0 R /Resources << /ExtGState << /R15 32 0 R >> /Font << /R156 175 0 R /R162 178 0 R /R160 183 0 R /R166 188 0 R /R164 192 0 R /F1 196 0 R /F2 197 0 R /R158 198 0 R /R130 146 0 R /R126 149 0 R /R124 153 0 R /R122 157 0 R /R16 41 0 R /R18 45 0 R /R22 53 0 R /R128 161 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 201 0 R 202 0 R 203 0 R 204 0 R 205 0 R 206 0 R 207 0 R 208 0 R 209 0 R 210 0 R 211 0 R 212 0 R 213 0 R 214 0 R 215 0 R 216 0 R 217 0 R 218 0 R 219 0 R ] >> endobj 7 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 220 0 R /Resources << /ExtGState << /R15 32 0 R >> /Font << /R162 178 0 R /R160 183 0 R /F1 221 0 R /F2 222 0 R /R126 149 0 R /R124 153 0 R /R122 157 0 R /R16 41 0 R /R18 45 0 R /R22 53 0 R /R128 161 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 223 0 R 224 0 R 225 0 R 226 0 R 227 0 R 228 0 R 229 0 R 230 0 R 231 0 R 232 0 R 233 0 R 234 0 R ] >> endobj 8 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 235 0 R /Resources << /ExtGState << /R15 32 0 R >> /Font << /R210 236 0 R /R212 239 0 R /R206 243 0 R /R204 246 0 R /R208 249 0 R /R130 146 0 R /R126 149 0 R /R124 153 0 R /R122 157 0 R /R128 161 0 R /R156 175 0 R /R162 178 0 R /R160 183 0 R /R166 188 0 R /R158 198 0 R /F2 253 0 R /F1 254 0 R /R16 41 0 R /R18 45 0 R /R20 49 0 R /R22 53 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 255 0 R 256 0 R 257 0 R 258 0 R 259 0 R 260 0 R 261 0 R 262 0 R 263 0 R 264 0 R 265 0 R 266 0 R 267 0 R 268 0 R 269 0 R 270 0 R ] >> endobj 9 0 obj << /Parent 1 0 R /Rotate 0 /Contents 271 0 R /Resources << /XObject << /R237 272 0 R /R236 276 0 R /R239 277 0 R /R240 278 0 R /R238 279 0 R >> /ExtGState << /R235 273 0 R /R234 274 0 R /R60 72 0 R /R15 32 0 R >> /Font << /F2 280 0 R /R160 183 0 R /F1 281 0 R /R130 146 0 R /R126 149 0 R /R124 153 0 R /R122 157 0 R /R16 41 0 R /R18 45 0 R /R22 53 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /Group 275 0 R /MediaBox [ 0 0 612 792 ] /Annots [ 282 0 R 283 0 R 284 0 R 285 0 R 286 0 R 287 0 R 288 0 R 289 0 R 290 0 R 291 0 R 292 0 R 293 0 R 294 0 R 295 0 R 296 0 R ] /Type /Page >> endobj 10 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 297 0 R /Resources << /XObject << /R254 298 0 R >> /ExtGState << /R15 32 0 R >> /Font << /F2 299 0 R /R126 149 0 R /F1 300 0 R /R122 157 0 R /R16 41 0 R /R18 45 0 R /R22 53 0 R >> /ProcSet [ /Text /ImageC /ImageB /PDF /ImageI ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 301 0 R 302 0 R 303 0 R 304 0 R 305 0 R 306 0 R 307 0 R ] >> endobj 11 0 obj << /Parent 1 0 R /Rotate 0 /Type /Page /Contents 308 0 R /Resources << /ExtGState << /R15 32 0 R >> /Font << /F2 309 0 R /F1 310 0 R /R16 41 0 R /R18 45 0 R /R22 53 0 R /R26 61 0 R >> /ProcSet [ /ImageC /Text /PDF /ImageI /ImageB ] >> /MediaBox [ 0 0 612 792 ] /Annots [ 311 0 R 312 0 R 313 0 R 314 0 R 315 0 R 316 0 R 317 0 R 318 0 R 319 0 R 320 0 R 321 0 R 322 0 R 323 0 R 324 0 R 325 0 R 326 0 R 327 0 R 328 0 R 329 0 R 330 0 R 331 0 R 332 0 R 333 0 R 334 0 R 335 0 R 336 0 R 337 0 R 338 0 R 339 0 R 340 0 R 341 0 R 342 0 R 343 0 R 344 0 R 345 0 R 346 0 R 347 0 R 348 0 R 349 0 R 350 0 R 351 0 R 352 0 R 353 0 R 354 0 R 355 0 R 356 0 R 357 0 R 358 0 R 359 0 R 360 0 R 361 0 R 362 0 R 363 0 R 364 0 R ] >> endobj 12 0 obj << /Type /Catalog /Pages 1 0 R >> endobj 13 0 obj << /Length 21018 >> stream q q q 0.10000 0 0 0.10000 0 0 cm /R15 gs 0 g q 10 0 0 10 0 0 cm BT /R16 14.34620 Tf 1 0 0 1 120.66100 675.78400 Tm [ (Unsuper) 10 (vised) -249.98900 (Lear) 14.98930 (ning) -249.99300 (fr) 18.00650 (om) -250.00800 (Narrated) -249.99300 (Instruction) -250.00800 (V) 37.01370 (ideos) ] TJ /R18 11.95520 Tf -34.61680 -37.30000 Td [ (Jean\055Baptiste) -250.00800 (Alayrac) ] TJ /R20 7.97010 Tf 105.24000 4.33906 Td [ <03> -219.31300 (y) -0.19911 ] TJ /R18 11.95520 Tf 34.98010 -4.33906 Td [ (Piotr) -250.01200 (Bojano) 24.98750 (wski) ] TJ /R20 7.97010 Tf 83.06410 4.33906 Td [ <03> -0.30019 ] TJ /R18 11.95520 Tf 29.00200 -4.33906 Td [ (Nishant) -249.98500 (Agra) 14.99740 (w) 9.99826 (al) ] TJ /R20 7.97010 Tf 84.03280 4.33906 Td [ <03> -219.31300 (z) -0.20217 ] TJ /R18 11.95520 Tf 34.98010 -4.33906 Td [ (Josef) -249.99300 (Si) 24.99160 (vic) ] TJ /R20 7.97010 Tf 51.83710 4.33906 Td [ <03> -0.30019 ] TJ /R18 11.95520 Tf -315.59600 -24.20700 Td [ (Iv) 25.00280 (an) -250.00800 (Lapte) 25.00590 (v) ] TJ /R20 7.97010 Tf 56.83520 4.33906 Td [ <03> -0.30019 ] TJ /R18 11.95520 Tf 47.28400 -4.33906 Td [ (Simon) -249.98700 (Lacoste\055Julien) ] TJ /R20 7.97010 Tf 103.93700 4.33906 Td [ (y) -0.19911 ] TJ /R16 11.95520 Tf -255.64500 -45.21720 Td (Abstract) Tj /R22 9.96260 Tf -83.92770 -24.04180 Td [ (W) 91.98650 (e) -408.01500 (addr) 36.99510 (ess) -408.00300 (the) -408.00300 (pr) 44.98390 (oblem) -407.99400 (of) -407.99800 (automatically) -407.99600 (learning) -408.01300 (the) ] TJ -11.95510 -11.47700 Td [ (main) -320.00500 (steps) -320.00500 (to) -320.01300 (complete) -320.01000 (a) -319.98100 (certain) -320.00300 (task\054) -338.00600 (suc) 14.98520 (h) -319.98100 (as) -320.01300 (c) 15.01220 (hanging) -319.98800 (a) ] TJ 11.47700 TL T* [ (car) -257.98900 (tir) 36.99440 (e) 9.99404 (\054) -259.01300 (fr) 44.98640 (om) -257.98900 (a) -257.99100 (set) -257.98800 (of) -257.00400 (narr) 15 (ated) -257.98400 (instruction) -258.01800 (videos\056) -332.98600 (The) -257.98100 (con\055) ] TJ T* [ (trib) 19.98320 (utions) -309.99100 (of) -310.99500 (this) -310 (paper) -310.01000 (ar) 36.98520 (e) -311.01200 (thr) 36.99260 (ee\055fold\056) -491.00100 (F) 45.01700 (ir) 10.01060 (st\054) -324.99000 (we) -310.01700 (de) 15.01710 (velop) -311 (a) ] TJ T* [ (ne) 15.01770 (w) -322.01100 (unsupervised) -321.98700 (learning) -321.98900 (appr) 44.99370 (oac) 14.98400 (h) -321.98100 (that) -322 (tak) 10.00570 (es) -321.98300 (advanta) 9.98608 (g) 10.00320 (e) ] TJ T* [ (of) -281 (the) -281.00400 (complementary) -280.98400 (natur) 37.00240 (e) -279.99700 (of) -281 (the) -281.00500 (input) -280.98300 (video) -281.00500 (and) -280.99700 (the) -281.00500 (as\055) ] TJ T* [ (sociated) -248.99000 (narr) 15 (ation\056) -309.01500 (The) -249.00300 (method) -248.99600 (solves) -247.98200 (two) -248.99300 (clustering) -249.01700 (pr) 44.98510 (ob\055) ] TJ 11.47770 TL T* [ (lems\054) -346.98700 (one) -326.99900 (in) -328.01100 (te) 20 (xt) -327.00200 (and) -328.00900 (one) -326.99800 (in) -328.01100 (video\054) -346.01000 (applied) -327.99400 (one) -327 (after) -328.00200 (eac) 15.01470 (h) ] TJ T* [ (other) -296.01000 (and) -296.01400 (link) 10.01300 (ed) -295.98900 (by) -295.98900 (joint) -295.99700 (constr) 15.00240 (aints) -295.99800 (to) -296.01700 (obtain) -297.01400 (a) -295.98500 (single) -296.00200 (co\055) ] TJ 11.47700 TL T* [ (her) 36.98100 (ent) -209.01600 (sequence) -208.98100 (of) -209.01200 (steps) -209.00400 (in) -207.99200 (b) -1.01454 (ot) 0.99248 (h) -209.01900 (modalities\056) -296.99700 (Second\054) -217.00800 (we) -209.01400 (col\055) ] TJ T* [ (lect) -320.02000 (and) -320.01000 (annotate) -319.99000 (a) -319 (ne) 15.01830 (w) -320.01200 (c) 15.01220 (hallenging) -320.01600 (dataset) -319.99300 (of) -320.01300 (r) 37.01830 (eal\055world) ] TJ T* [ (instruction) -334.00600 (videos) -334.00800 (fr) 44.98640 (om) -334.01500 (the) -332.99600 (Internet\056) -562.00300 (The) -334.00800 (dataset) -333.99100 (contains) ] TJ T* [ (about) -335.99500 (800\054000) -335.98900 (fr) 14.99140 (ames) -336.99300 (for) -336.00400 <027665> -335.98700 (dif) 18.01660 (fer) 36.98280 (ent) -336.01500 (tasks) ] TJ ET Q 0 0 1 rg q 10 0 0 10 0 0 cm BT /R18 6.97380 Tf 1 0 0 1 230.84600 419.58700 Tm (1) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R22 9.96260 Tf 1 0 0 1 238.18100 415.97100 Tm [ (that) -335.99800 (include) ] TJ -188.06900 -11.47700 Td [ (comple) 20.00830 (x) -327.98900 (inter) 14.99870 (actions) -327.99300 (between) -328.00700 (people) -328.99000 (and) -328.00900 (objects\054) -347.99600 (and) -328.00900 (ar) 36.98650 (e) ] TJ 11.47700 TL T* [ (captur) 36.99810 (ed) -247.01700 (in) -247.00500 (a) -247.99300 (variety) -247.00500 (of) -247.00500 (indoor) -246.98500 (and) -247.00300 (outdoor) -247.99800 (settings\056) -309.00500 (Thir) 37.00610 (d\054) ] TJ T* [ (we) -392.00400 (e) 19.99240 (xperimentally) -392.01700 (demonstr) 15.01100 (ate) -392.00600 (that) -391.98800 (the) -392.00600 (pr) 44.98510 (oposed) -391.99100 (method) ] TJ T* [ (can) -245.99300 (automatically) -247.00200 (disco) 10.01670 (ver) 110.99900 (\054) -247.01500 (in) -247.00500 (an) ] TJ /R18 9.96260 Tf 134.21700 0 Td [ (unsupervised) -246 (manner) ] TJ /R22 9.96260 Tf 84.90510 0 Td [ (\054) -247.01500 (the) ] TJ -219.12200 -11.47700 Td [ (main) -355.99900 (steps) -355.01900 (to) -356.00700 (ac) 15.01830 (hie) 14.98520 (ve) -355.98800 (the) -356.01200 (tas) 0.99861 (k) -355.98500 (and) -356.00400 (locate) -356.00400 (the) -354.99200 (steps) -356 (in) -356.00700 (the) ] TJ T* [ (input) -250.00700 (videos\056) ] TJ /R16 11.95520 Tf 37.13670 TL T* [ (1\056) -249.99000 (Intr) 18.01460 (oduction) ] TJ /R18 9.96260 Tf 11.95510 -19.06020 Td [ (Millions) -447.00700 (of) -446.98300 (people) -447.01000 (w) 10 (atch) -446.98100 (narrated) -446.98700 (instruction) -447.01600 (videos) ] TJ ET Q 0 0 1 rg q 10 0 0 10 0 0 cm BT /R18 6.97380 Tf 1 0 0 1 282.37700 294.52700 Tm (2) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R18 9.96260 Tf 1 0 0 1 50.11210 279.43500 Tm [ (to) -378.98300 (learn) -378.99600 (ne) 25.01600 (w) -378.99300 (tasks) -377.99600 (such) -378.98300 (as) -378.99400 (assembling) -378.99100 (IKEA) -378.98800 (furniture) -379.02000 (or) ] TJ 11.47810 TL T* [ (changing) -429.00300 (a) -428.01100 <036174> -429.00900 (car) -429 (tire\056) -844.98600 (Man) 14.99010 (y) -428.98200 (of) -428.98600 (such) -427.99500 (tasks) -429.00700 (ha) 19.99670 (v) 14.98280 (e) -428.99200 (lar) 17.99700 (ge) ] TJ 11.47700 TL T* [ (amounts) -337.98600 (of) -337.98100 (videos) -339.01700 (a) 19.99180 (v) 24.98110 (ailable) -338.00900 (on\055line\056) -573.99700 (F) 14.99260 (or) -337.98000 (e) 15.01220 (xample\054) -361.01600 (query\055) ] TJ T* [ (ing) -262.01800 (for) -261.99700 (\223ho) 24.98170 (w) -261.99200 (to) -261.98300 (change) -261.99300 (a) -262 (tire\224) -261.99300 (results) -262.01000 (in) -261.98300 (more) -261.98600 (than) -261.98300 (300\054000) ] TJ T* [ (hits) -276.98700 (on) -275.98300 (Y) 109.98500 (ouT) 44.98700 (ube\056) -390.00500 (Most) -276.98300 (of) -277.01100 (these) -275.98300 (videos\054) -284.00400 (ho) 24.98600 (we) 25.01300 (v) 14.98280 (er) 39.98600 (\054) -283.00900 (are) -276.98600 (made) ] TJ T* [ (with) -272.01100 (the) -271.00600 (intention) -271.98900 (to) -271.98200 (teach) -271.01500 (other) -271.98400 (people) -272.01800 (to) -271.00100 (perform) -271.98600 (the) -271.98600 (task) ] TJ T* [ (and) -313.98100 (do) -312.99700 (not) -314.00900 (pro) 14.98520 (vide) -314.01400 (direct) -314.01900 (supervisory) -313.01300 (signal) -314 (for) -313.98900 (automatic) ] TJ T* [ (learning) -205.00700 (algorithms\056) -294.99900 (De) 25.01420 (v) 14.00260 (el) 0.98268 (oping) -206.01700 (unsupervised) -204.98800 (methods) -206.00700 (that) ] TJ T* [ (could) -264.99700 (learn) -265.01500 (t) 0.98758 (asks) -265.00200 (from) -265.00400 (myriads) -264.98600 (of) -263.99400 (instruction) -265.00700 (videos) -264.99000 (on) -265.00500 (the) ] TJ T* [ (Internet) -225.99200 (is) -225.98800 (therefore) -226.00800 (a) -226.00600 (k) 10.00320 (e) 15.01220 (y) -225.99700 (challenge\056) -302.01300 (Such) -225.98200 (automatic) -226.00900 (cogni\055) ] TJ ET Q 3.98000 w 0 G 501.12100 1653.69000 m 1446.11000 1653.69000 l S q 10 0 0 10 0 0 cm BT /R24 5.97760 Tf 1 0 0 1 60.14100 158.76700 Tm [ <03> -0.90058 ] TJ /R18 7.97010 Tf 4.31680 -2.81328 Td [ (WILLO) 35.02010 (W) -248.00400 (project\055team\054) -248.98100 (D) ] TJ 85.34530 0.04023 Td (\264) Tj -0.44297 -0.04023 Td [ (epartement) -248.01900 (d\047Informatique) -248.02200 (de) -248.00300 (l\047Ecole) -247.98800 (Nor) 20.01500 (\055) ] TJ -99.24800 -9.08594 Td [ (male) -249.99100 (Sup) ] TJ 30.33010 0.03984 Td (\264) Tj -0.44219 -0.03984 Td [ (erieure\054) -250.01400 (ENS\057INRIA\057CNRS) -250.02400 (UMR) -250.00400 (8548\054) -250.01900 (P) 15.01280 (aris\054) -249.98500 (France\056) ] TJ /R24 5.97760 Tf -19.47110 -6.87695 Td [ (y) -0.10006 ] TJ /R18 7.97010 Tf 3.92930 -2.81289 Td [ (SIERRA) -340.00700 (project\055team\054) -362.98100 (D) ] TJ 82.41170 0.03984 Td (\264) Tj -0.44180 -0.03984 Td [ (epartement) -340.01300 (d\047Informatique) -340.01600 (de) -339.99800 (l\047Ecole) -339.98200 (Nor) 20.01500 (\055) ] TJ -96.31600 -9.08594 Td [ (male) -249.99100 (Sup) ] TJ 30.33010 0.03984 Td (\264) Tj -0.44219 -0.03984 Td [ (erieure\054) -250.01400 (ENS\057INRIA\057CNRS) -250.02400 (UMR) -250.00400 (8548\054) -250.01900 (P) 15.01280 (aris\054) -249.98500 (France\056) ] TJ /R24 5.97760 Tf -19.47110 -6.87734 Td [ (z) -0.10006 ] TJ /R18 7.97010 Tf 3.92930 -2.81289 Td [ (IIIT) -249.97900 (Hyderabad) ] TJ /R18 5.97760 Tf 0.10000 -6.80195 Td (1) Tj /R18 7.97010 Tf 3.48672 -2.81289 Td [ (Ho) 24.99650 (w) -311.99100 (to) -312.99200 (\072) -435.01700 (change) -311.97700 (a) -312.98000 (car) -311.97500 (tire\054) -328 (perform) -313.00900 (CardioPulmonary) -312.00900 (resuscitation) ] TJ -17.93280 -9.08594 Td [ (\050CPR\051\054) -250.00200 (jump) -249.98200 (a) -250 (car) 40.02390 (\054) -250.01300 (repot) -250.00200 (a) -250 (plant) -250.01100 (and) -249.97800 (mak) 9.98605 (e) -250 (cof) 25.00650 (fee) ] TJ /R18 5.97760 Tf 14.44610 -6.80234 Td (2) Tj /R18 7.97010 Tf 3.48672 -2.81289 Td [ (Some) -264.02100 (instruction) -262.99600 (videos) -264.00400 (on) -263.02100 (Y) 109.99500 (ouT) 44.99080 (ube) -263.99500 (ha) 19.99050 (v) 14.98370 (e) -264.01800 (tens) -262.99800 (of) -264.02100 (millions) -262.99800 (of) -264.02100 (vie) 24.99580 (ws\054) ] TJ -17.93280 -9.08594 Td (e\056g\056) Tj ET Q 0 0 1 rg q 10 0 0 10 0 0 cm BT /R26 7.97010 Tf 1 0 0 1 63.61290 81 Tm [ (www\056youtube\056com\057watch\077v\075J4\055) -62.98110 (GRH2nDvw) ] TJ ET Q 0 g q 10 0 0 10 0 0 cm BT /R18 7.97010 Tf 1 0 0 1 231.48400 81 Tm (\056) Tj /R18 9.96260 Tf 77.37810 496.73800 Td [ (ti) 24.99090 (v) 14.98280 (e) -260 (ability) -260 (w) 10 (ould) -260.01300 (enable) -259.99100 (constructing) -261.01500 (virtual) -260.01100 (assistants) -259.99100 (and) ] TJ 11.47700 TL T* [ (smart) -324.98700 (robots) -325.01900 (that) -324.99000 (learn) -325.00500 (ne) 25.01540 (w) -325 (skills) -325.98500 (from) -324.99500 (the) -324.99500 (Internet) -324.99500 (to\054) -344.00600 (for) ] TJ T* [ (e) 15.01220 (xample\054) -238.99700 (help) -235.98800 (people) -235.98500 (achie) 25.01540 (v) 14.98280 (e) -236.00500 (ne) 25.01540 (w) -235.99500 (tasks) -237 (i) 0.98513 (n) -237.01400 (unf) 10.00320 (amiliar) -236.01900 (situa\055) ] TJ (tions\056) ' 11.95510 -12.55200 Td [ (In) -241.01600 (this) -240.01400 (w) 10 (ork\054) -243.00800 (we) -240.98200 (consider) -241.00400 (instruct) 1.00964 (ion) -240.99900 (videos) -240.99400 (and) -241.01400 (de) 25.01540 (v) 14.98280 (elop) ] TJ -11.95510 -11.47700 Td [ (a) -357.98400 (method) -359.00900 (that) -358.00400 (learns) -358.01100 (a) -359.00400 (sequence) -358.01400 (of) -358.01900 (steps\054) -385.99000 (as) -358.01900 (well) -358.00400 (as) -358.99900 (their) ] TJ T* [ (te) 14.98030 (xtual) -219.98300 (and) -219.99800 (visual) -220.99500 (representations\054) -226.00600 (required) -219.99800 (to) -219.99300 (achie) 25.01540 (v) 14.98280 (e) -220.00700 (a) -220.98800 (cer) 19.98690 (\055) ] TJ T* [ (tain) -332.00800 (task\056) -556.00800 (F) 14.99260 (or) -331.98100 (e) 15.01220 (xample\054) -351.99800 (gi) 24.98600 (v) 14.98280 (en) -331.98400 (a) -331.98900 (set) -332.01300 (of) -331.98400 (narrated) -331.98400 (instruction) ] TJ 11.47810 TL T* [ (videos) -276.00800 (demonstrating) -276.01800 (ho) 24.98600 (w) -277.00800 (to) -275.98300 (change) -275.99300 (a) -275.99800 (car) -276.98300 (tire\054) -281.98200 (our) -277.00800 (method) ] TJ 11.47700 TL T* [ (automatically) -255.00900 (disco) 14.99750 (v) 14.98280 (ers) -255.01900 (consecuti) 25 (v) 14.98280 (e) -256.00100 (steps) -254.99700 (for) -255.01600 (this) -254.99200 (task) -255.98200 (such) ] TJ (as) ' /R22 9.96260 Tf 12.26290 0 Td [ (loosen) -397.98800 (the) -398.00500 (nuts) -397.99000 (of) -398 (the) -396.98300 (wheel) ] TJ /R18 9.96260 Tf 117.78200 0 Td (\054) Tj /R22 9.96260 Tf 6.82305 0 Td [ (jac) 20.00650 (k) -398.01700 (up) -398.00200 (the) -398.00200 (car) ] TJ /R18 9.96260 Tf 63.70700 0 Td (\054) Tj /R22 9.96260 Tf 6.82383 0 Td [ (r) 37.01830 (emo) 10.00320 (ve) ] TJ -207.39900 -11.47700 Td [ (the) -299.98200 (spar) 36.99630 (e) -299.99400 (tir) 36.99380 (e) ] TJ /R18 9.96260 Tf 56.38200 0 Td [ (and) -299.98400 (so) -300.01600 (on) -300.01900 (as) -299.98900 (illustrated) -301.00900 (in) -300.01900 (Figure) ] TJ ET Q 0 0 1 rg q 10 0 0 10 0 0 cm BT /R18 9.96260 Tf 1 0 0 1 501.29400 450.41500 Tm (1) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R18 9.96260 Tf 1 0 0 1 506.27500 450.41500 Tm [ (\056) -459.99700 (In) -299.98900 (addi\055) ] TJ -197.41300 -11.47700 Td [ (tion\054) -315.99900 (the) -301.98100 (method) -303.01800 (learns) -301.98100 (the) -302.99800 (visual) -302.98400 (and) -301.98400 (linguistic) -302.99400 (v) 24.98110 (ariability) ] TJ 11.47730 TL T* [ (of) -249.99500 (these) -249.98800 (steps) -250.01700 (from) -249.98800 (natural) -249.98300 (videos\056) ] TJ 11.95510 -12.55200 Td [ (Disco) 14.99500 (v) 14.98280 (ering) -217.01300 (k) 10.00320 (e) 15.01220 (y) -217.01800 (steps) -217.00300 (from) -217.01300 (instruction) -217.01300 (videos) -218.01800 (is) -217.00800 (a) -216.98800 (highly) ] TJ -11.95510 -11.47700 Td [ (challenging) -301.01300 (task\056) -465.00300 (First\054) -313.99200 (linguistic) -300.99400 (e) 15.01220 (xpressions) -301.98900 (for) -301.00900 (the) -301.97900 (same) ] TJ T* [ (step) -348.98600 (can) -349.98300 (ha) 19.99670 (v) 14.98280 (e) -349.00500 (high) -349.99800 (v) 24.98110 (ariability) -349.00500 (across) -350.01500 (videos\054) -373.99200 (for) -349.98100 (e) 15.01220 (xample\072) ] TJ T* [ (\223\056\056\056Loosen) -199.00100 (up) -199.01600 (the) -199.01800 (wheel) -199.01800 (nut) -199.00600 (just) -199.00100 (a) -198.99100 (little) -200.01600 (before) -198.99100 (you) -199.01100 (start) -199.01100 (jack\055) ] TJ T* [ (ing) -215.00600 (the) -215.99600 (car) 54.99820 (\056\056\056) 69.99570 (\224) -297.99400 (and) -215.01800 (\223\056\056\056Start) -216.01800 (to) -215.01300 (loosen) -214.99800 (the) -215.99300 (lug) -215.00300 (nuts) -214.99800 (just) -216.01800 (enough) ] TJ T* [ (to) -191.01500 (mak) 10.01060 (e) -190.99300 (them) -191.00700 (easy) -192.00500 (to) -191.01500 (turn) -191.01200 (by) -191.01700 (hand\056\056\056) 70.00790 (\224\056) -289.99100 (Second\054) -203 (the) -191.99700 (visual) -191.00200 (ap\055) ] TJ T* [ (pearance) -304.98300 (of) -305.00500 (each) -305.99600 (step) -304.99300 (v) 24.98110 (aries) -305.00800 (greatly) -304.99300 (between) -305.00300 (videos) -306.00300 (as) -305.00800 (the) ] TJ T* [ (people) -239.98400 (and) -239.99400 (objects) -240.01600 (are) -240.01100 (dif) 24.98600 (ferent\054) -241.99900 (the) -239.98900 (action) -239.98400 (is) -239.98400 (captured) -239.99400 (from) ] TJ T* [ (a) -321.01000 (dif) 24.98600 (ferent) -321.00800 (vie) 24.98360 (wpoint\054) -337.98800 (and) -321 (the) -320.99500 (w) 10.00320 (ay) -321.00500 (people) -320.99100 (perform) -320.99500 (actions) ] TJ T* [ (also) -330.00900 (v) 24.98110 (ary) 65.00630 (\056) -549.00400 (Finally) 65.01120 (\054) -350.01800 (there) -329.98400 (is) -330.00900 (also) -328.98900 (a) -329.98900 (v) 24.98110 (ariability) -329.98900 (of) -329.98400 (the) -330.01400 (o) 14.98280 (v) 14.98280 (erall) ] TJ T* [ (structure) -199.01100 (of) -198.00400 (the) -199.01800 (sequence) -198.00100 (of) -198.98600 (steps) -197.98700 (achie) 25.01540 (ving) -199.00100 (the) -197.99600 (task\056) -292.99500 (F) 14.99260 (or) -198.98600 (e) 15.01220 (x\055) ] TJ T* [ (ample\054) -223.98500 (some) -217.98300 (videos) -216.99800 (may) -217.99100 (omit) -216.99300 (some) -217.98300 (steps) -217.98300 (or) -216.98300 (change) -218.00300 (slightly) ] TJ T* [ (their) -249.98500 (order) 55.01040 (\056) ] TJ 11.95510 -12.55200 Td [ (T) 79.99160 (o) -253.01200 (address) -253.00700 (these) -253.00700 (challenges\054) -254.01600 (in) -253.00700 (this) -252.99200 (paper) -253.01700 (we) -253.01700 (de) 25.01540 (v) 14.98280 (elop) -253.00200 (an) ] TJ -11.95510 -11.47700 Td [ (unsupervised) -267.01700 (learni) 0.98758 (ng) -267.00400 (approach) -267.00900 (that) -266.01900 (tak) 10.00810 (es) -267.01400 (adv) 24.98110 (antage) -265.99000 (of) -267.01400 (the) ] TJ T* [ (complementarity) -356.98700 (of) -358.01600 (the) -356.99200 (visual) -357.01400 (signal) -357.01400 (in) -358.00900 (the) -356.98900 (video) -356.98000 (and) -358.01400 (the) ] TJ T* [ (corresponding) -418.99400 (natural) -417.99600 (language) -419.01300 (narration) -417.99400 (to) -419.01800 (resolv) 14.99260 (e) -418.99400 (their) ] TJ T* [ (ambiguities\056) -723 (W) 79.98660 (e) -388.01900 (assume) -387.01400 (that) -387.99900 (the) -386.98400 (same) -388.00400 (ordered) -388.01400 (sequence) ] TJ T* [ (of) -416.00700 (steps) -415.98900 (\050also) -417.01900 (called) -416.00200 (script) -415.98900 (in) -416 (the) -416 (NLP) -415.98900 (literature) -417.01400 (\133) ] TJ ET Q 0 0 1 rg q 10 0 0 10 0 0 cm BT /R18 9.96260 Tf 1 0 0 1 517.72800 207.24800 Tm (27) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R18 9.96260 Tf 1 0 0 1 527.69100 207.24800 Tm [ (\135\051) -416.01400 (is) ] TJ -218.82900 -11.47700 Td [ (common) -369.98200 (to) -368.98500 (all) -370.00700 (input) -369.98700 (videos) -369.99200 (of) -368.99700 (the) -370.00700 (same) -370.00700 (task\054) -399.01700 (b) 20.00160 (ut) -370.00700 (the) -370.00700 (ac\055) ] TJ 11.47700 TL T* [ (tual) -251.98200 (sequence) -251.99200 (and) -251.99200 (the) -251.99000 (indi) 25 (vidual) -252.01200 (steps) -252.01700 (are) -252.98700 (unkno) 25 (wn) -251.98700 (and) -251.99200 (are) ] TJ T* [ (learnt) -205.99700 (directly) -205.98500 (from) -205.01500 (data\056) -295.98500 (This) -205.98000 (is) -205.01000 (in) -205.99500 (contrast) -205.98500 (to) -205.99500 (other) -205.99500 (e) 15.01220 (xisting) ] TJ T* [ (methods) -242.00100 (for) -242.00100 (modeling) -243.01600 (instruction) -241.98900 (videos) -242.01400 (\133) ] TJ ET Q 0 0 1 rg q 10 0 0 10 0 0 cm BT /R18 9.96260 Tf 1 0 0 1 475.89500 161.34000 Tm (20) Tj ET Q 0 g q 10 0 0 10 0 0 cm BT /R18 9.96260 Tf 1 0 0 1 485.85800 161.34000 Tm [ (\135) -242.00400 (that) -241.98400 (assume) -242.99900 (a) ] TJ -176.99600 -11.47700 Td [ (script) -312.98900 (\050recipe\051) -313.01600 (is) -312.99400 (kno) 24.99090 (wn) -313 (and) -313.00200 <0278> 14.99750 (ed) -313.00700 (in) -312.99700 (adv) 24.98110 (ance\056) -500 (W) 79.98660 (e) -313.01200 (address) ] TJ 11.47810 TL T* [ (the) -285.98400 (problem) -286.00400 (by) -285.98200 <02727374> -284.98700 (performing) -286.00600 (temporal) -286.00600 (clustering) -286.00100 (of) -285.99100 (te) 14.98280 (xt) ] TJ 11.47700 TL T* [ (follo) 24.99830 (wed) -358.01600 (by) -358.00900 (clustering) -357.98700 (in) -358.00900 (video\054) -385.01400 (where) -357.98400 (the) -358.00900 (tw) 10.00810 (o) -358.01400 (clustering) ] TJ T* [ (tasks) -361.01800 (are) -361.01100 (link) 9.98363 (ed) -360.99900 (by) -360.98900 (joint) -361.00600 (constraints\056) -644.01000 (The) -360.99400 (complementary) ] TJ T* [ (nature) -299.00400 (of) -297.98700 (the) -299.00200 (tw) 10.00810 (o) -297.98500 (clustering) -299.01900 (problems) -297.99400 (helps) -298.98900 (to) -298.01900 (resolv) 14.99260 (e) -299.01400 (am\055) ] TJ T* [ (biguities) -219.99500 (in) -219.01000 (the) -219.99500 (tw) 10.00810 (o) -219.99800 (indi) 25 (vidual) -218.99800 (modalities\056) -300.01900 (F) 14.99260 (or) -220.00300 (e) 15.01220 (xample\054) -226.01600 (tw) 10.00810 (o) ] TJ T* [ (video) -328.00700 (se) 15.01960 (gments) -327.99200 (with) -326.98200 (v) 14.98280 (ery) -327.98900 (dif) 24.98600 (ferent) -327.98900 (appearance) -328 (b) 20.00160 (ut) -328.01400 (depict\055) ] TJ -13.74100 -29.88790 Td (1) Tj ET Q Q Q q q 1 1 1 rg /a0 gs 48.40600 786.42200 515.18800 -52.69900 re f q /s5 gs /x6 Do Q q /s7 gs /x8 Do Q q /s9 gs /x10 Do Q q /s11 gs /x12 Do Q Q Q Q q 1 0 0 1 0 0 cm BT /F1 12 Tf 14.40000 TL ET 1 1 1 rg n 270 47 72 14 re f* 0.50000 0.50000 0.50000 rg BT /F2 9 Tf 10.80000 TL ET BT 1 0 0 1 297 50 Tm (4575) Tj T* ET Q endstream endobj 14 0 obj << /Filter /FlateDecode /Resources << /ExtGState << /a0 << /CA 1 /ca 1 >> >> /XObject << /x18 15 0 R >> >> /Length 28 /Group << /Type /Group /S /Transparency /CS /DeviceRGB /I true >> /BBox [ 78 746 96 765 ] /Type /XObject /Subtype /Form >> stream x+O4PH/VЯ0Pp 0 endstream endobj 15 0 obj << /Filter /FlateDecode /Resources 16 0 R /Length 107 /Type /XObject /BBox [ 78 746 96 765 ] /Subtype /Form >> stream xe AC̬wʠ =p,?]%+H-
Jc "82w8VSnGW;"
endstream
endobj
16 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
17 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x15 18 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 67 752 84 775 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ04Up
0
endstream
endobj
18 0 obj
<<
/Filter /FlateDecode
/Resources 19 0 R
/Length 228
/Type /XObject
/BBox [ 67 752 84 775 ]
/Subtype /Form
>>
stream
xeQKn!s ?FPav6R٪TS.
b];15YyR
{7QL.\:Rv/x9l+L7h%1!}i/AI(kz"U&,YO![R hg{3}4/GyYF:!w}Gn+'xJcO9i뽼_-:`
endstream
endobj
19 0 obj
<<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
>>
endobj
20 0 obj
<<
/Filter /FlateDecode
/Resources <<
/ExtGState <<
/a0 <<
/CA 1
/ca 1
>>
>>
/XObject <<
/x24 21 0 R
>>
>>
/Length 28
/Group <<
/Type /Group
/S /Transparency
/CS /DeviceRGB
/I true
>>
/BBox [ 132 751 480 772 ]
/Type /XObject
/Subtype /Form
>>
stream
x+O4PH/VЯ02Qp
0
endstream
endobj
21 0 obj
<<
/Filter /FlateDecode
/Resources 22 0 R
/Length 53223
/Type /XObject
/BBox [ 132 751 480 772 ]
/Subtype /Form
>>
stream
xtI:6%Q㨈?7rA= u%6 ?Y(WbWo{B>9
x`Znϳ|8{3?0x*z ǃ|,@:w>`c|*ϻⳅKO3`g
:_|}}><.6`Z{{3]#<_o"~:ͺgk7/Ұ@|K yp ]03ʷCmş8˽Y?>(3!Bwqs.Z8,~~=rMT̩y+/*w: uBZ_`ߵp`%M?ɝ1ɳw=vDۉy&xb4Q>d@ sg~lA