|
51 | 51 | "from pyspark.sql import SparkSession\n", |
52 | 52 | "from pyspark.sql.types import StructType, StructField\n", |
53 | 53 | "from pyspark.sql.types import StringType, FloatType, IntegerType, LongType\n", |
| 54 | + "import warnings\n", |
| 55 | + "warnings.simplefilter(action='ignore', category=FutureWarning)\n", |
54 | 56 | "\n", |
55 | 57 | "from recommenders.utils.timer import Timer\n", |
56 | 58 | "from recommenders.datasets import movielens\n", |
|
106 | 108 | "cell_type": "code", |
107 | 109 | "execution_count": 3, |
108 | 110 | "metadata": {}, |
109 | | - "outputs": [ |
110 | | - { |
111 | | - "name": "stderr", |
112 | | - "output_type": "stream", |
113 | | - "text": [ |
114 | | - "22/01/13 16:26:50 WARN Utils: Your hostname, pdfocal resolves to a loopback address: 127.0.1.1; using 10.211.55.61 instead (on interface enp0s5)\n", |
115 | | - "22/01/13 16:26:50 WARN Utils: Set SPARK_LOCAL_IP if you need to bind to another address\n", |
116 | | - "WARNING: An illegal reflective access operation has occurred\n", |
117 | | - "WARNING: Illegal reflective access by org.apache.spark.unsafe.Platform (file:/home/test/miniconda3/envs/reco/lib/python3.8/site-packages/pyspark/jars/spark-unsafe_2.12-3.2.0.jar) to constructor java.nio.DirectByteBuffer(long,int)\n", |
118 | | - "WARNING: Please consider reporting this to the maintainers of org.apache.spark.unsafe.Platform\n", |
119 | | - "WARNING: Use --illegal-access=warn to enable warnings of further illegal reflective access operations\n", |
120 | | - "WARNING: All illegal access operations will be denied in a future release\n", |
121 | | - "Using Spark's default log4j profile: org/apache/spark/log4j-defaults.properties\n", |
122 | | - "Setting default log level to \"WARN\".\n", |
123 | | - "To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).\n", |
124 | | - "22/01/13 16:26:51 WARN NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable\n" |
125 | | - ] |
126 | | - } |
127 | | - ], |
| 111 | + "outputs": [], |
128 | 112 | "source": [ |
129 | 113 | "# the following settings work well for debugging locally on VM - change when running on a cluster\n", |
130 | 114 | "# set up a giant single executor with many threads and specify memory cap\n", |
131 | | - "spark = start_or_get_spark(\"ALS PySpark\", memory=\"16g\", config={\"spark.sql.analyzer.failAmbiguousSelfJoin\": \"false\"})" |
| 115 | + "spark = start_or_get_spark(\"ALS PySpark\", memory=\"16g\")\n", |
| 116 | + "spark.conf.set(\"spark.sql.analyzer.failAmbiguousSelfJoin\", \"false\")" |
132 | 117 | ] |
133 | 118 | }, |
134 | 119 | { |
|
147 | 132 | "name": "stderr", |
148 | 133 | "output_type": "stream", |
149 | 134 | "text": [ |
150 | | - "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.81k/4.81k [00:20<00:00, 239KB/s]\n", |
| 135 | + "100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 4.81k/4.81k [00:05<00:00, 882KB/s]\n", |
151 | 136 | " \r" |
152 | 137 | ] |
153 | 138 | }, |
|
266 | 251 | "execution_count": 7, |
267 | 252 | "metadata": {}, |
268 | 253 | "outputs": [ |
269 | | - { |
270 | | - "name": "stderr", |
271 | | - "output_type": "stream", |
272 | | - "text": [ |
273 | | - "22/01/13 16:27:59 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.JNIBLAS\n", |
274 | | - "22/01/13 16:27:59 WARN InstanceBuilder$NativeBLAS: Failed to load implementation from:dev.ludovic.netlib.blas.ForeignLinkerBLAS\n", |
275 | | - "22/01/13 16:27:59 WARN InstanceBuilder$NativeLAPACK: Failed to load implementation from:dev.ludovic.netlib.lapack.JNILAPACK\n" |
276 | | - ] |
277 | | - }, |
278 | 254 | { |
279 | 255 | "name": "stdout", |
280 | 256 | "output_type": "stream", |
281 | 257 | "text": [ |
282 | | - "Took 10.751604393999514 seconds for training.\n" |
| 258 | + "Took 7.5410127229988575 seconds for training.\n" |
283 | 259 | ] |
284 | 260 | } |
285 | 261 | ], |
|
308 | 284 | "name": "stderr", |
309 | 285 | "output_type": "stream", |
310 | 286 | "text": [ |
311 | | - "22/01/13 16:28:12 WARN Column: Constructing trivially true equals predicate, 'UserId#0 = UserId#0'. Perhaps you need to use aliases.\n", |
312 | | - "[Stage 126:===================================================> (194 + 2) / 200]\r" |
| 287 | + "[Stage 126:====================================================>(198 + 2) / 200]\r" |
313 | 288 | ] |
314 | 289 | }, |
315 | 290 | { |
316 | 291 | "name": "stdout", |
317 | 292 | "output_type": "stream", |
318 | 293 | "text": [ |
319 | | - "Took 25.308568059001118 seconds for prediction.\n" |
| 294 | + "Took 25.246142672998758 seconds for prediction.\n" |
320 | 295 | ] |
321 | 296 | }, |
322 | 297 | { |
|
367 | 342 | "|UserId|MovieId|prediction|\n", |
368 | 343 | "+------+-------+----------+\n", |
369 | 344 | "| 1| 587| 4.1602826|\n", |
370 | | - "| 1| 869| 2.7732866|\n", |
371 | | - "| 1| 1208| 2.0333834|\n", |
372 | | - "| 1| 1348| 1.0019258|\n", |
373 | | - "| 1| 1357| 0.9430025|\n", |
| 345 | + "| 1| 869| 2.7732863|\n", |
| 346 | + "| 1| 1208| 2.033383|\n", |
| 347 | + "| 1| 1348| 1.0019257|\n", |
| 348 | + "| 1| 1357| 0.9430026|\n", |
374 | 349 | "| 1| 1677| 2.8777318|\n", |
375 | 350 | "| 2| 80| 2.351385|\n", |
376 | | - "| 2| 472| 2.5865324|\n", |
377 | | - "| 2| 582| 3.9548614|\n", |
378 | | - "| 2| 838| 0.9482964|\n", |
379 | | - "| 2| 975| 3.1133537|\n", |
380 | | - "| 2| 1260| 1.9871742|\n", |
| 351 | + "| 2| 472| 2.5865319|\n", |
| 352 | + "| 2| 582| 3.9548612|\n", |
| 353 | + "| 2| 838| 0.9482963|\n", |
| 354 | + "| 2| 975| 3.1133535|\n", |
| 355 | + "| 2| 1260| 1.9871743|\n", |
381 | 356 | "| 2| 1325| 1.2368056|\n", |
382 | 357 | "| 2| 1381| 3.5477588|\n", |
383 | | - "| 2| 1530| 2.0882902|\n", |
384 | | - "| 3| 22| 3.1524534|\n", |
385 | | - "| 3| 57| 3.6980166|\n", |
386 | | - "| 3| 89| 3.9733818|\n", |
387 | | - "| 3| 367| 3.6629043|\n", |
388 | | - "| 3| 1091| 0.9144471|\n", |
| 358 | + "| 2| 1530| 2.08829|\n", |
| 359 | + "| 3| 22| 3.1524537|\n", |
| 360 | + "| 3| 57| 3.6980162|\n", |
| 361 | + "| 3| 89| 3.9733813|\n", |
| 362 | + "| 3| 367| 3.6629045|\n", |
| 363 | + "| 3| 1091| 0.9144474|\n", |
389 | 364 | "+------+-------+----------+\n", |
390 | 365 | "only showing top 20 rows\n", |
391 | 366 | "\n" |
|
412 | 387 | "name": "stderr", |
413 | 388 | "output_type": "stream", |
414 | 389 | "text": [ |
415 | | - "/home/test/miniconda3/envs/reco/lib/python3.8/site-packages/pyspark/sql/context.py:125: FutureWarning: Deprecated in 3.0.0. Use SparkSession.builder.getOrCreate() instead.\n", |
416 | | - " warnings.warn(\n", |
417 | 390 | " \r" |
418 | 391 | ] |
419 | 392 | } |
|
482 | 455 | "name": "stderr", |
483 | 456 | "output_type": "stream", |
484 | 457 | "text": [ |
485 | | - "\r", |
486 | | - "[Stage 499:==========> (169 + 2) / 200][Stage 500:====> (3 + 0) / 10]\r", |
487 | | - "\r", |
488 | | - "[Stage 499:============>(185 + 2) / 200][Stage 500:====> (3 + 0) / 10]\r" |
| 458 | + "[Stage 500:=============================================> (171 + 3) / 200]\r" |
489 | 459 | ] |
490 | 460 | }, |
491 | 461 | { |
|
495 | 465 | "+------+-------+------+---------+----------+\n", |
496 | 466 | "|UserId|MovieId|Rating|Timestamp|prediction|\n", |
497 | 467 | "+------+-------+------+---------+----------+\n", |
498 | | - "| 580| 148| 4.0|884125773| 3.4059544|\n", |
499 | | - "| 406| 148| 3.0|879540276| 2.7134616|\n", |
500 | | - "| 916| 148| 2.0|880843892| 2.224198|\n", |
501 | | - "| 663| 148| 4.0|889492989| 2.7143617|\n", |
| 468 | + "| 580| 148| 4.0|884125773| 3.4059548|\n", |
| 469 | + "| 406| 148| 3.0|879540276| 2.7134619|\n", |
| 470 | + "| 916| 148| 2.0|880843892| 2.2241986|\n", |
| 471 | + "| 663| 148| 4.0|889492989| 2.714362|\n", |
502 | 472 | "| 330| 148| 4.0|876544781| 4.52321|\n", |
503 | 473 | "| 935| 148| 4.0|884472892| 4.3838587|\n", |
504 | | - "| 308| 148| 3.0|887740788| 2.616949|\n", |
505 | | - "| 20| 148| 5.0|879668713| 4.37212|\n", |
| 474 | + "| 308| 148| 3.0|887740788| 2.6169493|\n", |
| 475 | + "| 20| 148| 5.0|879668713| 4.3721194|\n", |
506 | 476 | "| 923| 148| 4.0|880387474| 3.9818575|\n", |
507 | | - "| 455| 148| 3.0|879110346| 3.076419|\n", |
| 477 | + "| 455| 148| 3.0|879110346| 3.0764186|\n", |
508 | 478 | "| 15| 148| 3.0|879456049| 2.9913845|\n", |
509 | | - "| 374| 148| 4.0|880392992| 3.2223387|\n", |
510 | | - "| 880| 148| 2.0|880167030| 2.8111987|\n", |
511 | | - "| 677| 148| 4.0|889399265| 3.8451848|\n", |
512 | | - "| 49| 148| 1.0|888068195| 1.3751595|\n", |
| 479 | + "| 374| 148| 4.0|880392992| 3.2223384|\n", |
| 480 | + "| 880| 148| 2.0|880167030| 2.8111982|\n", |
| 481 | + "| 677| 148| 4.0|889399265| 3.8451843|\n", |
| 482 | + "| 49| 148| 1.0|888068195| 1.3751594|\n", |
513 | 483 | "| 244| 148| 2.0|880605071| 2.6781514|\n", |
514 | 484 | "| 84| 148| 4.0|883452274| 3.6721768|\n", |
515 | | - "| 627| 148| 3.0|879530463| 2.636207|\n", |
516 | | - "| 434| 148| 3.0|886724797| 3.0973825|\n", |
| 485 | + "| 627| 148| 3.0|879530463| 2.6362069|\n", |
| 486 | + "| 434| 148| 3.0|886724797| 3.0973828|\n", |
517 | 487 | "| 793| 148| 4.0|875104498| 2.2886577|\n", |
518 | 488 | "+------+-------+------+---------+----------+\n", |
519 | 489 | "only showing top 20 rows\n", |
|
525 | 495 | "output_type": "stream", |
526 | 496 | "text": [ |
527 | 497 | "\r", |
528 | | - "[Stage 500:============================> (5 + 2) / 10]\r", |
| 498 | + "[Stage 500:=================================================> (186 + 3) / 200]\r", |
529 | 499 | "\r", |
530 | 500 | " \r" |
531 | 501 | ] |
|
546 | 516 | "name": "stderr", |
547 | 517 | "output_type": "stream", |
548 | 518 | "text": [ |
549 | | - " \r" |
| 519 | + "[Stage 775:==============================================> (174 + 2) / 200]\r" |
550 | 520 | ] |
551 | 521 | }, |
552 | 522 | { |
|
559 | 529 | "Explained variance:\t0.265916\n", |
560 | 530 | "R squared:\t0.259532\n" |
561 | 531 | ] |
| 532 | + }, |
| 533 | + { |
| 534 | + "name": "stderr", |
| 535 | + "output_type": "stream", |
| 536 | + "text": [ |
| 537 | + "\r", |
| 538 | + " \r" |
| 539 | + ] |
562 | 540 | } |
563 | 541 | ], |
564 | 542 | "source": [ |
|
577 | 555 | "execution_count": 14, |
578 | 556 | "metadata": {}, |
579 | 557 | "outputs": [ |
580 | | - { |
581 | | - "name": "stderr", |
582 | | - "output_type": "stream", |
583 | | - "text": [ |
584 | | - "/home/test/miniconda3/envs/reco/lib/python3.8/site-packages/papermill/iorw.py:50: FutureWarning: pyarrow.HadoopFileSystem is deprecated as of 2.0.0, please use pyarrow.fs.HadoopFileSystem instead.\n", |
585 | | - " from pyarrow import HadoopFileSystem\n" |
586 | | - ] |
587 | | - }, |
588 | 558 | { |
589 | 559 | "data": { |
590 | 560 | "application/scrapbook.scrap.json+json": { |
|
657 | 627 | "name": "stderr", |
658 | 628 | "output_type": "stream", |
659 | 629 | "text": [ |
| 630 | + "\r", |
| 631 | + "[Stage 904:> (0 + 2) / 2]\r", |
| 632 | + "\r", |
660 | 633 | " \r" |
661 | 634 | ] |
662 | 635 | }, |
|
681 | 654 | { |
682 | 655 | "data": { |
683 | 656 | "application/scrapbook.scrap.json+json": { |
684 | | - "data": 0.9674342218475348, |
| 657 | + "data": 0.9674342234414528, |
685 | 658 | "encoder": "json", |
686 | 659 | "name": "rmse", |
687 | 660 | "version": 1 |
|
699 | 672 | { |
700 | 673 | "data": { |
701 | 674 | "application/scrapbook.scrap.json+json": { |
702 | | - "data": 0.7533395123214025, |
| 675 | + "data": 0.7533395161385739, |
703 | 676 | "encoder": "json", |
704 | 677 | "name": "mae", |
705 | 678 | "version": 1 |
|
724 | 697 | { |
725 | 698 | "data": { |
726 | 699 | "application/scrapbook.scrap.json+json": { |
727 | | - "data": 0.2659161988067019, |
| 700 | + "data": 0.2659161968930053, |
728 | 701 | "encoder": "json", |
729 | 702 | "name": "exp_var", |
730 | 703 | "version": 1 |
|
742 | 715 | { |
743 | 716 | "data": { |
744 | 717 | "application/scrapbook.scrap.json+json": { |
745 | | - "data": 0.25953227528757383, |
| 718 | + "data": 0.2595322728476255, |
746 | 719 | "encoder": "json", |
747 | 720 | "name": "rsquared", |
748 | 721 | "version": 1 |
|
760 | 733 | { |
761 | 734 | "data": { |
762 | 735 | "application/scrapbook.scrap.json+json": { |
763 | | - "data": 10.751604393999514, |
| 736 | + "data": 7.5410127229988575, |
764 | 737 | "encoder": "json", |
765 | 738 | "name": "train_time", |
766 | 739 | "version": 1 |
|
778 | 751 | { |
779 | 752 | "data": { |
780 | 753 | "application/scrapbook.scrap.json+json": { |
781 | | - "data": 25.308568059001118, |
| 754 | + "data": 25.246142672998758, |
782 | 755 | "encoder": "json", |
783 | 756 | "name": "test_time", |
784 | 757 | "version": 1 |
|
0 commit comments